Fixup divider save_restore for floating point too; improve tests (#405)

- The divider state needs to be saved for __aeabi_ddiv, __aeabi_fdiv, __aeabi_dtan and __aeabi_ftan, or they won't work in interrupts (probably not used much, you'd hope) or on an RTOS context switch
 - Refactored code out for the integer and floating point cases
 - Improved the floating point 'tests' in passing to check more return values against GCC implementations
 - Added floating point usage to the IRQ nesting test case
This commit is contained in:
Graham Sanderson 2021-05-13 07:38:42 -05:00 committed by GitHub
parent c6c4eeb122
commit 574fdee37b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 501 additions and 158 deletions

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"
// Shift counts for "lsrs rN, #SHIFT": shifting the CSR value right by (bit position + 1)
// leaves the named flag in the carry flag, where it can be tested with bcc/bcs.
// The values are hard-coded, so fail the build loudly if the CSR bit layout ever changes.
#if SIO_DIV_CSR_READY_LSB == 0
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
#else
#error SIO_DIV_CSR_READY_LSB has changed - need to change SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
#endif
#if SIO_DIV_CSR_DIRTY_LSB == 1
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
#else
#error SIO_DIV_CSR_DIRTY_LSB has changed - need to change SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
#endif
// Saves the hardware divider state in r4-r7 so that restore_div_state_and_return can put it back.
//
// in:       r2 = SIO_BASE ptr
// out:      r4 = dividend, r5 = divisor, r6 = quotient, r7 = remainder (as read from the divider)
// stack:    pushes r4-r7, lr to stack
// requires: that division started at least 2 cycles prior to the start of the macro
.macro save_div_state_and_lr
// originally we polled CSR for READY first (shown below), however a) it uses r3, b) the push
// takes 6 cycles, and c) any IRQ which uses the divider will necessarily put the data back,
// which will immediately make it ready
//
// //    ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
// //    // wait for results as we can't save signed-ness of operation
// // 1:
// //    lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
// //    bcc 1b
// 6 cycles
// (presumably the push time plus the 2 cycles required above is what lets us skip the READY poll - TODO confirm)
push {r4, r5, r6, r7, lr}
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
// (presumably because reading QUOTIENT clears the DIRTY flag - TODO confirm against the RP2040 datasheet)
// the operands are read through the unsigned (UDIVIDEND/UDIVISOR) register aliases
ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
.endm
// restores divider state from r4-r7, then pops them and pc
//
// in:    r2 = SIO_BASE ptr (every store below is relative to r2)
//        r4 = dividend, r5 = divisor, r6 = quotient, r7 = remainder, as loaded by save_div_state_and_lr
// stack: pops r4-r7 and pc, i.e. this macro returns to the lr value pushed by save_div_state_and_lr
.macro restore_div_state_and_return
// writing udividend (r4), udivisor (r5), remainder (r7), quotient (r6) in that order
//
// it is worth considering what happens if we are interrupted
//
// after writing r4: we are DIRTY and !READY
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
// saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5: we are DIRTY and !READY
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
// at least will be saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5, r7: we are DIRTY and READY
// ... interruptor using div will save/restore the dividend, divisor, remainder registers as is (what we just
// restored ourselves), and we'll restore the quotient after the fact
// NOTE(review): this last case was originally written as "after writing r4, r5, r6" with the remainder restored
// afterwards; it has been reworded to follow the store order actually used below - confirm the DIRTY/READY
// claim for a REMAINDER write against the RP2040 datasheet
//
// note we do not use STM; not because it can be restarted due to an interrupt (which is harmless), but because
// this is 1 cycle IO space and so 4 separate stores are cheaper (and we don't have to adjust r2)
// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based
// on the signs of the inputs
str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
pop {r4, r5, r6, r7, pc}
.endm

View File

@ -4,8 +4,8 @@
* SPDX-License-Identifier: BSD-3-Clause * SPDX-License-Identifier: BSD-3-Clause
*/ */
#include "hardware/regs/sio.h"
#include "hardware/regs/addressmap.h" #include "hardware/regs/addressmap.h"
#include "hardware/divider_helper.S"
.syntax unified .syntax unified
.cpu cortex-m0plus .cpu cortex-m0plus
@ -34,17 +34,6 @@
#endif #endif
.endm .endm
#if SIO_DIV_CSR_READY_LSB == 0
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
#else
need to change SHIFT above
#endif
#if SIO_DIV_CSR_DIRTY_LSB == 1
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
#else
need to change SHIFT above
#endif
@ wait 8-n cycles for the hardware divider @ wait 8-n cycles for the hardware divider
.macro wait_div n .macro wait_div n
.rept (8-\n) / 2 .rept (8-\n) / 2
@ -56,58 +45,17 @@ need to change SHIFT above
.endif .endif
.endm .endm
#if (SIO_DIV_SDIVISOR_OFFSET != SIO_DIV_SDIVIDEND_OFFSET + 4) || (SIO_DIV_QUOTIENT_OFFSET != SIO_DIV_SDIVISOR_OFFSET + 4) || (SIO_DIV_REMAINDER_OFFSET != SIO_DIV_QUOTIENT_OFFSET + 4) #if (SIO_DIV_SDIVISOR_OFFSET != SIO_DIV_SDIVIDEND_OFFSET + 4) || (SIO_DIV_QUOTIENT_OFFSET != SIO_DIV_SDIVISOR_OFFSET + 4) || (SIO_DIV_REMAINDER_OFFSET != SIO_DIV_QUOTIENT_OFFSET + 4)
#error register layout has changed - we rely on this order to make sure we save/restore in the right order #error register layout has changed - we rely on this order to make sure we save/restore in the right order
#endif #endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS #if !PICO_DIVIDER_DISABLE_INTERRUPTS
# SIO_BASE ptr in r2
.macro save_div_state_and_lr
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
# wait for results as we can't save signed-ness of operation
1:
lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
bcc 1b
push {r4, r5, r6, r7, lr}
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
ldr r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
ldr r5, [r2, #SIO_DIV_SDIVISOR_OFFSET]
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
.endm
.macro restore_div_state_and_return
// writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order
//
// it is worth considering what happens if we are interrupted
//
// after writing r4: we are DIRTY and !READY
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
// saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5: we are DIRTY and !READY
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
// at least will be saved/restored correctly and and we'll restore the rest ourselves
// after writing r4, r5, r6: we are DIRTY and READY
// ... interruptor using div will dividend, divisor, quotient registers as is (what we just restored ourselves),
// and we'll restore the remainder after the fact
// note we are not use STM not because it can be restarted due to interrupt which is harmless, more because this is 1 cycle IO space
// and so 4 reads is cheaper (and we don't have to adjust r2)
str r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
str r5, [r2, #SIO_DIV_SDIVISOR_OFFSET]
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
pop {r4, r5, r6, r7, pc}
.endm
.macro save_div_state_and_lr_64 .macro save_div_state_and_lr_64
push {r4, r5, r6, r7, lr} push {r4, r5, r6, r7, lr}
ldr r6, =SIO_BASE ldr r6, =SIO_BASE
1: 1:
ldr r5, [r6, #SIO_DIV_CSR_OFFSET] ldr r5, [r6, #SIO_DIV_CSR_OFFSET]
# wait for results as we can't save signed-ness of operation // wait for results as we can't save signed-ness of operation
lsrs r5, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY lsrs r5, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
bcc 1b bcc 1b
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia! // note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
@ -154,17 +102,18 @@ wrapper_func __aeabi_idivmod
regular_func div_s32s32 regular_func div_s32s32
regular_func divmod_s32s32 regular_func divmod_s32s32
#if !PICO_DIVIDER_DISABLE_INTERRUPTS #if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET] ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_s32s32_savestate bcs divmod_s32s32_savestate
regular_func divmod_s32s32_unsafe regular_func divmod_s32s32_unsafe
#else #else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs // to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there // in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway // are the hardware_divider functions that can be used instead anyway
regular_func divmod_s32s32_unsafe regular_func divmod_s32s32_unsafe
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
mrs r3, PRIMASK mrs r3, PRIMASK
cpsid i cpsid i
@ -203,6 +152,8 @@ regular_func divmod_s32s32_unsafe
#if !PICO_DIVIDER_DISABLE_INTERRUPTS #if !PICO_DIVIDER_DISABLE_INTERRUPTS
.align 2 .align 2
regular_func divmod_s32s32_savestate regular_func divmod_s32s32_savestate
// note that we must be at least 2 cycles into division at this point,
// which we are because of the dirty check before getting here (and of course the function call before that)
save_div_state_and_lr save_div_state_and_lr
bl divmod_s32s32_unsafe bl divmod_s32s32_unsafe
restore_div_state_and_return restore_div_state_and_return
@ -215,17 +166,18 @@ regular_func divmod_u32u32
wrapper_func __aeabi_uidiv wrapper_func __aeabi_uidiv
wrapper_func __aeabi_uidivmod wrapper_func __aeabi_uidivmod
#if !PICO_DIVIDER_DISABLE_INTERRUPTS #if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET] ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_u32u32_savestate bcs divmod_u32u32_savestate
regular_func divmod_u32u32_unsafe regular_func divmod_u32u32_unsafe
#else #else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs // to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there // in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway // are the hardware_divider functions that can be used instead anyway
regular_func divmod_u32u32_unsafe regular_func divmod_u32u32_unsafe
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
mrs r3, PRIMASK mrs r3, PRIMASK
cpsid i cpsid i
@ -273,9 +225,9 @@ wrapper_func __aeabi_ldivmod
regular_func div_s64s64 regular_func div_s64s64
regular_func divmod_s64s64 regular_func divmod_s64s64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS #if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2 mov ip, r2
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r2, [r2, #SIO_DIV_CSR_OFFSET] ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
mov r2, ip mov r2, ip
@ -287,6 +239,7 @@ divmod_s64s64_savestate:
bl divmod_s64s64_unsafe bl divmod_s64s64_unsafe
restore_div_state_and_return_64 restore_div_state_and_return_64
#else #else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr} push {r4, lr}
mrs r4, PRIMASK mrs r4, PRIMASK
cpsid i cpsid i
@ -300,9 +253,9 @@ wrapper_func __aeabi_uldivmod
regular_func div_u64u64 regular_func div_u64u64
regular_func divmod_u64u64 regular_func divmod_u64u64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS #if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2 mov ip, r2
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r2, [r2, #SIO_DIV_CSR_OFFSET] ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
mov r2, ip mov r2, ip
@ -314,6 +267,7 @@ regular_func divmod_u64u64_savestate
bl divmod_u64u64_unsafe bl divmod_u64u64_unsafe
restore_div_state_and_return_64 restore_div_state_and_return_64
#else #else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr} push {r4, lr}
mrs r4, PRIMASK mrs r4, PRIMASK
cpsid i cpsid i

View File

@ -6,6 +6,7 @@
#include "pico/asm_helper.S" #include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h" #include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"
__pre_init __aeabi_double_init, 00020 __pre_init __aeabi_double_init, 00020
@ -131,16 +132,16 @@ regular_func pop_r8_r11
mov r11,r7 mov r11,r7
bx r14 bx r14
# note generally each function is in a separate section unless there is fall thru or branching between them // note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool // note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions // note functions are word aligned except where they are an odd number of linear instructions
// double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition // double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition
double_wrapper_section __aeabi_darithmetic double_wrapper_section __aeabi_darithmetic
// double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x // double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment // frsub first because it is the only one that needs alignment
.align 2 .align 2
wrapper_func __aeabi_drsub wrapper_func __aeabi_drsub
eors r0, r1 eors r0, r1
@ -177,7 +178,35 @@ wrapper_func_d2 __aeabi_ddiv
b ddiv_dsub_nan_helper b ddiv_dsub_nan_helper
1: 1:
#endif #endif
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim #if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs ddiv_save_state
mov r2, ip
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl ddiv_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
ddiv_shim_call:
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ddiv_save_state:
ldr r2, =(SIO_BASE)
save_div_state_and_lr
mov r2, ip
bl ddiv_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
ddiv_dsub_nan_helper: ddiv_dsub_nan_helper:
#if PICO_DOUBLE_PROPAGATE_NANS #if PICO_DOUBLE_PROPAGATE_NANS
@ -592,6 +621,8 @@ regular_func sincostan_remainder
ldr r2, =0x54442D18 // 2 * M_PI ldr r2, =0x54442D18 // 2 * M_PI
ldr r3, =0x401921FB ldr r3, =0x401921FB
push {lr} push {lr}
// note remainder only uses the divider thru integer divider functions
// which save and restore themselves
bl remainder bl remainder
pop {pc} pop {pc}
@ -752,13 +783,40 @@ double_wrapper_section tan
wrapper_func tan wrapper_func tan
// rom version only works for -1024 < angle < 1024 // rom version only works for -1024 < angle < 1024
lsls r2, r1, #2 lsls r2, r1, #2
bcc 1f bcc dtan_in_range
lsrs r2, #22 lsrs r2, #22
cmp r2, #9 cmp r2, #9
bge 2f bge dtan_angle_out_of_range
1: dtan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs dtan_save_state
mov r2, ip
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl dtan_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
dtan_shim_call:
shimmable_table_tail_call SF_TABLE_FTAN dtan_shim shimmable_table_tail_call SF_TABLE_FTAN dtan_shim
2: #if !PICO_DIVIDER_DISABLE_INTERRUPTS
dtan_save_state:
ldr r2, =(SIO_BASE)
save_div_state_and_lr
mov r2, ip
bl dtan_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
dtan_angle_out_of_range:
#if PICO_DOUBLE_PROPAGATE_NANS #if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1 lsls r2, r1, #1
asrs r2, #21 asrs r2, #21
@ -775,7 +833,7 @@ wrapper_func tan
bl sincostan_remainder bl sincostan_remainder
pop {r2} pop {r2}
mov lr, r2 mov lr, r2
b 1b b dtan_in_range
double_wrapper_section atan2 double_wrapper_section atan2
wrapper_func_d2 atan2 wrapper_func_d2 atan2

View File

@ -6,6 +6,7 @@
#include "pico/asm_helper.S" #include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h" #include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"
__pre_init __aeabi_float_init, 00020 __pre_init __aeabi_float_init, 00020
@ -104,16 +105,16 @@ __check_nan_f2:
.endm .endm
# note generally each function is in a separate section unless there is fall thru or branching between them // note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool // note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions // note functions are word aligned except where they are an odd number of linear instructions
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition // float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
float_wrapper_section __aeabi_farithmetic float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x // float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment // frsub first because it is the only one that needs alignment
.align 2 .align 2
wrapper_func __aeabi_frsub wrapper_func __aeabi_frsub
eors r0, r1 eors r0, r1
@ -146,7 +147,30 @@ wrapper_func_f2 __aeabi_fdiv
b fdiv_fsub_nan_helper b fdiv_fsub_nan_helper
1: 1:
#endif #endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs fdiv_save_state
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl fdiv_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
fdiv_shim_call:
table_tail_call SF_TABLE_FDIV table_tail_call SF_TABLE_FDIV
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
fdiv_save_state:
save_div_state_and_lr
bl fdiv_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
fdiv_fsub_nan_helper: fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS #if PICO_FLOAT_PROPAGATE_NANS
@ -689,10 +713,33 @@ wrapper_func tanf
lsls r1, r0, #1 lsls r1, r0, #1
lsrs r1, #24 lsrs r1, #24
cmp r1, #127 + 7 cmp r1, #127 + 7
bge 1f bge ftan_out_of_range
2: ftan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs ftan_save_state
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl ftan_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
ftan_shim_call:
table_tail_call SF_TABLE_FTAN table_tail_call SF_TABLE_FTAN
1: #if !PICO_DIVIDER_DISABLE_INTERRUPTS
ftan_save_state:
save_div_state_and_lr
bl ftan_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
ftan_out_of_range:
#if PICO_FLOAT_PROPAGATE_NANS #if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites // also check for infinites
cmp r1, #255 cmp r1, #255
@ -709,7 +756,7 @@ wrapper_func tanf
bl remainderf bl remainderf
pop {r1} pop {r1}
mov lr, r1 mov lr, r1
b 2b b ftan_in_range
float_wrapper_section atan2f float_wrapper_section atan2f
wrapper_func_f2 atan2f wrapper_func_f2 atan2f

View File

@ -12,7 +12,7 @@ if (PICO_ON_DEVICE)
pico_add_extra_outputs(pico_divider_test) pico_add_extra_outputs(pico_divider_test)
target_compile_definitions(pico_divider_test PRIVATE target_compile_definitions(pico_divider_test PRIVATE
PICO_DIVIDER_DISABLE_INTERRUPTS=1 # PICO_DIVIDER_DISABLE_INTERRUPTS=1
# TURBO # TURBO
) )

View File

@ -15,6 +15,9 @@ volatile bool failed;
volatile uint32_t count[3]; volatile uint32_t count[3];
volatile bool done; volatile bool done;
#define FAILED() ({ failed = true; })
//#define FAILED() ({ failed = true; __breakpoint(); })
bool timer_callback(repeating_timer_t *t) { bool timer_callback(repeating_timer_t *t) {
count[0]++; count[0]++;
static int z; static int z;
@ -23,9 +26,27 @@ bool timer_callback(repeating_timer_t *t) {
int a = z / 7; int a = z / 7;
int b = z % 7; int b = z % 7;
if (z != a * 7 + b) { if (z != a * 7 + b) {
failed = true; FAILED();
}
a = z / -7;
b = z % -7;
if (z != a * -7 + b) {
FAILED();
} }
} }
float fz = z;
float fa = fz / 11.0f;
float fb = fmodf(fz, 11.0f);
if (fabsf(fz - (fa * 11.0 + fb) > 1e-9)) {
FAILED();
}
double dz = z;
double da = dz / 11.0;
double db = fmod(dz, 11.0);
if (fabsf(dz - (da * 11.0 + db) > 1e-9)) {
FAILED();
}
return !done; return !done;
} }
@ -41,16 +62,20 @@ void do_dma_start(uint ch) {
dma_channel_configure(ch, &c, &word[ch], &word[ch], 513 + ch * 23, true); dma_channel_configure(ch, &c, &word[ch], &word[ch], 513 + ch * 23, true);
} }
double d0c, d0s, d0t, dz;
float f0c, f0s, f0t, fz;
void test_irq_handler0() { void test_irq_handler0() {
count[1]++; count[1]++;
dma_hw->ints0 |= 1u; dma_hw->ints0 |= 1u;
static uint z; static uint z;
static uint dz;
for (int i=0; i<80;i++) { for (int i=0; i<80;i++) {
z += 31; z += 31;
uint a = z / 11; uint a = z / 11;
uint b = z % 11; uint b = z % 11;
if (z != a * 11 + b) { if (z != a * 11 + b) {
failed = true; FAILED();
} }
} }
if (done) dma_channel_abort(0); if (done) dma_channel_abort(0);
@ -66,16 +91,17 @@ void test_irq_handler1() {
uint a = z / -13; uint a = z / -13;
uint b = z % -13; uint b = z % -13;
if (z != a * -13 + b) { if (z != a * -13 + b) {
failed = true; FAILED();
} }
static uint64_t z64; static uint64_t z64;
z64 -= 47; z64 -= 47;
uint64_t a64 = z64 / -13; uint64_t a64 = z64 / -13;
uint64_t b64 = z64 % -13; uint64_t b64 = z64 % -13;
if (z64 != a64 * -13 + b64) { if (z64 != a64 * -13 + b64) {
failed = true; FAILED();
} }
} }
if (done) dma_channel_abort(1); if (done) dma_channel_abort(1);
else do_dma_start(1); else do_dma_start(1);
} }
@ -89,7 +115,7 @@ void test_nesting() {
// They all busily make use of the dividers, to expose any issues with nested use // They all busily make use of the dividers, to expose any issues with nested use
repeating_timer_t timer; repeating_timer_t timer;
add_repeating_timer_us(529, timer_callback, NULL, &timer); add_repeating_timer_us(929, timer_callback, NULL, &timer);
irq_set_exclusive_handler(DMA_IRQ_0, test_irq_handler0); irq_set_exclusive_handler(DMA_IRQ_0, test_irq_handler0);
irq_set_exclusive_handler(DMA_IRQ_1, test_irq_handler1); irq_set_exclusive_handler(DMA_IRQ_1, test_irq_handler1);
@ -101,7 +127,7 @@ void test_nesting() {
irq_set_enabled(DMA_IRQ_1, 1); irq_set_enabled(DMA_IRQ_1, 1);
do_dma_start(0); do_dma_start(0);
do_dma_start(1); do_dma_start(1);
absolute_time_t end = delayed_by_ms(get_absolute_time(), 2000); absolute_time_t end = delayed_by_ms(get_absolute_time(), 10000);
int count_local=0; int count_local=0;
while (!time_reached(end)) { while (!time_reached(end)) {
for(uint i=0;i<100;i++) { for(uint i=0;i<100;i++) {
@ -109,8 +135,39 @@ void test_nesting() {
uint a = z / 11; uint a = z / 11;
uint b = z % 11; uint b = z % 11;
if (z != a * 11 + b) { if (z != a * 11 + b) {
failed = true; FAILED();
} }
int zz = (int)z;
int aa = zz / -11;
int bb = zz % -11;
if (zz != aa * -11 + bb) {
FAILED();
}
aa = -zz / -11;
bb = -zz % -11;
if (-zz != aa * -11 + bb) {
FAILED();
}
aa = -zz / 11;
bb = -zz % 11;
if (-zz != aa * 11 + bb) {
FAILED();
}
a = 0xffffffffu / 11;
b = 0xffffffffu % 11;
if (0xffffffffu != a * 11 + b) {
FAILED();
}
}
// these use the divider
for(uint i=0;i<=100;i+=20) {
// both in and out bootrom range (we perform mod in wrapper code if necessary)
f0t = tanf(i * 50);
f0c = cosf(i * 50);
f0s = sinf(i * 50);
d0t = tan(i * 1000);
d0c = cos(i * 1000);
d0s = sin(i * 1000);
} }
count_local++; count_local++;
} }

View File

@ -11,15 +11,19 @@ add_executable(pico_double_test
) )
#todo split out variants with different flags
target_compile_definitions(pico_float_test PRIVATE target_compile_definitions(pico_float_test PRIVATE
PICO_USE_CRT_PRINTF=1 # want full precision output PICO_USE_CRT_PRINTF=1 # want full precision output
# PICO_FLOAT_PROPAGATE_NANS=1 # PICO_FLOAT_PROPAGATE_NANS=1
# PICO_DIVIDER_DISABLE_INTERRUPTS=1
) )
#todo split out variants with different flags
target_compile_definitions(pico_double_test PRIVATE target_compile_definitions(pico_double_test PRIVATE
PICO_USE_CRT_PRINTF=1 # want full precision output PICO_USE_CRT_PRINTF=1 # want full precision output
PICO_FLOAT_PROPAGATE_NANS=1 PICO_FLOAT_PROPAGATE_NANS=1
PICO_DOUBLE_PROPAGATE_NANS=1 #PICO_DOUBLE_PROPAGATE_NANS=1
#PICO_DIVIDER_DISABLE_INTERRUPTS=1
) )
# handy for testing we aren't pulling in extra stuff # handy for testing we aren't pulling in extra stuff

View File

@ -282,6 +282,51 @@ int test_dcmpun() {
return 0; return 0;
} }
#define assert_nan(a) assert(isnan(a))
#define check_nan(a) ({ assert_nan(a); a; })
double __aeabi_i2d(int32_t);
double __aeabi_ui2d(int32_t);
double __aeabi_l2d(int64_t);
double __aeabi_ul2d(int64_t);
int32_t __aeabi_d2iz(double);
int64_t __aeabi_d2lz(double);
double __aeabi_dmul(double, double);
double __aeabi_ddiv(double, double);
#if LIB_PICO_DOUBLE_PICO
double __real___aeabi_i2d(int);
double __real___aeabi_ui2d(int);
double __real___aeabi_l2d(int64_t);
double __real___aeabi_ul2d(int64_t);
double __real___aeabi_dmul(double, double);
double __real___aeabi_ddiv(double, double);
int32_t __real___aeabi_d2iz(double);
int64_t __real___aeabi_d2lz(double);
double __real_sqrt(double);
double __real_cos(double);
double __real_sin(double);
double __real_tan(double);
double __real_exp(double);
double __real_log(double);
double __real_atan2(double, double);
double __real_pow(double, double);
double __real_trunc(double);
double __real_ldexp(double, int);
double __real_fmod(double, double);
// maximum absolute difference tolerated by assert_close
#define EPSILON 1e-9
// asserts that a and b differ by less than EPSILON (absolute), or are infinities of the same sign.
// both one-sided differences must be below EPSILON: joined with '||' the check was true for every
// pair of finite values, so it could never fail. note the arguments are evaluated more than once.
#define assert_close(a, b) assert((((b) - (a)) < EPSILON && ((a) - (b)) < EPSILON) || (isinf(a) && isinf(b) && ((a) < 0) == ((b) < 0)))
#define check1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); assert(r == r2); r; })
#define check2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); assert(r == r2); r; })
#define check_close1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); if (isnan(p0)) assert_nan(r); else assert_close(r, r2); r; })
#define check_close2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); if (isnan(p0) || isnan(p1)) assert_nan(r); else assert_close(r, r2); r; })
#else
#define check1(func,p0) func(p0)
#define check2(func,p0,p1) func(p0,p1)
#define check_close1(func,p0) func(p0)
#define check_close2(func,p0,p1) func(p0,p1)
#endif
double aa = 0.5; double aa = 0.5;
double bb = 1; double bb = 1;
@ -305,14 +350,18 @@ int main() {
#if 1 #if 1
for (double x = 0; x < 3; x++) { for (double x = 0; x < 3; x++) {
printf("\n ----- %g\n", x); printf("\n ----- %g\n", x);
printf("SQRT %10.18g\n", sqrt(x)); printf("SQRT %10.18g\n", check_close1(sqrt, x));
printf("COS %10.18g\n", cos(x)); printf("COS %10.18g\n", check_close1(cos, x));
printf("SIN %10.18g\n", sin(x)); printf("SIN %10.18g\n", check_close1(sin, x));
printf("TAN %10.18g\n", tan(x)); printf("TAN %10.18g\n", check_close1(tan, x));
printf("ATAN2 %10.18g\n", atan2(x, 10)); printf("ATAN2 %10.18g\n", check_close2(atan2, x, 10.0));
printf("ATAN2 %10.18g\n", atan2(10, x)); printf("ATAN2 %10.18g\n", check_close2(atan2, 10.0, x));
printf("EXP %10.18g\n", exp(x)); printf("EXP %10.18g\n", check_close1(exp, x));
printf("LN %10.18g\n", log(x)); printf("LN %10.18g\n", check_close1(log, x));
printf("POW %10.18f\n", check_close2(pow, x, x));
printf("TRUNC %10.18f\n", check_close1(trunc, x));
printf("LDEXP %10.18f\n", check_close2(ldexp, x, x));
printf("FMOD %10.18f\n", check_close2(fmod, x, 3.0f));
double s, c; double s, c;
sincos(x, &s, &c); sincos(x, &s, &c);
printf("SINCOS %10.18f %10.18f\n", s, c); printf("SINCOS %10.18f %10.18f\n", s, c);
@ -325,22 +374,21 @@ int main() {
#if PICO_DOUBLE_PROPAGATE_NANS #if PICO_DOUBLE_PROPAGATE_NANS
{ {
float x = NAN; float x = NAN;
printf("NANO %10.18f\n", x); printf("SQRT %10.18g\n", check_close1(sqrt, x));
printf("SQRT %10.18f\n", sqrt(x)); printf("COS %10.18g\n", check_close1(cos, x));
printf("COS %10.18f\n", cos(x)); printf("SIN %10.18g\n", check_close1(sin, x));
printf("SIN %10.18f\n", sin(x)); printf("TAN %10.18g\n", check_close1(tan, x));
printf("TAN %10.18f\n", tan(x)); printf("ATAN2 %10.18g\n", check_close2(atan2, x, 10.0));
printf("ATAN2 %10.18f\n", atan2(x, 10)); printf("ATAN2 %10.18g\n", check_close2(atan2, 10.0, x));
printf("ATAN2 %10.18f\n", atan2(10, x)); printf("EXP %10.18g\n", check_close1(exp, x));
printf("EXP %10.18f\n", exp(x)); printf("LN %10.18g\n", check_close1(log, x));
printf("LN %10.18f\n", log(x)); printf("POW %10.18f\n", check_nan(pow(x, x)));
printf("POW %10.18f\n", pow(x, x)); printf("TRUNC %10.18f\n", check_nan(trunc(x)));
printf("TRUNC %10.18f\n", trunc(x)); printf("LDEXP %10.18f\n", check_nan(ldexp(x, x)));
printf("LDEXP %10.18f\n", ldexp(x, x)); printf("FMOD %10.18f\n", check_nan(fmod(x, 3.0f)));
printf("FMOD %10.18f\n", fmod(x, 3.0f));
double s, c; double s, c;
sincos(x, &s, &c); sincos(x, &s, &c);
printf("SINCOS %10.18f %10.18f\n", s, c); printf("SINCOS %10.18f %10.18f\n", check_nan(s), check_nan(c));
for(int j=0;j<2;j++) { for(int j=0;j<2;j++) {
for (int i = 1; i < 4; i++) { for (int i = 1; i < 4; i++) {
@ -372,17 +420,21 @@ int main() {
// } // }
for (int32_t x = -1; x; x <<= 1) { for (int32_t x = -1; x; x <<= 1) {
printf("i %d->%f\n", x, (double) x); printf("i %d->%f\n", x, (double) x);
check1(__aeabi_i2d, x);
} }
for (int32_t x = 1; x; x <<= 1) { for (int32_t x = 1; x; x <<= 1) {
printf("i %d->%f\n", x, (double) x); printf("i %d->%f\n", x, (double) x);
check1(__aeabi_i2d, x);
y = x << 1; y = x << 1;
} }
for (int64_t x = 1; x; x <<= 1) { for (int64_t x = 1; x; x <<= 1) {
printf("i %lld->%f\n", x, (double) x); printf("i %lld->%f\n", x, (double) x);
check1(__aeabi_l2d, x);
y = x << 1; y = x << 1;
} }
for (int64_t x = -1; x; x <<= 1) { for (int64_t x = -1; x; x <<= 1) {
printf("i %lld->%f\n", x, (double) x); printf("i %lld->%f\n", x, (double) x);
check1(__aeabi_l2d, x);
y = x << 1; y = x << 1;
} }
printf("d %d->%f\n", y, (float) y); printf("d %d->%f\n", y, (float) y);
@ -392,24 +444,40 @@ int main() {
uint32_t y; uint32_t y;
for(uint32_t x = 1; x; x <<= 1) { for(uint32_t x = 1; x; x <<= 1) {
printf("u %u->%f\n", x, (double)x); printf("u %u->%f\n", x, (double)x);
check1(__aeabi_ui2d, x);
y = x << 1; y = x << 1;
} }
printf("u %u->%f\n", y, (double)y); printf("u %u->%f\n", y, (double)y);
} }
for(int64_t x = 1; x !=0; x <<= 1u) { for(int64_t x = 1; x !=0; x <<= 1u) {
printf("%lld->%f\n", x, (double)x); printf("%lld->%f\n", x, (double)x);
check1(__aeabi_l2d, x);
} }
for(double x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) { for(double x = -4294967296.f * 4294967296.f * 2.f; x<=-0.5f; x/=2.f) {
printf("d2i64 %f->%lld\n", x, (int64_t)x); printf("d2i64 %f->%lld\n", x, (int64_t)x);
if (x < INT64_MIN) {
// seems like there is a bug in the gcc version!
assert(__aeabi_d2lz(x) == INT64_MIN);
} else {
check1(__aeabi_d2lz, x);
}
} }
for(double x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { for(double x = 4294967296.f * 4294967296.f * 2.f; x>=0.5f; x/=2.f) {
printf("d2i64 %f->%lld\n", x, (int64_t)x); printf("d2i64 %f->%lld\n", x, (int64_t)x);
if (x >= INT64_MAX) {
// seems like there is a bug in the gcc version!
assert(__aeabi_d2lz(x) == INT64_MAX);
} else {
check1(__aeabi_d2lz, x);
}
} }
for(double x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) { for(double x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) {
printf("d2i32 %f->%d\n", x, (int32_t)x); printf("d2i32 %f->%d\n", x, (int32_t)x);
check1(__aeabi_d2iz, x);
} }
for(double x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { for(double x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) {
printf("d2i32 %f->%d\n", x, (int32_t)x); printf("d2i32 %f->%d\n", x, (int32_t)x);
check1(__aeabi_d2iz, x);
} }
for (double x = 1; x < 11; x += 2) { for (double x = 1; x < 11; x += 2) {
@ -417,6 +485,8 @@ int main() {
double g = 1.0 / x; double g = 1.0 / x;
printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777, printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777,
x - 0.377777777777777777777777777777, g, 123456789.0 / x); x - 0.377777777777777777777777777777, g, 123456789.0 / x);
check2(__aeabi_dmul, x, x);
check2(__aeabi_ddiv, 1.0, x);
} }
if (fail || if (fail ||

View File

@ -16,7 +16,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
#include <pico/float.h> #include <pico/float.h>
//#include <pico/float.h>
#include "pico/stdlib.h" #include "pico/stdlib.h"
#include "inttypes.h" #include "inttypes.h"
@ -283,12 +282,58 @@ int test_fcmpun() {
return 0; return 0;
} }
#define assert_nan(a) assert(isnan(a))
#define check_nan(a) ({ assert_nan(a); a; })
float __aeabi_i2f(int32_t);
float __aeabi_ui2f(int32_t);
float __aeabi_l2f(int64_t);
float __aeabi_ul2f(int64_t);
int32_t __aeabi_f2iz(float);
int64_t __aeabi_f2lz(float);
float __aeabi_fmul(float, float);
float __aeabi_fdiv(float, float);
#if LIB_PICO_FLOAT_PICO
float __real___aeabi_i2f(int);
float __real___aeabi_ui2f(int);
float __real___aeabi_l2f(int64_t);
float __real___aeabi_ul2f(int64_t);
float __real___aeabi_fmul(float, float);
float __real___aeabi_fdiv(float, float);
int32_t __real___aeabi_f2iz(float);
int64_t __real___aeabi_f2lz(float);
float __real_sqrtf(float);
float __real_cosf(float);
float __real_sinf(float);
float __real_tanf(float);
float __real_expf(float);
float __real_logf(float);
float __real_atan2f(float, float);
float __real_powf(float, float);
float __real_truncf(float);
float __real_ldexpf(float, int);
float __real_fmodf(float, float);
#define EPSILON 1e-9
#define assert_close(a, b) assert(((b - a) < EPSILON || (a - b) < EPSILON) || (isinf(a) && isinf(b) && (a < 0) == (b < 0)))
#define check1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); assert(r == r2); r; })
#define check2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); assert(r == r2); r; })
#define check_close1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); if (isnan(p0)) assert_nan(r); else assert_close(r, r2); r; })
#define check_close2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); if (isnan(p0) || isnan(p1)) assert_nan(r); else assert_close(r, r2); r; })
#else
#define check1(func,p0) func(p0)
#define check2(func,p0,p1) func(p0,p1)
#define check_close1(func,p0) func(p0)
#define check_close2(func,p0,p1) func(p0,p1)
#endif
double aa = 0.5; double aa = 0.5;
double bb = 1; double bb = 1;
int main() { int main() {
setup_default_uart(); setup_default_uart();
bool fail = false;
printf("%d\n", aa < bb); printf("%d\n", aa < bb);
for(float a = -1; a <= 1; a++) { for(float a = -1; a <= 1; a++) {
for(float b = -1; b <= 1; b++) { for(float b = -1; b <= 1; b++) {
@ -341,21 +386,27 @@ int main() {
#if 1 #if 1
for (float x = 0; x < 3; x++) { for (float x = 0; x < 3; x++) {
printf("\n ----- %f\n", x); printf("\n ----- %f\n", x);
printf("FSQRT %10.18f\n", sqrtf(x)); printf("FSQRT %10.18f\n", check_close1(sqrtf, x));
printf("FCOS %10.18f\n", cosf(x)); printf("FCOS %10.18f\n", check_close1(cosf, x));
printf("FSIN %10.18f\n", sinf(x)); printf("FSIN %10.18f\n", check_close1(sinf, x));
float s, c; float s, c;
sincosf(x, &s, &c); sincosf(x, &s, &c);
printf("FSINCOS %10.18f %10.18f\n", s, c); printf("FSINCOS %10.18f %10.18f\n", s, c);
printf("FTAN %10.18f\n", tanf(x)); printf("FTAN %10.18f\n", check_close1(tanf, x));
printf("FATAN2 %10.18f\n", atan2f(x, 10)); printf("FATAN2 %10.18f\n", check_close2(atan2f, x, 10.f));
printf("FATAN2 %10.18f\n", atan2f(10, x)); printf("FATAN2 %10.18f\n", check_close2(atan2f, 10.f, x));
printf("FEXP %10.18f\n", expf(x)); printf("FEXP %10.18f\n", check_close1(expf, x));
printf("FLN %10.18f\n", logf(x)); printf("FLN %10.18f\n", check_close1(logf, x));
printf("POWF %10.18f\n", powf(x, x)); printf("POWF %10.18f\n", check_close2(powf, x, x));
printf("TRUNCF %10.18f\n", truncf(x)); printf("TRUNCF %10.18f\n", check_close1(truncf, x));
printf("LDEXPF %10.18f\n", ldexpf(x, x)); printf("LDEXPF %10.18f\n", check_close2(ldexpf, x, x));
printf("FMODF %10.18f\n", fmodf(x, 3.0f)); printf("FMODF %10.18f\n", check_close2(fmodf, x, 3.0f));
sincosf(x, &s, &c);
printf("SINCOS %10.18f %10.18f\n", s, c);
// x, s and c are floats in this loop, so compare against the single precision
// functions; sin()/cos() would compute in double and report a mismatch for
// nearly every x even when sincosf agrees exactly with sinf/cosf.
if (s != sinf(x) || c != cosf(x)) {
    printf("SINCOS mismatch\n");
    fail = true;
}
} }
for (double x = 0; x < 3; x++) { for (double x = 0; x < 3; x++) {
@ -390,18 +441,25 @@ int main() {
// sincosf(x, &s, &c); // sincosf(x, &s, &c);
printf("FSINCOS %10.18f %10.18f\n", s, c); printf("FSINCOS %10.18f %10.18f\n", s, c);
for(int i=1; i<4; i++) { for(int j=0;j<2;j++) {
char buf[4]; for (int i = 1; i < 4; i++) {
sprintf(buf, "%d", i); char buf[4];
float f0 = -nanf(buf); sprintf(buf, "%d", i);
double d0 = -nan(buf); float f0 = -nanf(buf);
// hmm double d0 = -nan(buf);
*(uint64_t *)&d0 |= i; // hmm nanf/nan seem to ignore payload
*(uint32_t *)&f0 |= i; *(uint64_t *) &d0 |= i;
float f = (float)d0; *(uint32_t *) &f0 |= i;
double d = (double)f0; if (j) {
printf("f2d %08"PRIx32" -> %g %016"PRIx64"\n", *(uint32_t*)&f0, d, *(uint64_t*)&d); // try without top bit set
printf("d2f %016"PRIx64" -> %f %08"PRIx32"\n", *(uint64_t*)&d0, f, *(uint32_t*)&f); *(uint64_t *) &d0 &= ~0x0008000000000000ull;
*(uint32_t *) &f0 &= ~0x00400000u;
}
float f = (float) d0;
double d = (double) f0;
printf("f2d %f %08"PRIx32" -> %g %016"PRIx64"\n", f0, *(uint32_t *) &f0, d, *(uint64_t *) &d);
printf("d2f %f %016"PRIx64" -> %f %08"PRIx32"\n", d0, *(uint64_t *) &d0, f, *(uint32_t *) &f);
}
} }
} }
#endif #endif
@ -413,17 +471,21 @@ int main() {
// } // }
for (int32_t x = -1; x; x <<= 1) { for (int32_t x = -1; x; x <<= 1) {
printf("i %d->%f\n", x, (float) x); printf("i %d->%f\n", x, (float) x);
check1(__aeabi_i2f, x);
} }
for (int32_t x = 1; x; x <<= 1) { for (int32_t x = 1; x; x <<= 1) {
printf("i %d->%f\n", x, (float) x); printf("i %d->%f\n", x, (float) x);
check1(__aeabi_i2f, x);
y = x << 1; y = x << 1;
} }
for (int64_t x = 1; x; x <<= 1) { for (int64_t x = 1; x; x <<= 1) {
printf("i %lld->%f\n", x, (float) x); printf("i %lld->%f\n", x, (float) x);
check1(__aeabi_l2f, x);
y = x << 1; y = x << 1;
} }
for (int64_t x = -1; x; x <<= 1) { for (int64_t x = -1; x; x <<= 1) {
printf("i %lld->%f\n", x, (float) x); printf("i %lld->%f\n", x, (float) x);
check1(__aeabi_l2f, x);
y = x << 1; y = x << 1;
} }
printf("d %d->%f\n", y, (float) y); printf("d %d->%f\n", y, (float) y);
@ -433,40 +495,63 @@ int main() {
uint32_t y; uint32_t y;
for(uint32_t x = 1; x; x <<= 1) { for(uint32_t x = 1; x; x <<= 1) {
printf("u %u->%f\n", x, (float)x); printf("u %u->%f\n", x, (float)x);
check1(__aeabi_ui2f, x);
y = x << 1; y = x << 1;
} }
printf("u %u->%f\n", y, (float)y); printf("u %u->%f\n", y, (float)y);
} }
for(int64_t x = 1; x !=0; x <<= 1u) { for(int64_t x = 1; x !=0; x <<= 1u) {
printf("%lld->%f\n", x, (float)x); printf("%lld->%f\n", x, (float)x);
check1(__aeabi_l2f, x);
}
for(float x = -4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) {
printf("f %f->%lld\n", x, (int64_t)x);
if (x < INT64_MIN) {
// seems like there is a bug in the gcc version!
assert(__aeabi_f2lz(x) == INT64_MIN);
} else {
check1(__aeabi_f2lz, x);
}
}
for(float x = 4294967296.f * 4294967296.f * 2.f; x>=0.5f; x/=2.f) {
printf("f2i64 %f->%lld\n", x, (int64_t)x);
if (x >= INT64_MAX) {
// seems like there is a bug in the gcc version!
assert(__aeabi_f2lz(x) == INT64_MAX);
} else {
check1(__aeabi_f2lz, x);
}
}
for(float x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) {
printf("d2i32 %f->%d\n", x, (int32_t)x);
check1(__aeabi_f2iz, x);
} }
for(float x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { for(float x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) {
printf("f %f->%lld\n", x, (int64_t)x); printf("d2i32 %f->%d\n", x, (int32_t)x);
check1(__aeabi_f2iz, x);
} }
for (double x = 1; x < 11; x += 2) {
double f = x * x; for (float x = 1; x < 11; x += 2) {
double g = 1.0 / x; float f = x * x;
printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777, float g = 1.0f / x;
x - 0.377777777777777777777777777777, g, 123456789.0 / x); printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777f,
x - 0.377777777777777777777777777777f, g, 123456789.0f / x);
check2(__aeabi_fmul, x, x);
check2(__aeabi_fdiv, 1.0f, x);
} }
if (test_cfcmpeq() || test_cfcmple() ||
test_fcmpun() || test_cmple_gt() || test_cmplt_ge()) { if (fail ||
test_cfcmpeq() ||
test_cfcmple() ||
test_fcmpun() ||
test_cmple_gt() ||
test_cmplt_ge()) {
printf("FAILED\n"); printf("FAILED\n");
return 1; return 1;
} else { } else {
printf("PASSED\n"); printf("PASSED\n");
return 0; return 0;
} }
if (test_cfcmpeq() || test_cfcmple() ||
test_fcmpun() || test_cmple_gt() || test_cmplt_ge()) {
printf("FAILED\n");
return 1;
} else {
printf("PASSED\n");
return 0;
}
#endif #endif
} }