From 574fdee37b1ca5bfaa32716c9391179dcbdaf523 Mon Sep 17 00:00:00 2001 From: Graham Sanderson Date: Thu, 13 May 2021 07:38:42 -0500 Subject: [PATCH] Fixup divider save_restore for floating point too; improve tests (#405) - The divider state needs to be saved for __aeabi_ddiv, __aeabi_fdiv, __aeabi_dtan and __aeabi_ftan or they won't work in interrupts *(probably not used much youd hope), or on an RTOS context switch - Refactored code out for the integer and floating point cases - Improved the floating point 'tests' in passing to check more return values against GCC implementations - Added floating point usage to the IRQ nesting test case --- .../include/hardware/divider_helper.S | 68 +++++++ src/rp2_common/pico_divider/divider.S | 82 ++------- src/rp2_common/pico_double/double_aeabi.S | 78 +++++++- src/rp2_common/pico_float/float_aeabi.S | 63 ++++++- test/pico_divider_test/CMakeLists.txt | 2 +- .../pico_divider_nesting_test.c | 71 +++++++- test/pico_float_test/CMakeLists.txt | 6 +- test/pico_float_test/pico_double_test.c | 118 +++++++++--- test/pico_float_test/pico_float_test.c | 171 +++++++++++++----- 9 files changed, 501 insertions(+), 158 deletions(-) create mode 100644 src/rp2_common/hardware_divider/include/hardware/divider_helper.S diff --git a/src/rp2_common/hardware_divider/include/hardware/divider_helper.S b/src/rp2_common/hardware_divider/include/hardware/divider_helper.S new file mode 100644 index 0000000..062e12d --- /dev/null +++ b/src/rp2_common/hardware_divider/include/hardware/divider_helper.S @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "hardware/regs/addressmap.h" +#include "hardware/regs/sio.h" + +#if SIO_DIV_CSR_READY_LSB == 0 +.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1 +#else +need to change SHIFT above +#endif +#if SIO_DIV_CSR_DIRTY_LSB == 1 +.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2 +#else +need to change SHIFT above +#endif + +// SIO_BASE ptr in r2; pushes r4-r7, lr to stack +// requires that division started at least 2 cycles prior to the start of the macro +.macro save_div_state_and_lr +// originally we did this, however a) it uses r3, b) the push takes 6 cycles, and c) +// any IRQ which uses the divider will necessarily put the data back, which will +// immediately make it ready +// +// // ldr r3, [r2, #SIO_DIV_CSR_OFFSET] +// // // wait for results as we can't save signed-ness of operation +// // 1: +// // lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY +// // bcc 1b + +// 6 cycles +push {r4, r5, r6, r7, lr} +// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia! +ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET] +ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET] +ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET] +ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET] +.endm + +// restores divider state from r4-r7, then pops them and pc +.macro restore_div_state_and_return +// writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order +// +// it is worth considering what happens if we are interrupted +// +// after writing r4: we are DIRTY and !READY +// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be +// saved/restored correctly and we'll restore the rest ourselves +// after writing r4, r5: we are DIRTY and !READY +// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor +// at least will be saved/restored correctly and we'll restore the rest ourselves +// after writing r4, r5, r6: we are DIRTY and READY +// ... 
interruptor using div will save/restore dividend, divisor, quotient registers as is (what we just restored ourselves), +// and we'll restore the remainder after the fact + +// note we do not use STM, not because it can be restarted due to interrupt (which is harmless), but because this is 1 cycle IO space +// and so 4 writes are cheaper (and we don't have to adjust r2) +// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based +// on the signs of the inputs +str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET] +str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET] +str r7, [r2, #SIO_DIV_REMAINDER_OFFSET] +str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET] +pop {r4, r5, r6, r7, pc} +.endm \ No newline at end of file diff --git a/src/rp2_common/pico_divider/divider.S b/src/rp2_common/pico_divider/divider.S index 8112681..ac67a5e 100644 --- a/src/rp2_common/pico_divider/divider.S +++ b/src/rp2_common/pico_divider/divider.S @@ -4,8 +4,8 @@ * SPDX-License-Identifier: BSD-3-Clause */ -#include "hardware/regs/sio.h" #include "hardware/regs/addressmap.h" +#include "hardware/divider_helper.S" .syntax unified .cpu cortex-m0plus @@ -34,17 +34,6 @@ #endif .endm -#if SIO_DIV_CSR_READY_LSB == 0 -.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1 -#else -need to change SHIFT above -#endif -#if SIO_DIV_CSR_DIRTY_LSB == 1 -.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2 -#else -need to change SHIFT above -#endif - @ wait 8-n cycles for the hardware divider .macro wait_div n .rept (8-\n) / 2 @@ -56,58 +45,17 @@ need to change SHIFT above .endif .endm - #if (SIO_DIV_SDIVISOR_OFFSET != SIO_DIV_SDIVIDEND_OFFSET + 4) || (SIO_DIV_QUOTIENT_OFFSET != SIO_DIV_SDIVISOR_OFFSET + 4) || (SIO_DIV_REMAINDER_OFFSET != SIO_DIV_QUOTIENT_OFFSET + 4) #error register layout has changed - we rely on this order to make sure we save/restore in the right order #endif #if !PICO_DIVIDER_DISABLE_INTERRUPTS - -# SIO_BASE ptr in r2 -.macro save_div_state_and_lr - ldr r3, [r2, #SIO_DIV_CSR_OFFSET] - # wait for 
results as we can't save signed-ness of operation -1: - lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY - bcc 1b - push {r4, r5, r6, r7, lr} - // note we must read quotient last, and since it isn't the last reg, we'll not use ldmia! - ldr r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET] - ldr r5, [r2, #SIO_DIV_SDIVISOR_OFFSET] - ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET] - ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET] -.endm - -.macro restore_div_state_and_return - // writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order - // - // it is worth considering what happens if we are interrupted - // - // after writing r4: we are DIRTY and !READY - // ... interruptor using div will complete based on incorrect inputs, but dividend at least will be - // saved/restored correctly and we'll restore the rest ourselves - // after writing r4, r5: we are DIRTY and !READY - // ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor - // at least will be saved/restored correctly and and we'll restore the rest ourselves - // after writing r4, r5, r6: we are DIRTY and READY - // ... 
interruptor using div will dividend, divisor, quotient registers as is (what we just restored ourselves), - // and we'll restore the remainder after the fact - - // note we are not use STM not because it can be restarted due to interrupt which is harmless, more because this is 1 cycle IO space - // and so 4 reads is cheaper (and we don't have to adjust r2) - str r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET] - str r5, [r2, #SIO_DIV_SDIVISOR_OFFSET] - str r7, [r2, #SIO_DIV_REMAINDER_OFFSET] - str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET] - pop {r4, r5, r6, r7, pc} -.endm - .macro save_div_state_and_lr_64 push {r4, r5, r6, r7, lr} ldr r6, =SIO_BASE 1: ldr r5, [r6, #SIO_DIV_CSR_OFFSET] - # wait for results as we can't save signed-ness of operation + // wait for results as we can't save signed-ness of operation lsrs r5, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY bcc 1b // note we must read quotient last, and since it isn't the last reg, we'll not use ldmia! @@ -154,17 +102,18 @@ wrapper_func __aeabi_idivmod regular_func div_s32s32 regular_func divmod_s32s32 #if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty ldr r2, =(SIO_BASE) - # to support IRQ usage we must save/restore ldr r3, [r2, #SIO_DIV_CSR_OFFSET] lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY bcs divmod_s32s32_savestate regular_func divmod_s32s32_unsafe #else -# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs -# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there -# are the hardware_divider functions that can be used instead anyway +// to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs +// in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. 
it is not a faster function; this seems reasonable as there +// are the hardware_divider functions that can be used instead anyway regular_func divmod_s32s32_unsafe + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call ldr r2, =(SIO_BASE) mrs r3, PRIMASK cpsid i @@ -203,6 +152,8 @@ regular_func divmod_s32s32_unsafe #if !PICO_DIVIDER_DISABLE_INTERRUPTS .align 2 regular_func divmod_s32s32_savestate + // note that we must be at least 2 cycles into division at this point, + // which we are because of the dirty check before getting here (and of course the function call before that) save_div_state_and_lr bl divmod_s32s32_unsafe restore_div_state_and_return @@ -215,17 +166,18 @@ regular_func divmod_u32u32 wrapper_func __aeabi_uidiv wrapper_func __aeabi_uidivmod #if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty ldr r2, =(SIO_BASE) - # to support IRQ usage we must save/restore ldr r3, [r2, #SIO_DIV_CSR_OFFSET] lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY bcs divmod_u32u32_savestate regular_func divmod_u32u32_unsafe #else -# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs -# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there -# are the hardware_divider functions that can be used instead anyway +// to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs +// in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. 
it is not a faster function; this seems reasonable as there +// are the hardware_divider functions that can be used instead anyway regular_func divmod_u32u32_unsafe + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call ldr r2, =(SIO_BASE) mrs r3, PRIMASK cpsid i @@ -273,9 +225,9 @@ wrapper_func __aeabi_ldivmod regular_func div_s64s64 regular_func divmod_s64s64 #if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty mov ip, r2 ldr r2, =(SIO_BASE) - # to support IRQ usage we must save/restore ldr r2, [r2, #SIO_DIV_CSR_OFFSET] lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY mov r2, ip @@ -287,6 +239,7 @@ divmod_s64s64_savestate: bl divmod_s64s64_unsafe restore_div_state_and_return_64 #else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call push {r4, lr} mrs r4, PRIMASK cpsid i @@ -300,9 +253,9 @@ wrapper_func __aeabi_uldivmod regular_func div_u64u64 regular_func divmod_u64u64 #if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty mov ip, r2 ldr r2, =(SIO_BASE) - # to support IRQ usage we must save/restore ldr r2, [r2, #SIO_DIV_CSR_OFFSET] lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY mov r2, ip @@ -314,6 +267,7 @@ regular_func divmod_u64u64_savestate bl divmod_u64u64_unsafe restore_div_state_and_return_64 #else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call push {r4, lr} mrs r4, PRIMASK cpsid i diff --git a/src/rp2_common/pico_double/double_aeabi.S b/src/rp2_common/pico_double/double_aeabi.S index 1e1250f..a871e43 100644 --- a/src/rp2_common/pico_double/double_aeabi.S +++ b/src/rp2_common/pico_double/double_aeabi.S @@ -6,6 +6,7 @@ #include "pico/asm_helper.S" #include "pico/bootrom/sf_table.h" +#include "hardware/divider_helper.S" __pre_init __aeabi_double_init, 
00020 @@ -131,16 +132,16 @@ regular_func pop_r8_r11 mov r11,r7 bx r14 -# note generally each function is in a separate section unless there is fall thru or branching between them -# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool +// note generally each function is in a separate section unless there is fall thru or branching between them +// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool -# note functions are word aligned except where they are an odd number of linear instructions +// note functions are word aligned except where they are an odd number of linear instructions // double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition double_wrapper_section __aeabi_darithmetic // double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x -# frsub first because it is the only one that needs alignment +// frsub first because it is the only one that needs alignment .align 2 wrapper_func __aeabi_drsub eors r0, r1 @@ -177,7 +178,35 @@ wrapper_func_d2 __aeabi_ddiv b ddiv_dsub_nan_helper 1: #endif - shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty + mov ip, r2 + ldr r2, =(SIO_BASE) + ldr r2, [r2, #SIO_DIV_CSR_OFFSET] + lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY + bcs ddiv_save_state + mov r2, ip +#else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl ddiv_shim_call + msr PRIMASK, r4 + pop {r4, pc} +#endif +ddiv_shim_call: + shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim + +#if !PICO_DIVIDER_DISABLE_INTERRUPTS +ddiv_save_state: + ldr r2, =(SIO_BASE) + save_div_state_and_lr + mov r2, ip + bl ddiv_shim_call + ldr r2, =(SIO_BASE) + 
restore_div_state_and_return +#endif ddiv_dsub_nan_helper: #if PICO_DOUBLE_PROPAGATE_NANS @@ -592,6 +621,8 @@ regular_func sincostan_remainder ldr r2, =0x54442D18 // 2 * M_PI ldr r3, =0x401921FB push {lr} + // note remainder only uses the divider thru integer divider functions + // which save and restore themselves bl remainder pop {pc} @@ -752,13 +783,40 @@ double_wrapper_section tan wrapper_func tan // rom version only works for -1024 < angle < 1024 lsls r2, r1, #2 - bcc 1f + bcc dtan_in_range lsrs r2, #22 cmp r2, #9 - bge 2f -1: + bge dtan_angle_out_of_range +dtan_in_range: +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty + mov ip, r2 + ldr r2, =(SIO_BASE) + ldr r2, [r2, #SIO_DIV_CSR_OFFSET] + lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY + bcs dtan_save_state + mov r2, ip +#else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl dtan_shim_call + msr PRIMASK, r4 + pop {r4, pc} +#endif +dtan_shim_call: shimmable_table_tail_call SF_TABLE_FTAN dtan_shim -2: +#if !PICO_DIVIDER_DISABLE_INTERRUPTS +dtan_save_state: + ldr r2, =(SIO_BASE) + save_div_state_and_lr + mov r2, ip + bl dtan_shim_call + ldr r2, =(SIO_BASE) + restore_div_state_and_return +#endif +dtan_angle_out_of_range: #if PICO_DOUBLE_PROPAGATE_NANS lsls r2, r1, #1 asrs r2, #21 @@ -775,7 +833,7 @@ wrapper_func tan bl sincostan_remainder pop {r2} mov lr, r2 - b 1b + b dtan_in_range double_wrapper_section atan2 wrapper_func_d2 atan2 diff --git a/src/rp2_common/pico_float/float_aeabi.S b/src/rp2_common/pico_float/float_aeabi.S index 2aee5f2..b901d30 100644 --- a/src/rp2_common/pico_float/float_aeabi.S +++ b/src/rp2_common/pico_float/float_aeabi.S @@ -6,6 +6,7 @@ #include "pico/asm_helper.S" #include "pico/bootrom/sf_table.h" +#include "hardware/divider_helper.S" __pre_init __aeabi_float_init, 00020 @@ -104,16 +105,16 @@ 
__check_nan_f2: .endm -# note generally each function is in a separate section unless there is fall thru or branching between them -# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool +// note generally each function is in a separate section unless there is fall thru or branching between them +// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool -# note functions are word aligned except where they are an odd number of linear instructions +// note functions are word aligned except where they are an odd number of linear instructions // float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition float_wrapper_section __aeabi_farithmetic // float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x -# frsub first because it is the only one that needs alignment +// frsub first because it is the only one that needs alignment .align 2 wrapper_func __aeabi_frsub eors r0, r1 @@ -146,7 +147,30 @@ wrapper_func_f2 __aeabi_fdiv b fdiv_fsub_nan_helper 1: #endif +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty + ldr r2, =(SIO_BASE) + ldr r3, [r2, #SIO_DIV_CSR_OFFSET] + lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY + bcs fdiv_save_state +#else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl fdiv_shim_call + msr PRIMASK, r4 + pop {r4, pc} +#endif +fdiv_shim_call: table_tail_call SF_TABLE_FDIV +#if !PICO_DIVIDER_DISABLE_INTERRUPTS +fdiv_save_state: + save_div_state_and_lr + bl fdiv_shim_call + ldr r2, =(SIO_BASE) + restore_div_state_and_return +#endif fdiv_fsub_nan_helper: #if PICO_FLOAT_PROPAGATE_NANS @@ -689,10 +713,33 @@ wrapper_func tanf lsls r1, r0, #1 lsrs r1, #24 cmp r1, #127 + 7 - bge 1f -2: + bge 
ftan_out_of_range +ftan_in_range: +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty + ldr r2, =(SIO_BASE) + ldr r3, [r2, #SIO_DIV_CSR_OFFSET] + lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY + bcs ftan_save_state +#else + // to avoid worrying about IRQs (or context switches), simply disable interrupts around call + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl ftan_shim_call + msr PRIMASK, r4 + pop {r4, pc} +#endif +ftan_shim_call: table_tail_call SF_TABLE_FTAN -1: +#if !PICO_DIVIDER_DISABLE_INTERRUPTS +ftan_save_state: + save_div_state_and_lr + bl ftan_shim_call + ldr r2, =(SIO_BASE) + restore_div_state_and_return +#endif +ftan_out_of_range: #if PICO_FLOAT_PROPAGATE_NANS // also check for infinites cmp r1, #255 @@ -709,7 +756,7 @@ wrapper_func tanf bl remainderf pop {r1} mov lr, r1 - b 2b + b ftan_in_range float_wrapper_section atan2f wrapper_func_f2 atan2f diff --git a/test/pico_divider_test/CMakeLists.txt b/test/pico_divider_test/CMakeLists.txt index c1ee628..d2ab012 100644 --- a/test/pico_divider_test/CMakeLists.txt +++ b/test/pico_divider_test/CMakeLists.txt @@ -12,7 +12,7 @@ if (PICO_ON_DEVICE) pico_add_extra_outputs(pico_divider_test) target_compile_definitions(pico_divider_test PRIVATE - PICO_DIVIDER_DISABLE_INTERRUPTS=1 +# PICO_DIVIDER_DISABLE_INTERRUPTS=1 # TURBO ) diff --git a/test/pico_divider_test/pico_divider_nesting_test.c b/test/pico_divider_test/pico_divider_nesting_test.c index 4224181..0e0db94 100644 --- a/test/pico_divider_test/pico_divider_nesting_test.c +++ b/test/pico_divider_test/pico_divider_nesting_test.c @@ -15,6 +15,9 @@ volatile bool failed; volatile uint32_t count[3]; volatile bool done; +#define FAILED() ({ failed = true; }) +//#define FAILED() ({ failed = true; __breakpoint(); }) + bool timer_callback(repeating_timer_t *t) { count[0]++; static int z; @@ -23,9 +26,27 @@ bool timer_callback(repeating_timer_t *t) { int a = z / 7; int b = z % 
7; if (z != a * 7 + b) { - failed = true; + FAILED(); + } + a = z / -7; + b = z % -7; + if (z != a * -7 + b) { + FAILED(); } } + float fz = z; + float fa = fz / 11.0f; + float fb = fmodf(fz, 11.0f); + if (fabsf(fa * 11.0f - fz) > fabsf(fz) * 1e-6f || fabsf(fb) >= 11.0f) { + FAILED(); + } + double dz = z; + double da = dz / 11.0; + double db = fmod(dz, 11.0); + if (fabs(da * 11.0 - dz) > fabs(dz) * 1e-12 || fabs(db) >= 11.0) { + FAILED(); + } + return !done; } @@ -41,16 +62,20 @@ void do_dma_start(uint ch) { dma_channel_configure(ch, &c, &word[ch], &word[ch], 513 + ch * 23, true); } +double d0c, d0s, d0t, dz; +float f0c, f0s, f0t, fz; + void test_irq_handler0() { count[1]++; dma_hw->ints0 |= 1u; static uint z; + static uint dz; for (int i=0; i<80;i++) { z += 31; uint a = z / 11; uint b = z % 11; if (z != a * 11 + b) { - failed = true; + FAILED(); } } if (done) dma_channel_abort(0); @@ -66,16 +91,17 @@ void test_irq_handler1() { uint a = z / -13; uint b = z % -13; if (z != a * -13 + b) { - failed = true; + FAILED(); } static uint64_t z64; z64 -= 47; uint64_t a64 = z64 / -13; uint64_t b64 = z64 % -13; if (z64 != a64 * -13 + b64) { - failed = true; + FAILED(); } } + if (done) dma_channel_abort(1); else do_dma_start(1); } @@ -89,7 +115,7 @@ void test_nesting() { // They all busily make use of the dividers, to expose any issues with nested use repeating_timer_t timer; - add_repeating_timer_us(529, timer_callback, NULL, &timer); + add_repeating_timer_us(929, timer_callback, NULL, &timer); irq_set_exclusive_handler(DMA_IRQ_0, test_irq_handler0); irq_set_exclusive_handler(DMA_IRQ_1, test_irq_handler1); @@ -101,7 +127,7 @@ void test_nesting() { irq_set_enabled(DMA_IRQ_1, 1); do_dma_start(0); do_dma_start(1); - absolute_time_t end = delayed_by_ms(get_absolute_time(), 2000); + absolute_time_t end = delayed_by_ms(get_absolute_time(), 10000); int count_local=0; while (!time_reached(end)) { for(uint i=0;i<100;i++) { @@ -109,8 +135,39 @@ void test_nesting() { uint a = z / 11; uint b = z % 11; if (z != a * 11 + b) { - failed = true; 
+ FAILED(); } + int zz = (int)z; + int aa = zz / -11; + int bb = zz % -11; + if (zz != aa * -11 + bb) { + FAILED(); + } + aa = -zz / -11; + bb = -zz % -11; + if (-zz != aa * -11 + bb) { + FAILED(); + } + aa = -zz / 11; + bb = -zz % 11; + if (-zz != aa * 11 + bb) { + FAILED(); + } + a = 0xffffffffu / 11; + b = 0xffffffffu % 11; + if (0xffffffffu != a * 11 + b) { + FAILED(); + } + } + // these use the divider + for(uint i=0;i<=100;i+=20) { + // both in and out bootrom range (we perform mod in wrapper code if necessarry) + f0t = tanf(i * 50); + f0c = cosf(i * 50); + f0s = sinf(i * 50); + d0t = tan(i * 1000); + d0c = cos(i * 1000); + d0s = sin(i * 1000); } count_local++; } diff --git a/test/pico_float_test/CMakeLists.txt b/test/pico_float_test/CMakeLists.txt index 0ff1f60..26204d3 100644 --- a/test/pico_float_test/CMakeLists.txt +++ b/test/pico_float_test/CMakeLists.txt @@ -11,15 +11,19 @@ add_executable(pico_double_test ) +#todo split out variants with different flags target_compile_definitions(pico_float_test PRIVATE PICO_USE_CRT_PRINTF=1 # want full precision output # PICO_FLOAT_PROPAGATE_NANS=1 +# PICO_DIVIDER_DISABLE_INTERRUPTS=1 ) +#todo split out variants with different flags target_compile_definitions(pico_double_test PRIVATE PICO_USE_CRT_PRINTF=1 # want full precision output PICO_FLOAT_PROPAGATE_NANS=1 - PICO_DOUBLE_PROPAGATE_NANS=1 + #PICO_DOUBLE_PROPAGATE_NANS=1 + #PICO_DIVIDER_DISABLE_INTERRUPTS=1 ) # handy for testing we aren't pulling in extra stuff diff --git a/test/pico_float_test/pico_double_test.c b/test/pico_float_test/pico_double_test.c index 708ab25..6c095e7 100644 --- a/test/pico_float_test/pico_double_test.c +++ b/test/pico_float_test/pico_double_test.c @@ -282,6 +282,51 @@ int test_dcmpun() { return 0; } +#define assert_nan(a) assert(isnan(a)) +#define check_nan(a) ({ assert_nan(a); a; }) + +double __aeabi_i2d(int32_t); +double __aeabi_ui2d(int32_t); +double __aeabi_l2d(int64_t); +double __aeabi_ul2d(int64_t); +int32_t __aeabi_d2iz(double); 
+int64_t __aeabi_d2lz(double); +double __aeabi_dmul(double, double); +double __aeabi_ddiv(double, double); +#if LIB_PICO_DOUBLE_PICO +double __real___aeabi_i2d(int); +double __real___aeabi_ui2d(int); +double __real___aeabi_l2d(int64_t); +double __real___aeabi_ul2d(int64_t); +double __real___aeabi_dmul(double, double); +double __real___aeabi_ddiv(double, double); +int32_t __real___aeabi_d2iz(double); +int64_t __real___aeabi_d2lz(double); +double __real_sqrt(double); +double __real_cos(double); +double __real_sin(double); +double __real_tan(double); +double __real_exp(double); +double __real_log(double); +double __real_atan2(double, double); +double __real_pow(double, double); +double __real_trunc(double); +double __real_ldexp(double, int); +double __real_fmod(double, double); + +#define EPSILON 1e-9 +#define assert_close(a, b) assert(((b - a) < EPSILON && (a - b) < EPSILON) || (isinf(a) && isinf(b) && (a < 0) == (b < 0))) +#define check1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); assert(r == r2); r; }) +#define check2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); assert(r == r2); r; }) +#define check_close1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); if (isnan(p0)) assert_nan(r); else assert_close(r, r2); r; }) +#define check_close2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); if (isnan(p0) || isnan(p1)) assert_nan(r); else assert_close(r, r2); r; }) +#else +#define check1(func,p0) func(p0) +#define check2(func,p0,p1) func(p0,p1) +#define check_close1(func,p0) func(p0) +#define check_close2(func,p0,p1) func(p0,p1) +#endif + double aa = 0.5; double bb = 1; @@ -305,14 +350,18 @@ int main() { #if 1 for (double x = 0; x < 3; x++) { printf("\n ----- %g\n", x); - printf("SQRT %10.18g\n", sqrt(x)); - printf("COS %10.18g\n", cos(x)); - printf("SIN %10.18g\n", sin(x)); - printf("TAN %10.18g\n", tan(x)); - printf("ATAN2 %10.18g\n", atan2(x, 10)); - 
printf("ATAN2 %10.18g\n", atan2(10, x)); - printf("EXP %10.18g\n", exp(x)); - printf("LN %10.18g\n", log(x)); + printf("SQRT %10.18g\n", check_close1(sqrt, x)); + printf("COS %10.18g\n", check_close1(cos, x)); + printf("SIN %10.18g\n", check_close1(sin, x)); + printf("TAN %10.18g\n", check_close1(tan, x)); + printf("ATAN2 %10.18g\n", check_close2(atan2, x, 10.0)); + printf("ATAN2 %10.18g\n", check_close2(atan2, 10.0, x)); + printf("EXP %10.18g\n", check_close1(exp, x)); + printf("LN %10.18g\n", check_close1(log, x)); + printf("POW %10.18f\n", check_close2(pow, x, x)); + printf("TRUNC %10.18f\n", check_close1(trunc, x)); + printf("LDEXP %10.18f\n", check_close2(ldexp, x, x)); + printf("FMOD %10.18f\n", check_close2(fmod, x, 3.0f)); double s, c; sincos(x, &s, &c); printf("SINCOS %10.18f %10.18f\n", s, c); @@ -325,22 +374,21 @@ int main() { #if PICO_DOUBLE_PROPAGATE_NANS { float x = NAN; - printf("NANO %10.18f\n", x); - printf("SQRT %10.18f\n", sqrt(x)); - printf("COS %10.18f\n", cos(x)); - printf("SIN %10.18f\n", sin(x)); - printf("TAN %10.18f\n", tan(x)); - printf("ATAN2 %10.18f\n", atan2(x, 10)); - printf("ATAN2 %10.18f\n", atan2(10, x)); - printf("EXP %10.18f\n", exp(x)); - printf("LN %10.18f\n", log(x)); - printf("POW %10.18f\n", pow(x, x)); - printf("TRUNC %10.18f\n", trunc(x)); - printf("LDEXP %10.18f\n", ldexp(x, x)); - printf("FMOD %10.18f\n", fmod(x, 3.0f)); + printf("SQRT %10.18g\n", check_close1(sqrt, x)); + printf("COS %10.18g\n", check_close1(cos, x)); + printf("SIN %10.18g\n", check_close1(sin, x)); + printf("TAN %10.18g\n", check_close1(tan, x)); + printf("ATAN2 %10.18g\n", check_close2(atan2, x, 10.0)); + printf("ATAN2 %10.18g\n", check_close2(atan2, 10.0, x)); + printf("EXP %10.18g\n", check_close1(exp, x)); + printf("LN %10.18g\n", check_close1(log, x)); + printf("POW %10.18f\n", check_nan(pow(x, x))); + printf("TRUNC %10.18f\n", check_nan(trunc(x))); + printf("LDEXP %10.18f\n", check_nan(ldexp(x, x))); + printf("FMOD %10.18f\n", check_nan(fmod(x, 
3.0f))); double s, c; sincos(x, &s, &c); - printf("SINCOS %10.18f %10.18f\n", s, c); + printf("SINCOS %10.18f %10.18f\n", check_nan(s), check_nan(c)); for(int j=0;j<2;j++) { for (int i = 1; i < 4; i++) { @@ -372,17 +420,21 @@ int main() { // } for (int32_t x = -1; x; x <<= 1) { printf("i %d->%f\n", x, (double) x); + check1(__aeabi_i2d, x); } for (int32_t x = 1; x; x <<= 1) { printf("i %d->%f\n", x, (double) x); + check1(__aeabi_i2d, x); y = x << 1; } for (int64_t x = 1; x; x <<= 1) { printf("i %lld->%f\n", x, (double) x); + check1(__aeabi_l2d, x); y = x << 1; } for (int64_t x = -1; x; x <<= 1) { printf("i %lld->%f\n", x, (double) x); + check1(__aeabi_l2d, x); y = x << 1; } printf("d %d->%f\n", y, (float) y); @@ -392,24 +444,40 @@ int main() { uint32_t y; for(uint32_t x = 1; x; x <<= 1) { printf("u %u->%f\n", x, (double)x); + check1(__aeabi_ui2d, x); y = x << 1; } printf("u %u->%f\n", y, (double)y); } for(int64_t x = 1; x !=0; x <<= 1u) { printf("%lld->%f\n", x, (double)x); + check1(__aeabi_l2d, x); } - for(double x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) { + for(double x = -4294967296.f * 4294967296.f * 2.f; x<=-0.5f; x/=2.f) { printf("d2i64 %f->%lld\n", x, (int64_t)x); + if (x < INT64_MIN) { + // seems like there is a bug in the gcc version! + assert(__aeabi_d2lz(x) == INT64_MIN); + } else { + check1(__aeabi_d2lz, x); + } } - for(double x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { + for(double x = 4294967296.f * 4294967296.f * 2.f; x>=0.5f; x/=2.f) { printf("d2i64 %f->%lld\n", x, (int64_t)x); + if (x >= INT64_MAX) { + // seems like there is a bug in the gcc version! 
+ assert(__aeabi_d2lz(x) == INT64_MAX); + } else { + check1(__aeabi_d2lz, x); + } } for(double x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) { printf("d2i32 %f->%d\n", x, (int32_t)x); + check1(__aeabi_d2iz, x); } for(double x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { printf("d2i32 %f->%d\n", x, (int32_t)x); + check1(__aeabi_d2iz, x); } for (double x = 1; x < 11; x += 2) { @@ -417,6 +485,8 @@ int main() { double g = 1.0 / x; printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777, x - 0.377777777777777777777777777777, g, 123456789.0 / x); + check2(__aeabi_dmul, x, x); + check2(__aeabi_ddiv, 1.0, x); } if (fail || diff --git a/test/pico_float_test/pico_float_test.c b/test/pico_float_test/pico_float_test.c index 75a37a8..5da23d2 100644 --- a/test/pico_float_test/pico_float_test.c +++ b/test/pico_float_test/pico_float_test.c @@ -16,7 +16,6 @@ #include #include #include -//#include #include "pico/stdlib.h" #include "inttypes.h" @@ -283,12 +282,58 @@ int test_fcmpun() { return 0; } +#define assert_nan(a) assert(isnan(a)) +#define check_nan(a) ({ assert_nan(a); a; }) + +float __aeabi_i2f(int32_t); +float __aeabi_ui2f(int32_t); +float __aeabi_l2f(int64_t); +float __aeabi_ul2f(int64_t); +int32_t __aeabi_f2iz(float); +int64_t __aeabi_f2lz(float); +float __aeabi_fmul(float, float); +float __aeabi_fdiv(float, float); +#if LIB_PICO_FLOAT_PICO +float __real___aeabi_i2f(int); +float __real___aeabi_ui2f(int); +float __real___aeabi_l2f(int64_t); +float __real___aeabi_ul2f(int64_t); +float __real___aeabi_fmul(float, float); +float __real___aeabi_fdiv(float, float); +int32_t __real___aeabi_f2iz(float); +int64_t __real___aeabi_f2lz(float); +float __real_sqrtf(float); +float __real_cosf(float); +float __real_sinf(float); +float __real_tanf(float); +float __real_expf(float); +float __real_logf(float); +float __real_atan2f(float, float); +float __real_powf(float, float); +float __real_truncf(float); +float __real_ldexpf(float, 
int); +float __real_fmodf(float, float); +#define EPSILON 1e-5 +#define assert_close(a, b) assert(((b - a) < EPSILON && (a - b) < EPSILON) || (isinf(a) && isinf(b) && (a < 0) == (b < 0))) +#define check1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); assert(r == r2); r; }) +#define check2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); assert(r == r2); r; }) +#define check_close1(func,p0) ({ typeof(p0) r = func(p0), r2 = __CONCAT(__real_, func)(p0); if (isnan(p0)) assert_nan(r); else assert_close(r, r2); r; }) +#define check_close2(func,p0,p1) ({ typeof(p0) r = func(p0,p1), r2 = __CONCAT(__real_, func)(p0,p1); if (isnan(p0) || isnan(p1)) assert_nan(r); else assert_close(r, r2); r; }) +#else +#define check1(func,p0) func(p0) +#define check2(func,p0,p1) func(p0,p1) +#define check_close1(func,p0) func(p0) +#define check_close2(func,p0,p1) func(p0,p1) +#endif + double aa = 0.5; double bb = 1; int main() { setup_default_uart(); + bool fail = false; + printf("%d\n", aa < bb); for(float a = -1; a <= 1; a++) { for(float b = -1; b <= 1; b++) { @@ -341,21 +386,27 @@ int main() { #if 1 for (float x = 0; x < 3; x++) { printf("\n ----- %f\n", x); - printf("FSQRT %10.18f\n", sqrtf(x)); - printf("FCOS %10.18f\n", cosf(x)); - printf("FSIN %10.18f\n", sinf(x)); + printf("FSQRT %10.18f\n", check_close1(sqrtf, x)); + printf("FCOS %10.18f\n", check_close1(cosf, x)); + printf("FSIN %10.18f\n", check_close1(sinf, x)); float s, c; sincosf(x, &s, &c); printf("FSINCOS %10.18f %10.18f\n", s, c); - printf("FTAN %10.18f\n", tanf(x)); - printf("FATAN2 %10.18f\n", atan2f(x, 10)); - printf("FATAN2 %10.18f\n", atan2f(10, x)); - printf("FEXP %10.18f\n", expf(x)); - printf("FLN %10.18f\n", logf(x)); - printf("POWF %10.18f\n", powf(x, x)); - printf("TRUNCF %10.18f\n", truncf(x)); - printf("LDEXPF %10.18f\n", ldexpf(x, x)); - printf("FMODF %10.18f\n", fmodf(x, 3.0f)); + printf("FTAN %10.18f\n", check_close1(tanf, x)); + printf("FATAN2 
%10.18f\n", check_close2(atan2f, x, 10.f)); + printf("FATAN2 %10.18f\n", check_close2(atan2f, 10.f, x)); + printf("FEXP %10.18f\n", check_close1(expf, x)); + printf("FLN %10.18f\n", check_close1(logf, x)); + printf("POWF %10.18f\n", check_close2(powf, x, x)); + printf("TRUNCF %10.18f\n", check_close1(truncf, x)); + printf("LDEXPF %10.18f\n", check_close2(ldexpf, x, x)); + printf("FMODF %10.18f\n", check_close2(fmodf, x, 3.0f)); + sincosf(x, &s, &c); + printf("SINCOS %10.18f %10.18f\n", s, c); + if (s != sin(x) || c != cos(x)) { + printf("SINCOS mismatch\n"); + fail = true; + } } for (double x = 0; x < 3; x++) { @@ -390,18 +441,25 @@ int main() { // sincosf(x, &s, &c); printf("FSINCOS %10.18f %10.18f\n", s, c); - for(int i=1; i<4; i++) { - char buf[4]; - sprintf(buf, "%d", i); - float f0 = -nanf(buf); - double d0 = -nan(buf); - // hmm - *(uint64_t *)&d0 |= i; - *(uint32_t *)&f0 |= i; - float f = (float)d0; - double d = (double)f0; - printf("f2d %08"PRIx32" -> %g %016"PRIx64"\n", *(uint32_t*)&f0, d, *(uint64_t*)&d); - printf("d2f %016"PRIx64" -> %f %08"PRIx32"\n", *(uint64_t*)&d0, f, *(uint32_t*)&f); + for(int j=0;j<2;j++) { + for (int i = 1; i < 4; i++) { + char buf[4]; + sprintf(buf, "%d", i); + float f0 = -nanf(buf); + double d0 = -nan(buf); + // hmm nanf/nan seem to ignore payload + *(uint64_t *) &d0 |= i; + *(uint32_t *) &f0 |= i; + if (j) { + // try without top bit set + *(uint64_t *) &d0 &= ~0x0008000000000000ull; + *(uint32_t *) &f0 &= ~0x00400000u; + } + float f = (float) d0; + double d = (double) f0; + printf("f2d %f %08"PRIx32" -> %g %016"PRIx64"\n", f0, *(uint32_t *) &f0, d, *(uint64_t *) &d); + printf("d2f %f %016"PRIx64" -> %f %08"PRIx32"\n", d0, *(uint64_t *) &d0, f, *(uint32_t *) &f); + } } } #endif @@ -413,17 +471,21 @@ int main() { // } for (int32_t x = -1; x; x <<= 1) { printf("i %d->%f\n", x, (float) x); + check1(__aeabi_i2f, x); } for (int32_t x = 1; x; x <<= 1) { printf("i %d->%f\n", x, (float) x); + check1(__aeabi_i2f, x); y = x << 1; } for 
(int64_t x = 1; x; x <<= 1) { printf("i %lld->%f\n", x, (float) x); + check1(__aeabi_l2f, x); y = x << 1; } for (int64_t x = -1; x; x <<= 1) { printf("i %lld->%f\n", x, (float) x); + check1(__aeabi_l2f, x); y = x << 1; } printf("d %d->%f\n", y, (float) y); @@ -433,40 +495,63 @@ int main() { uint32_t y; for(uint32_t x = 1; x; x <<= 1) { printf("u %u->%f\n", x, (float)x); + check1(__aeabi_ui2f, x); y = x << 1; } printf("u %u->%f\n", y, (float)y); } for(int64_t x = 1; x !=0; x <<= 1u) { printf("%lld->%f\n", x, (float)x); + check1(__aeabi_l2f, x); + } + for(float x = -4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { + printf("f %f->%lld\n", x, (int64_t)x); + if (x < INT64_MIN) { + // seems like there is a bug in the gcc version! + assert(__aeabi_f2lz(x) == INT64_MIN); + } else { + check1(__aeabi_f2lz, x); + } + } + for(float x = 4294967296.f * 4294967296.f * 2.f; x>=0.5f; x/=2.f) { + printf("f2i64 %f->%lld\n", x, (int64_t)x); + if (x >= INT64_MAX) { + // seems like there is a bug in the gcc version! 
+ assert(__aeabi_f2lz(x) == INT64_MAX); + } else { + check1(__aeabi_f2lz, x); + } + } + for(float x = -4294967296.f * 4294967296.f; x<=-0.5f; x/=2.f) { + printf("d2i32 %f->%d\n", x, (int32_t)x); + check1(__aeabi_f2iz, x); } for(float x = 4294967296.f * 4294967296.f; x>=0.5f; x/=2.f) { - printf("f %f->%lld\n", x, (int64_t)x); + printf("d2i32 %f->%d\n", x, (int32_t)x); + check1(__aeabi_f2iz, x); } - for (double x = 1; x < 11; x += 2) { - double f = x * x; - double g = 1.0 / x; - printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777, - x - 0.377777777777777777777777777777, g, 123456789.0 / x); + + for (float x = 1; x < 11; x += 2) { + float f = x * x; + float g = 1.0f / x; + printf("%g %10.18g %10.18g, %10.18g, %10.18g %10.18g\n", x, f, x + 0.37777777777777777777777777777f, + x - 0.377777777777777777777777777777f, g, 123456789.0f / x); + check2(__aeabi_fmul, x, x); + check2(__aeabi_fdiv, 1.0f, x); } - if (test_cfcmpeq() || test_cfcmple() || - test_fcmpun() || test_cmple_gt() || test_cmplt_ge()) { + + if (fail || + test_cfcmpeq() || + test_cfcmple() || + test_fcmpun() || + test_cmple_gt() || + test_cmplt_ge()) { printf("FAILED\n"); return 1; } else { printf("PASSED\n"); return 0; } - - if (test_cfcmpeq() || test_cfcmple() || - test_fcmpun() || test_cmple_gt() || test_cmplt_ge()) { - printf("FAILED\n"); - return 1; - } else { - printf("PASSED\n"); - return 0; - } - #endif }