Fixup divider save_restore for floating point too; improve tests (#405)

- The divider state needs to be saved for __aeabi_ddiv, __aeabi_fdiv, __aeabi_dtan and __aeabi_ftan or they won't work in interrupts *(probably not used much youd hope), or on an RTOS context switch
 - Refactored code out for the integer and floating point cases
 - Improved the floating point 'tests' in passing to check more return values against GCC implementations
 - Added floating point usage to the IRQ nesting test case
This commit is contained in:
Graham Sanderson
2021-05-13 07:38:42 -05:00
committed by GitHub
parent c6c4eeb122
commit 574fdee37b
9 changed files with 501 additions and 158 deletions

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"
#if SIO_DIV_CSR_READY_LSB == 0
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
#else
need to change SHIFT above
#endif
#if SIO_DIV_CSR_DIRTY_LSB == 1
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
#else
need to change SHIFT above
#endif
// SIO_BASE ptr in r2; pushes r4-r7, lr to stack
// requires that division started at least 2 cycles prior to the start of the macro
.macro save_div_state_and_lr
// originally we did this, however a) it uses r3, and b) the push takes 6 cycles, b)
// any IRQ which uses the divider will necessarily put the data back, which will
// immediately make it ready
//
// // ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
// // // wait for results as we can't save signed-ness of operation
// // 1:
// // lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
// // bcc 1b
// 6 cycles
push {r4, r5, r6, r7, lr}
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
.endm
// restores divider state from r4-r7, then pops them and pc
.macro restore_div_state_and_return
// writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order
//
// it is worth considering what happens if we are interrupted
//
// after writing r4: we are DIRTY and !READY
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
// saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5: we are DIRTY and !READY
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
// at least will be saved/restored correctly and and we'll restore the rest ourselves
// after writing r4, r5, r6: we are DIRTY and READY
// ... interruptor using div will dividend, divisor, quotient registers as is (what we just restored ourselves),
// and we'll restore the remainder after the fact
// note we are not use STM not because it can be restarted due to interrupt which is harmless, more because this is 1 cycle IO space
// and so 4 reads is cheaper (and we don't have to adjust r2)
// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based
// on the signs of the inputs
str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
pop {r4, r5, r6, r7, pc}
.endm

View File

@ -4,8 +4,8 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "hardware/regs/sio.h"
#include "hardware/regs/addressmap.h"
#include "hardware/divider_helper.S"
.syntax unified
.cpu cortex-m0plus
@ -34,17 +34,6 @@
#endif
.endm
#if SIO_DIV_CSR_READY_LSB == 0
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
#else
need to change SHIFT above
#endif
#if SIO_DIV_CSR_DIRTY_LSB == 1
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
#else
need to change SHIFT above
#endif
@ wait 8-n cycles for the hardware divider
.macro wait_div n
.rept (8-\n) / 2
@ -56,58 +45,17 @@ need to change SHIFT above
.endif
.endm
#if (SIO_DIV_SDIVISOR_OFFSET != SIO_DIV_SDIVIDEND_OFFSET + 4) || (SIO_DIV_QUOTIENT_OFFSET != SIO_DIV_SDIVISOR_OFFSET + 4) || (SIO_DIV_REMAINDER_OFFSET != SIO_DIV_QUOTIENT_OFFSET + 4)
#error register layout has changed - we rely on this order to make sure we save/restore in the right order
#endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
# SIO_BASE ptr in r2
.macro save_div_state_and_lr
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
# wait for results as we can't save signed-ness of operation
1:
lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
bcc 1b
push {r4, r5, r6, r7, lr}
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
ldr r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
ldr r5, [r2, #SIO_DIV_SDIVISOR_OFFSET]
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
.endm
.macro restore_div_state_and_return
// writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order
//
// it is worth considering what happens if we are interrupted
//
// after writing r4: we are DIRTY and !READY
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
// saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5: we are DIRTY and !READY
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
// at least will be saved/restored correctly and and we'll restore the rest ourselves
// after writing r4, r5, r6: we are DIRTY and READY
// ... interruptor using div will dividend, divisor, quotient registers as is (what we just restored ourselves),
// and we'll restore the remainder after the fact
// note we are not use STM not because it can be restarted due to interrupt which is harmless, more because this is 1 cycle IO space
// and so 4 reads is cheaper (and we don't have to adjust r2)
str r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
str r5, [r2, #SIO_DIV_SDIVISOR_OFFSET]
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
pop {r4, r5, r6, r7, pc}
.endm
.macro save_div_state_and_lr_64
push {r4, r5, r6, r7, lr}
ldr r6, =SIO_BASE
1:
ldr r5, [r6, #SIO_DIV_CSR_OFFSET]
# wait for results as we can't save signed-ness of operation
// wait for results as we can't save signed-ness of operation
lsrs r5, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
bcc 1b
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
@ -154,17 +102,18 @@ wrapper_func __aeabi_idivmod
regular_func div_s32s32
regular_func divmod_s32s32
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_s32s32_savestate
regular_func divmod_s32s32_unsafe
#else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway
// to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
// in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
// are the hardware_divider functions that can be used instead anyway
regular_func divmod_s32s32_unsafe
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
ldr r2, =(SIO_BASE)
mrs r3, PRIMASK
cpsid i
@ -203,6 +152,8 @@ regular_func divmod_s32s32_unsafe
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
.align 2
regular_func divmod_s32s32_savestate
// note that we must be at least 2 cycles into division at this point,
// which we are because of the firty check before getting here (and of course the function call before that)
save_div_state_and_lr
bl divmod_s32s32_unsafe
restore_div_state_and_return
@ -215,17 +166,18 @@ regular_func divmod_u32u32
wrapper_func __aeabi_uidiv
wrapper_func __aeabi_uidivmod
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_u32u32_savestate
regular_func divmod_u32u32_unsafe
#else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway
// to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
// in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
// are the hardware_divider functions that can be used instead anyway
regular_func divmod_u32u32_unsafe
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
ldr r2, =(SIO_BASE)
mrs r3, PRIMASK
cpsid i
@ -273,9 +225,9 @@ wrapper_func __aeabi_ldivmod
regular_func div_s64s64
regular_func divmod_s64s64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
mov r2, ip
@ -287,6 +239,7 @@ divmod_s64s64_savestate:
bl divmod_s64s64_unsafe
restore_div_state_and_return_64
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
@ -300,9 +253,9 @@ wrapper_func __aeabi_uldivmod
regular_func div_u64u64
regular_func divmod_u64u64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
mov r2, ip
@ -314,6 +267,7 @@ regular_func divmod_u64u64_savestate
bl divmod_u64u64_unsafe
restore_div_state_and_return_64
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i

View File

@ -6,6 +6,7 @@
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"
__pre_init __aeabi_double_init, 00020
@ -131,16 +132,16 @@ regular_func pop_r8_r11
mov r11,r7
bx r14
# note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions
// note functions are word aligned except where they are an odd number of linear instructions
// double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition
double_wrapper_section __aeabi_darithmetic
// double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment
// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_drsub
eors r0, r1
@ -177,7 +178,35 @@ wrapper_func_d2 __aeabi_ddiv
b ddiv_dsub_nan_helper
1:
#endif
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs ddiv_save_state
mov r2, ip
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl ddiv_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
ddiv_shim_call:
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ddiv_save_state:
ldr r2, =(SIO_BASE)
save_div_state_and_lr
mov r2, ip
bl ddiv_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
ddiv_dsub_nan_helper:
#if PICO_DOUBLE_PROPAGATE_NANS
@ -592,6 +621,8 @@ regular_func sincostan_remainder
ldr r2, =0x54442D18 // 2 * M_PI
ldr r3, =0x401921FB
push {lr}
// note remainder only uses the divider thru integer divider functions
// which save and restore themselves
bl remainder
pop {pc}
@ -752,13 +783,40 @@ double_wrapper_section tan
wrapper_func tan
// rom version only works for -1024 < angle < 1024
lsls r2, r1, #2
bcc 1f
bcc dtan_in_range
lsrs r2, #22
cmp r2, #9
bge 2f
1:
bge dtan_angle_out_of_range
dtan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs dtan_save_state
mov r2, ip
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl dtan_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
dtan_shim_call:
shimmable_table_tail_call SF_TABLE_FTAN dtan_shim
2:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
dtan_save_state:
ldr r2, =(SIO_BASE)
save_div_state_and_lr
mov r2, ip
bl dtan_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
dtan_angle_out_of_range:
#if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1
asrs r2, #21
@ -775,7 +833,7 @@ wrapper_func tan
bl sincostan_remainder
pop {r2}
mov lr, r2
b 1b
b dtan_in_range
double_wrapper_section atan2
wrapper_func_d2 atan2

View File

@ -6,6 +6,7 @@
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"
__pre_init __aeabi_float_init, 00020
@ -104,16 +105,16 @@ __check_nan_f2:
.endm
# note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions
// note functions are word aligned except where they are an odd number of linear instructions
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment
// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
eors r0, r1
@ -146,7 +147,30 @@ wrapper_func_f2 __aeabi_fdiv
b fdiv_fsub_nan_helper
1:
#endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs fdiv_save_state
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl fdiv_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
fdiv_shim_call:
table_tail_call SF_TABLE_FDIV
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
fdiv_save_state:
save_div_state_and_lr
bl fdiv_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
@ -689,10 +713,33 @@ wrapper_func tanf
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
bge ftan_out_of_range
ftan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs ftan_save_state
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl ftan_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
ftan_shim_call:
table_tail_call SF_TABLE_FTAN
1:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ftan_save_state:
save_div_state_and_lr
bl ftan_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
ftan_out_of_range:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
@ -709,7 +756,7 @@ wrapper_func tanf
bl remainderf
pop {r1}
mov lr, r1
b 2b
b ftan_in_range
float_wrapper_section atan2f
wrapper_func_f2 atan2f