pico-sdk/src/rp2_common/pico_float/float_aeabi.S
graham sanderson 26653ea81e Initial Release
2021-01-20 10:44:27 -06:00

725 lines
16 KiB
ArmAsm

/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
__pre_init __aeabi_float_init, 00020
.syntax unified
.cpu cortex-m0plus
.thumb
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
.macro _float_wrapper_func x
wrapper_func \x
.endm
.macro wrapper_func_f1 x
_float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
mov ip, lr
bl __check_nan_f1
mov lr, ip
#endif
.endm
.macro wrapper_func_f2 x
_float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
mov ip, lr
bl __check_nan_f2
mov lr, ip
#endif
.endm
.section .text
#if PICO_FLOAT_PROPAGATE_NANS
.thumb_func
__check_nan_f1:
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
bx lr
1:
bx ip
.thumb_func
__check_nan_f2:
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
lsls r2, r1, #1
adds r2, r3
bhi 2f
bx lr
2:
mov r0, r1
1:
bx ip
#endif
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1
#ifndef NDEBUG
movs r3, #0
mov ip, r3
#endif
#endif
ldr r3, =sf_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
bx r3
.endm
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
ldr r3, =sf_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1
mov ip, pc
#endif
bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm
# note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
eors r0, r1
eors r1, r0
eors r0, r1
// fall thru
// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
mov r2, r0
eors r2, r1
bmi 1f // different signs
push {r0, r1, lr}
bl 1f
b fdiv_fsub_nan_helper
1:
#endif
table_tail_call SF_TABLE_FSUB
wrapper_func_f2 __aeabi_fadd
table_tail_call SF_TABLE_FADD
// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
push {r0, r1, lr}
bl 1f
b fdiv_fsub_nan_helper
1:
#endif
table_tail_call SF_TABLE_FDIV
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
pop {r1, r2}
// check for infinite op infinite (or rather check for infinite result with both
// operands being infinite)
lsls r3, r0, #1
asrs r3, r3, #24
adds r3, #1
beq 2f
pop {pc}
2:
lsls r1, #1
asrs r1, r1, #24
lsls r2, #1
asrs r2, r2, #24
ands r1, r2
adds r1, #1
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
3:
pop {pc}
#endif
// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
push {r0, r1, lr}
bl 1f
pop {r1, r2}
// check for multiplication of infinite by zero (or rather check for infinite result with either
// operand 0)
lsls r3, r0, #1
asrs r3, r3, #24
adds r3, #1
beq 2f
pop {pc}
2:
ands r1, r2
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
3:
pop {pc}
1:
#endif
table_tail_call SF_TABLE_FMUL
// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
push {r0-r2, lr}
eors r0, r1
eors r1, r0
eors r0, r1
b __aeabi_cfcmple_guts
// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
push {r0-r2, lr}
__aeabi_cfcmple_guts:
lsls r2,r0,#1
lsrs r2,#24
beq 1f
cmp r2,#0xff
bne 2f
lsls r2, r0, #9
bhi 3f
1:
lsrs r0,#23 @ clear mantissa if denormal or infinite
lsls r0,#23
2:
lsls r2,r1,#1
lsrs r2,#24
beq 1f
cmp r2,#0xff
bne 2f
lsls r2, r1, #9
bhi 3f
1:
lsrs r1,#23 @ clear mantissa if denormal or infinite
lsls r1,#23
2:
movs r2,#1 @ initialise result
eors r1,r0
bmi 2f @ opposite signs? then can proceed on basis of sign of x
eors r1,r0 @ restore y
bpl 1f
cmp r1,r0
pop {r0-r2, pc}
1:
cmp r0,r1
pop {r0-r2, pc}
2:
orrs r1, r0 @ handle 0/-0
adds r1, r1 @ note this always sets C
beq 3f
mvns r0, r0 @ carry inverse of r0 sign
adds r0, r0
3:
pop {r0-r2, pc}
// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
push {lr}
bl __aeabi_cfcmpeq
beq 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
push {lr}
bl __aeabi_cfcmple
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
push {lr}
bl __aeabi_cfcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cfrcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cfrcmple
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
lsls r2, r1, #1
adds r2, r3
bhi 1f
movs r0, #0
bx lr
1:
movs r0, #1
bx lr
// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
subs r1, r1
cmp r0, #0
bne __aeabi_i2f_main
mov r0, r1
bx lr
float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
wrapper_func __aeabi_i2f
lsrs r1, r0, #31
lsls r1, #31
bpl 1f
rsbs r0, #0
1:
cmp r0, #0
beq 7f
__aeabi_i2f_main:
mov ip, lr
push {r0, r1}
ldr r3, =sf_clz_func
ldr r3, [r3]
blx r3
pop {r1, r2}
lsls r1, r0
subs r0, #158
rsbs r0, #0
adds r1,#0x80 @ rounding
bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
lsls r3,r1,#24 @ check bottom 8 bits of r1
beq 6f @ in rounding-tie case?
lsls r1,#1 @ remove leading 1
3:
lsrs r1,#9 @ align mantissa
lsls r0,#23 @ align exponent
orrs r0,r2 @ assemble exponent and mantissa
4:
orrs r0,r1 @ apply sign
1:
bx ip
5:
adds r0,#1 @ correct exponent offset
b 3b
6:
lsrs r1,#9 @ ensure even result
lsls r1,#10
b 3b
7:
bx lr
// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
lsls r1, r0, #1
lsrs r2, r1, #24
movs r3, #0x80
lsls r3, #24
cmp r2, #126
ble 1f
subs r2, #158
bge 2f
asrs r1, r0, #31
lsls r0, #9
lsrs r0, #1
orrs r0, r3
negs r2, r2
lsrs r0, r2
lsls r1, #1
adds r1, #1
muls r0, r1
bx lr
1:
movs r0, #0
bx lr
2:
lsrs r0, #31
adds r0, r3
subs r0, #1
bx lr
cmn r0, r0
bcc float2int
push {lr}
lsls r0, #1
lsrs r0, #1
movs r1, #0
bl __aeabi_f2uiz
cmp r0, #0
bmi 1f
rsbs r0, #0
pop {pc}
1:
movs r0, #128
lsls r0, #24
pop {pc}
float_section float2int
regular_func float2int
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
float_section float2fix
regular_func float2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
float_section float2ufix
regular_func float2ufix
table_tail_call SF_TABLE_FLOAT2UFIX
// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
table_tail_call SF_TABLE_FLOAT2UINT
float_section fix2float
regular_func fix2float
table_tail_call SF_TABLE_FIX2FLOAT
float_section ufix2float
regular_func ufix2float
table_tail_call SF_TABLE_UFIX2FLOAT
float_section fix642float
regular_func fix642float
shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
float_section ufix642float
regular_func ufix642float
shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
float_wrapper_section __aeabi_l2f
1:
ldr r2, =__aeabi_i2f
bx r2
wrapper_func __aeabi_l2f
asrs r2, r0, #31
cmp r1, r2
beq 1b
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
float_wrapper_section __aeabi_ul2f
1:
ldr r2, =__aeabi_ui2f
bx r2
wrapper_func __aeabi_ul2f
cmp r1, #0
beq 1b
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
cmn r0, r0
bcc float2int64
push {lr}
lsls r0, #1
lsrs r0, #1
movs r1, #0
bl float2ufix64
cmp r1, #0
bmi 1f
movs r2, #0
rsbs r0, #0
sbcs r2, r1
mov r1, r2
pop {pc}
1:
movs r1, #128
lsls r1, #24
movs r0, #0
pop {pc}
float_section float2int64
regular_func float2int64
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
float_section float2fix64
regular_func float2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
float_section float2ufix64
regular_func float2ufix64
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
// copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
asrs r1, r0, #3
movs r2, #0xf
lsls r2, #27
orrs r1, r2
lsls r0, #25
bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1b
#endif
shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
float_wrapper_section srqtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1
// check for negative
asrs r1, r0, #23
bmi 1f
#endif
table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1
1:
mvns r0, r1
cmp r0, #255
bne 2f
// -0 or -Denormal return -0 (0x80000000)
lsls r0, #31
bx lr
2:
// return -Inf (0xff800000)
asrs r0, r1, #31
lsls r0, #23
bx lr
#endif
float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
// rom version only works for -128 < angle < 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
// rom version only works for -128 < angle < 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sincosf
push {r1, r2, lr}
// rom version only works for -128 < angle < 128
lsls r3, r0, #1
lsrs r3, #24
cmp r3, #127 + 7
bge 3f
2:
ldr r3, =sf_table
ldr r3, [r3, #SF_TABLE_FSIN]
blx r3
pop {r2, r3}
str r0, [r2]
str r1, [r3]
pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
.align 2
pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r3, #255
bne 4f
// infinite to nan
movs r3, #1
lsls r3, #22
orrs r0, r3
str r0, [r1]
str r0, [r2]
add sp, #12
bx lr
4:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
// rom version only works for -128 < angle < 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FTAN
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
float_wrapper_section atan2f
wrapper_func_f2 atan2f
shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
float_wrapper_section expf
wrapper_func_f1 expf
table_tail_call SF_TABLE_FEXP
float_wrapper_section logf
wrapper_func_f1 logf
table_tail_call SF_TABLE_FLN