/*
 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

// Single-precision floating point wrappers for Cortex-M0+ (Thumb, unified syntax).
// Most entry points simply tail call a function whose address is read from sf_table;
// the remainder add NaN / infinity / range handling around such a call.

#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"

__pre_init __aeabi_float_init, 00020

.syntax unified
.cpu cortex-m0plus
.thumb

// Select the output section for a function: a RAM section when PICO_FLOAT_IN_RAM is set,
// otherwise the regular code section.
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// As float_section, but named after the wrapper symbol of \func.
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm

.macro _float_wrapper_func x
    wrapper_func \x
.endm

// Entry point for a one-argument float function.
// With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned to the caller unchanged
// without executing the rest of the function. Clobbers r2, r3 and ip in that configuration.
.macro wrapper_func_f1 x
    _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr              // ip = our caller, used by the helper for the early return
    bl __check_nan_f1
    mov lr, ip              // not a NaN: restore lr so the function can still tail call
#endif
.endm

// Entry point for a two-argument float function.
// With PICO_FLOAT_PROPAGATE_NANS, if either argument is a NaN that NaN is returned to the
// caller without executing the rest of the function. Clobbers r2, r3 and ip in that configuration.
.macro wrapper_func_f2 x
    _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr              // ip = our caller, used by the helper for the early return
    bl __check_nan_f2
    mov lr, ip              // no NaN: restore lr so the function can still tail call
#endif
.endm

.section .text

#if PICO_FLOAT_PROPAGATE_NANS
// In:  r0 = float argument, ip = return address of the wrapped function's caller
// Out: returns to lr (the wrapped function) if r0 is not a NaN,
//      otherwise returns straight to ip with the NaN still in r0.
.thumb_func
__check_nan_f1:
    movs r3, #1
    lsls r3, #24            // r3 = 1 in the lsb of the exponent field once the sign is shifted out
    lsls r2, r0, #1         // drop the sign bit
    adds r2, r3             // carry out <=> exponent is all ones
    bhi 1f                  // carry set and mantissa non-zero <=> NaN
    bx lr
1:
    bx ip

// In:  r0, r1 = float arguments, ip = return address of the wrapped function's caller
// Out: returns to lr (the wrapped function) if neither argument is a NaN,
//      otherwise returns straight to ip with the (first) NaN argument in r0.
.thumb_func
__check_nan_f2:
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1f                  // r0 is a NaN: return it as is
    lsls r2, r1, #1
    adds r2, r3
    bhi 2f                  // r1 is a NaN
    bx lr
2:
    mov r0, r1              // return the NaN from the second argument
1:
    bx ip
#endif

// Tail call the function stored at byte offset SF_TABLE_OFFSET in sf_table. Clobbers r3.
// When ROM v1 support is enabled, debug builds also zero ip first
// (NOTE(review): presumably so that a stale value in ip is never mistaken for the
// marker set up by shimmable_table_tail_call - confirm against the shim code).
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1
#ifndef NDEBUG
    movs r3, #0
    mov ip, r3
#endif
#endif
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
    bx r3
.endm

// As table_tail_call, but when ROM v1 support is enabled the branch is followed by two
// bytes (the table offset and 0xdf) and the address of \shim, and ip is loaded from pc,
// i.e. with the address of that inline data, before branching. Clobbers r3 (and ip).
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1
    mov ip, pc              // pc reads as this instruction + 4 = address of the data after bx
#endif
    bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm

// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool

// note functions are word aligned except where they are an odd number of linear instructions

// float FUNC_NAME(__aeabi_fadd)(float, float)         single-precision addition
float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y)    single-precision reverse subtraction, y - x

// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
    // swap r0 and r1 without a scratch register
    eors r0, r1
    eors r1, r0
    eors r0, r1
    // fall thru

// float FUNC_NAME(__aeabi_fsub)(float x, float y)     single-precision subtraction, x - y
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
    // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
    mov r2, r0
    eors r2, r1
    bmi 1f // different signs
    push {r0, r1, lr}       // keep the operands for the post-check in the helper
    bl 1f                   // do the subtraction as a real call so that we get control back
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FSUB

wrapper_func_f2 __aeabi_fadd
    table_tail_call SF_TABLE_FADD

// float FUNC_NAME(__aeabi_fdiv)(float n, float d)     single-precision division, n / d
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}       // keep the operands for the post-check in the helper
    bl 1f                   // do the division as a real call so that we get control back
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FDIV

// Shared post-processing for fsub / fdiv when NaN propagation is enabled.
// In:  r0 = result of the operation, stack = {operand 0, operand 1, return address}
// Out: r0 = result, turned into a NaN if it is infinite and both operands were infinite
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
    pop {r1, r2}

    // check for infinite op infinite (or rather check for infinite result with both
    // operands being infinite)
    lsls r3, r0, #1
    asrs r3, r3, #24        // r3 = sign extended exponent and top mantissa bits, -1 for an infinite result
    adds r3, #1
    beq 2f
    pop {pc}
2:
    lsls r1, #1
    asrs r1, r1, #24
    lsls r2, #1
    asrs r2, r2, #24
    ands r1, r2             // -1 only if both operands have an all ones exponent
    adds r1, #1
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
#endif

// float FUNC_NAME(__aeabi_fmul)(float, float)         single-precision multiplication
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}       // keep the operands for the post-check below
    bl 1f                   // do the multiplication as a real call so that we get control back
    pop {r1, r2}

    // check for multiplication of infinite by zero (or rather check for infinite result with either
    // operand 0)
    lsls r3, r0, #1
    asrs r3, r3, #24
    adds r3, #1
    beq 2f
    pop {pc}
2:
    // The sign bits must not take part in the test: two negative operands (e.g. -0 * -inf)
    // always have bit 31 in common, which would hide the zero operand.
    lsls r1, #1
    lsls r2, #1
    ands r1, r2
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
1:
#endif
    table_tail_call SF_TABLE_FMUL

// void FUNC_NAME(__aeabi_cfrcmple)(float, float)         reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
    push {r0-r2, lr}
    // swap the operands, then share the implementation below
    eors r0, r1
    eors r1, r0
    eors r0, r1
    b __aeabi_cfcmple_guts

// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float)         3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float)         non-excepting equality comparison [1], result in PSR ZC flags
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
    push {r0-r2, lr}

// In:  r0 = x, r1 = y, stack = {r0-r2, lr} as pushed by the entry points above
// Out: flags only; r0-r2 are restored on every exit path
__aeabi_cfcmple_guts:
    lsls r2,r0,#1
    lsrs r2,#24             // r2 = exponent of x
    beq 1f
    cmp r2,#0xff
    bne 2f
    lsls r2, r0, #9         // C = 1 (from the exponent), Z = mantissa is zero
    bhi 3f                  // x is a NaN: leave with C = 1, Z = 0
1:
    lsrs r0,#23 @ clear mantissa if denormal or infinite
    lsls r0,#23
2:
    lsls r2,r1,#1
    lsrs r2,#24             // r2 = exponent of y
    beq 1f
    cmp r2,#0xff
    bne 2f
    lsls r2, r1, #9
    bhi 3f                  // y is a NaN: leave with C = 1, Z = 0
1:
    lsrs r1,#23 @ clear mantissa if denormal or infinite
    lsls r1,#23
2:
    movs r2,#1 @ initialise result
    eors r1,r0
    bmi 2f @ opposite signs? then can proceed on basis of sign of x
    eors r1,r0 @ restore y
    bpl 1f
    cmp r1,r0               // both negative: compare the magnitudes the other way round
    pop {r0-r2, pc}
1:
    cmp r0,r1
    pop {r0-r2, pc}
2:
    orrs r1, r0 @ handle 0/-0
    adds r1, r1 @ note this always sets C
    beq 3f
    mvns r0, r0 @ carry inverse of r0 sign
    adds r0, r0
3:
    pop {r0-r2, pc}


// int FUNC_NAME(__aeabi_fcmpeq)(float, float)         result (1, 0) denotes (=, ?<>) [2], use for C == and !=
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
    push {lr}
    bl __aeabi_cfcmpeq
    beq 1f
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmplt)(float, float)         result (1, 0) denotes (<, ?>=) [2], use for C <
// NOTE(review): the value returned for "true" is 0xffffffff rather than 1 (see sbcs below),
// so callers have to test the result for non-zero.
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
    push {lr}
    bl __aeabi_cfcmple
    sbcs r0, r0             // r0 = r0 - r0 - !C: 0 if C is set, all ones if C is clear
    pop {pc}

// int FUNC_NAME(__aeabi_fcmple)(float, float)         result (1, 0) denotes (<=, ?>) [2], use for C <=
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
    push {lr}
    bl __aeabi_cfcmple
    bls 1f
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpge)(float, float)         result (1, 0) denotes (>=, ?<) [2], use for C >=
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple
    bls 1f
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpgt)(float, float)         result (1, 0) denotes (>, ?<=) [2], use for C >
// NOTE(review): the value returned for "true" is 0xffffffff rather than 1 (see sbcs below),
// so callers have to test the result for non-zero.
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple
    sbcs r0, r0             // r0 = r0 - r0 - !C: 0 if C is set, all ones if C is clear
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpun)(float, float)         result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1         // drop the sign bit
    adds r2, r3             // carry out <=> exponent is all ones
    bhi 1f                  // carry set and mantissa non-zero <=> first argument is a NaN
    lsls r2, r1, #1
    adds r2, r3
    bhi 1f                  // second argument is a NaN
    movs r0, #0
    bx lr
1:
    movs r0, #1
    bx lr


// float FUNC_NAME(__aeabi_ui2f)(unsigned)             unsigned to float (single precision) conversion
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
    subs r1, r1             // r1 = 0: the sign word expected by __aeabi_i2f_main
    cmp r0, #0
    bne __aeabi_i2f_main
    mov r0, r1              // zero converts to +0.0f
    bx lr

float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int)                     integer to float (single precision) conversion
wrapper_func __aeabi_i2f
    lsrs r1, r0, #31
    lsls r1, #31            // r1 = sign bit only
    bpl 1f
    rsbs r0, #0             // negative: continue with the magnitude
1:
    cmp r0, #0
    beq 7f
// In: r0 = non-zero magnitude, r1 = sign bit (bit 31) or 0, lr = return address
__aeabi_i2f_main:

    mov ip, lr
    push {r0, r1}
    ldr r3, =sf_clz_func
    ldr r3, [r3]
    blx r3                  // r0 = number of leading zeros of the magnitude
    pop {r1, r2}            // r1 = magnitude, r2 = sign
    lsls r1, r0             // normalise so that the leading 1 is in bit 31
    subs r0, #158
    rsbs r0, #0             // r0 = 158 - leading zeros = biased exponent

    adds r1,#0x80  @ rounding
    bcs 5f         @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

    lsls r3,r1,#24 @ check bottom 8 bits of r1
    beq 6f         @ in rounding-tie case?
    lsls r1,#1     @ remove leading 1
3:
    lsrs r1,#9     @ align mantissa
    lsls r0,#23    @ align exponent
    orrs r0,r2     @ apply sign
4:
    orrs r0,r1     @ assemble exponent and mantissa
1:
    bx ip
5:
    adds r0,#1     @ correct exponent offset
    b 3b
6:
    lsrs r1,#9     @ ensure even result
    lsls r1,#10
    b 3b
7:
    bx lr

// int FUNC_NAME(__aeabi_f2iz)(float)                     float (single precision) to integer C-style conversion [3]
// Rounds towards zero; values with an exponent too large for an int (which includes
// infinities and NaNs) saturate to 0x7fffffff or 0x80000000 according to the sign bit.
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
    lsls r1, r0, #1
    lsrs r2, r1, #24        // r2 = biased exponent
    movs r3, #0x80
    lsls r3, #24            // r3 = 0x80000000
    cmp r2, #126
    ble 1f                  // magnitude below 1: result is 0
    subs r2, #158
    bge 2f                  // magnitude of 2^31 or more: saturate
    asrs r1, r0, #31        // r1 = 0 or -1 according to the sign
    lsls r0, #9
    lsrs r0, #1
    orrs r0, r3             // r0 = mantissa with its implicit leading 1 in bit 31
    negs r2, r2
    lsrs r0, r2             // shift down by 158 - exponent, discarding the fraction
    lsls r1, #1
    adds r1, #1             // r1 = +1 or -1
    muls r0, r1             // apply the sign
    bx lr
1:
    movs r0, #0
    bx lr
2:
    lsrs r0, #31            // r0 = sign (0 or 1)
    adds r0, r3
    subs r0, #1             // 0x7fffffff if positive, 0x80000000 if negative
    bx lr

float_section float2int
regular_func float2int
    shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim

float_section float2fix
regular_func float2fix
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim

float_section float2ufix
regular_func float2ufix
    table_tail_call SF_TABLE_FLOAT2UFIX

// unsigned FUNC_NAME(__aeabi_f2uiz)(float)             float (single precision) to unsigned C-style conversion [3]
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
    table_tail_call SF_TABLE_FLOAT2UINT

float_section fix2float
regular_func fix2float
    table_tail_call SF_TABLE_FIX2FLOAT

float_section ufix2float
regular_func ufix2float
    table_tail_call SF_TABLE_UFIX2FLOAT

float_section fix642float
regular_func fix642float
    shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim

float_section ufix642float
regular_func ufix642float
    shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim

// float FUNC_NAME(__aeabi_l2f)(long long)             long long to float (single precision) conversion
float_wrapper_section __aeabi_l2f
1:
    ldr r2, =__aeabi_i2f
    bx r2
wrapper_func __aeabi_l2f
    asrs r2, r0, #31
    cmp r1, r2
    beq 1b                  // high word is just the sign extension of the low word: use the 32 bit version
    shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim

// float FUNC_NAME(__aeabi_ul2f)(unsigned long long)   unsigned long long to float (single precision) conversion
float_wrapper_section __aeabi_ul2f
1:
    ldr r2, =__aeabi_ui2f
    bx r2
wrapper_func __aeabi_ul2f
    cmp r1, #0
    beq 1b                  // high word is zero: use the 32 bit version
    shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim

// long long FUNC_NAME(__aeabi_f2lz)(float)             float (single precision) to long long C-style conversion [3]
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    cmn r0, r0              // C = sign bit of the argument
    bcc float2int64         // not negative: defer to float2int64
    push {lr}
    lsls r0, #1
    lsrs r0, #1             // clear the sign bit
    movs r1, #0
    bl float2ufix64         // convert the magnitude (second argument 0)
    cmp r1, #0
    bmi 1f                  // magnitude does not fit in 63 bits: saturate
    // negate the 64 bit value in r1:r0
    movs r2, #0
    rsbs r0, #0
    sbcs r2, r1
    mov r1, r2
    pop {pc}
1:
    // return 0x8000000000000000
    movs r1, #128
    lsls r1, #24
    movs r0, #0
    pop {pc}

float_section float2int64
regular_func float2int64
    shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim

float_section float2fix64
regular_func float2fix64
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim

// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float)     float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
    shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim

float_section float2ufix64
regular_func float2ufix64
    shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim

// float to double conversion; with PICO_FLOAT_PROPAGATE_NANS a NaN argument is converted
// by the code at 1: instead of going through the table function
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
    asrs r1, r0, #3
    movs r2, #0xf
    lsls r2, #27
    orrs r1, r2
    lsls r0, #25
    bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1b                  // argument is a NaN
#endif
    shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim

float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1
    // check for negative
    asrs r1, r0, #23        // r1 = sign extended sign and exponent
    bmi 1f
#endif
    table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1
1:
    mvns r0, r1
    cmp r0, #255            // equal <=> exponent field is zero
    bne 2f
    // -0 or -Denormal return -0 (0x80000000)
    lsls r0, #31
    bx lr
2:
    // return -Inf (0xff800000)
    asrs r0, r1, #31
    lsls r0, #23
    bx lr
#endif

float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then use the table function
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then use the table function
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
// In: r0 = angle, r1 and r2 = addresses the two results are stored to
wrapper_func sincosf
    push {r1, r2, lr}
    // rom version only works for -128 < angle < 128
    lsls r3, r0, #1
    lsrs r3, #24            // r3 = biased exponent
    cmp r3, #127 + 7
    bge 3f
2:
    ldr r3, =sf_table
    ldr r3, [r3, #SF_TABLE_FSIN]
    blx r3
    pop {r2, r3}            // the two result addresses saved on entry
    str r0, [r2]
    str r1, [r3]            // r1 as left by the table function is stored as the second result
    pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
.align 2
    pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r3, #255
    bne 4f
    // infinite to nan
    movs r3, #1
    lsls r3, #22
    orrs r0, r3
    str r0, [r1]
    str r0, [r2]
    add sp, #12             // discard the three words pushed on entry; lr is still intact
    bx lr
4:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then use the table function
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FTAN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then use the table function
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section atan2f
wrapper_func_f2 atan2f
    shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim

float_wrapper_section expf
wrapper_func_f1 expf
    table_tail_call SF_TABLE_FEXP

float_wrapper_section logf
wrapper_func_f1 logf
    table_tail_call SF_TABLE_FLN