Initial Release

This commit is contained in:
graham sanderson
2021-01-20 10:44:27 -06:00
commit 26653ea81e
404 changed files with 135614 additions and 0 deletions

View File

@ -0,0 +1,126 @@
if (NOT TARGET pico_float)
# Umbrella library that consumers depend on; the concrete implementation is
# chosen per executable via the generator expression at the end of this section.
add_library(pico_float INTERFACE)

# Header-only target exposing this directory's include/ folder.
add_library(pico_float_headers INTERFACE)
target_include_directories(pico_float_headers INTERFACE
        ${CMAKE_CURRENT_LIST_DIR}/include
)

# "compiler" implementation: no custom code, falls through to the compiler's own support.
add_library(pico_float_compiler INTERFACE)
# PICO_BUILD_DEFINE: PICO_FLOAT_COMPILER, whether compiler provided float support is being used, type=bool, default=0, but dependent on CMake options, group=pico_float
target_compile_definitions(pico_float_compiler INTERFACE
        PICO_FLOAT_COMPILER=1
)

# "pico" implementation: the sources listed here, linked with pico_bootrom.
add_library(pico_float_pico INTERFACE)
target_sources(pico_float_pico INTERFACE
        ${CMAKE_CURRENT_LIST_DIR}/float_aeabi.S
        ${CMAKE_CURRENT_LIST_DIR}/float_init_rom.c
        ${CMAKE_CURRENT_LIST_DIR}/float_math.c
        ${CMAKE_CURRENT_LIST_DIR}/float_v1_rom_shim.S
)
# PICO_BUILD_DEFINE: PICO_FLOAT_PICO, whether optimized pico/bootrom provided float support is being used, type=bool, default=1, but dependent on CMake options, group=pico_float
target_compile_definitions(pico_float_pico INTERFACE
        PICO_FLOAT_PICO=1
)
target_link_libraries(pico_float_pico INTERFACE
        pico_bootrom
        pico_float_headers
)

# "none" implementation: float support disabled (see the define description below).
add_library(pico_float_none INTERFACE)
target_sources(pico_float_none INTERFACE
        ${CMAKE_CURRENT_LIST_DIR}/float_none.S
)
target_link_libraries(pico_float_none INTERFACE
        pico_float_headers
)
# PICO_BUILD_DEFINE: PICO_FLOAT_NONE, whether float support is disabled and functions will panic, type=bool, default=0, but dependent on CMake options, group=pico_float
target_compile_definitions(pico_float_none INTERFACE
        PICO_FLOAT_NONE=1
)

# "default" is simply an alias-like wrapper around the pico implementation.
add_library(pico_float_default INTERFACE)
target_link_libraries(pico_float_default INTERFACE
        pico_float_pico
)
set(PICO_DEFAULT_FLOAT_IMPL pico_float_default)

# Link whichever implementation the consuming target names in its
# PICO_TARGET_FLOAT_IMPL property, or the default when that property is unset/false.
target_link_libraries(pico_float INTERFACE
        $<IF:$<BOOL:$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>>,$<TARGET_PROPERTY:PICO_TARGET_FLOAT_IMPL>,${PICO_DEFAULT_FLOAT_IMPL}>)
# Call pico_wrap_function() (defined elsewhere) on TARGET once for every
# single-precision symbol handled by this library, in the order listed.
#   TARGET - the library target whose float symbols are to be wrapped
function(wrap_float_functions TARGET)
    set(wrapped_float_symbols
            # AEABI arithmetic helpers
            __aeabi_fadd
            __aeabi_fdiv
            __aeabi_fmul
            __aeabi_frsub
            __aeabi_fsub
            # AEABI comparisons
            __aeabi_cfcmpeq
            __aeabi_cfrcmple
            __aeabi_cfcmple
            __aeabi_fcmpeq
            __aeabi_fcmplt
            __aeabi_fcmple
            __aeabi_fcmpge
            __aeabi_fcmpgt
            __aeabi_fcmpun
            # AEABI conversions
            __aeabi_i2f
            __aeabi_l2f
            __aeabi_ui2f
            __aeabi_ul2f
            __aeabi_f2iz
            __aeabi_f2lz
            __aeabi_f2uiz
            __aeabi_f2ulz
            __aeabi_f2d
            # math library functions
            sqrtf
            cosf
            sinf
            tanf
            atan2f
            expf
            logf
            ldexpf
            copysignf
            truncf
            floorf
            ceilf
            roundf
            sincosf # gnu
            asinf
            acosf
            atanf
            sinhf
            coshf
            tanhf
            asinhf
            acoshf
            atanhf
            exp2f
            log2f
            exp10f
            log10f
            powf
            powintf #gnu
            hypotf
            cbrtf
            fmodf
            dremf
            remainderf
            remquof
            expm1f
            log1pf
            fmaf
    )
    foreach(float_symbol IN LISTS wrapped_float_symbols)
        pico_wrap_function(${TARGET} ${float_symbol})
    endforeach()
endfunction()

# Both implementations that supply their own code get the full set of wrappers.
wrap_float_functions(pico_float_pico)
wrap_float_functions(pico_float_none)
# Choose the float implementation for an executable.
#   TARGET - name of an EXECUTABLE target; any other target type is a fatal error
#   IMPL   - implementation suffix; the target's PICO_TARGET_FLOAT_IMPL property
#            is set to "pico_float_<IMPL>", which the pico_float library's
#            generator expression then links in
# Example: pico_set_float_implementation(my_app compiler)
#
# Implemented as a function (not a macro) so the temporary target_type
# variable does not leak into the caller's scope.
function(pico_set_float_implementation TARGET IMPL)
    get_target_property(target_type ${TARGET} TYPE)
    if ("EXECUTABLE" STREQUAL "${target_type}")
        set_target_properties(${TARGET} PROPERTIES PICO_TARGET_FLOAT_IMPL "pico_float_${IMPL}")
    else()
        message(FATAL_ERROR "float implementation must be set on executable not library")
    endif()
endfunction()
endif()

View File

@ -0,0 +1,724 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
__pre_init __aeabi_float_init, 00020
.syntax unified
.cpu cortex-m0plus
.thumb
// float_section name
// Switches to the section for float code called \name: RAM_SECTION_NAME(\name)
// when PICO_FLOAT_IN_RAM is set, SECTION_NAME(\name) otherwise. Both use flags "ax".
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
// float_wrapper_section func
// Same as float_section, with the section named after WRAPPER_FUNC_NAME(\func).
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
// Plain forwarding to wrapper_func (defined outside the visible source), used by
// the _f1/_f2 variants below.
.macro _float_wrapper_func x
wrapper_func \x
.endm
// wrapper_func_f1 x
// Opens wrapper \x for a function whose single float argument is in r0. When
// PICO_FLOAT_PROPAGATE_NANS is set, lr is parked in ip around a call to
// __check_nan_f1, which returns straight to the original caller (via ip) if r0 is a NaN.
.macro wrapper_func_f1 x
_float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
mov ip, lr
bl __check_nan_f1
mov lr, ip
#endif
.endm
// wrapper_func_f2 x
// As wrapper_func_f1 but for two float arguments (r0, r1), using __check_nan_f2.
.macro wrapper_func_f2 x
_float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
mov ip, lr
bl __check_nan_f2
mov lr, ip
#endif
.endm
.section .text
#if PICO_FLOAT_PROPAGATE_NANS
// NaN guard for one-argument wrappers (called from wrapper_func_f1 with the
// wrapper's own return address held in ip).
// In:  r0 = float argument. Clobbers r2, r3 and the flags.
// (r0 << 1) + 0x01000000 carries out with a non-zero result only when the
// sign-stripped value is above the infinity encoding, i.e. r0 is a NaN.
.thumb_func
__check_nan_f1:
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
// not a NaN: resume the wrapper
bx lr
1:
// NaN: skip the wrapper entirely and return r0 to the wrapper's caller
bx ip
// NaN guard for two-argument wrappers (called from wrapper_func_f2).
// In:  r0, r1 = float arguments. Clobbers r2, r3 and the flags.
// The first NaN found (r0 checked before r1) is returned in r0 to the wrapper's caller.
.thumb_func
__check_nan_f2:
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
lsls r2, r1, #1
adds r2, r3
bhi 2f
// neither argument is a NaN: resume the wrapper
bx lr
2:
// second argument is the NaN: make it the result
mov r0, r1
1:
bx ip
#endif
// table_tail_call SF_TABLE_OFFSET
// Tail-calls (bx, lr untouched) the function pointer stored at byte offset
// SF_TABLE_OFFSET within sf_table. Clobbers r3.
// In debug builds with PICO_FLOAT_SUPPORT_ROM_V1, ip is zeroed first.
// NOTE(review): presumably so a shim-on-use entry reached through this
// non-shimmable macro is detectable - confirm against float_v1_rom_shim.S.
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1
#ifndef NDEBUG
movs r3, #0
mov ip, r3
#endif
#endif
ldr r3, =sf_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
bx r3
.endm
// shimmable_table_tail_call SF_TABLE_OFFSET shim
// As table_tail_call, but when PICO_FLOAT_SUPPORT_ROM_V1 is set it also:
//  - copies pc into ip immediately before the branch, and
//  - emits, right after the branch, the bytes (SF_TABLE_OFFSET, 0xdf) followed by
//    the address of \shim.
// The comment in __aeabi_float_init describes this data as an SVC instruction
// carrying the table offset plus the shim pointer, located via ip by
// float_table_shim_on_use_helper. The data is never executed here because it
// follows an unconditional bx.
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
ldr r3, =sf_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1
mov ip, pc
#endif
bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm
# note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions
// shared section for __aeabi_frsub, __aeabi_fsub, __aeabi_fadd, __aeabi_fdiv and __aeabi_fmul
float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
// swap r0 and r1 with three XORs (no scratch register), then continue as fsub
eors r0, r1
eors r1, r0
eors r0, r1
// fall thru
// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
mov r2, r0
eors r2, r1
bmi 1f // different signs
// same signs: keep the operands and return address on the stack, call the table
// routine with lr pointing at the branch below, then post-process its result
push {r0, r1, lr}
bl 1f
b fdiv_fsub_nan_helper
1:
#endif
table_tail_call SF_TABLE_FSUB
// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
wrapper_func_f2 __aeabi_fadd
table_tail_call SF_TABLE_FADD
// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
// save operands and return address; the bl makes the table routine return to
// the branch that follows, which post-processes the result
push {r0, r1, lr}
bl 1f
b fdiv_fsub_nan_helper
1:
#endif
table_tail_call SF_TABLE_FDIV
// Shared result fix-up for fsub/fdiv when PICO_FLOAT_PROPAGATE_NANS is set.
// In:  r0 = result of the table routine; stack = {operand x, operand y, return address}.
// Out: r0 = result, with bit 22 set (turning an infinity into a NaN) when the result
//      and both operands all have an all-ones exponent. Returns by popping pc.
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
pop {r1, r2}
// check for infinite op infinite (or rather check for infinite result with both
// operands being infinite)
// (value << 1) asr 24 is -1 exactly when the exponent field is all ones
lsls r3, r0, #1
asrs r3, r3, #24
adds r3, #1
beq 2f
pop {pc}
2:
lsls r1, #1
asrs r1, r1, #24
lsls r2, #1
asrs r2, r2, #24
// the AND is -1 only if both operand exponents are all ones
ands r1, r2
adds r1, #1
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
3:
pop {pc}
#endif
// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
// save operands and return address, run the table routine as a subroutine
// (label 1 below), then inspect its result
push {r0, r1, lr}
bl 1f
pop {r1, r2}
// check for multiplication of infinite by zero (or rather check for infinite result with either
// operand 0)
lsls r3, r0, #1
asrs r3, r3, #24
adds r3, #1
beq 2f
pop {pc}
2:
// NOTE(review): this is a bitwise AND of the raw operand words, so it appears to
// match only when the operands share no set bits (e.g. one is +0); a case such as
// -0 * -inf would leave the sign bit set and not match - confirm intent.
ands r1, r2
bne 3f
// infinite to nan
movs r1, #1
lsls r1, #22
orrs r0, r1
3:
pop {pc}
1:
#endif
table_tail_call SF_TABLE_FMUL
// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
// r0-r2 are saved and restored, so only the flags carry the result
push {r0-r2, lr}
// swap the operands (XOR swap) and share the common comparison code
eors r0, r1
eors r1, r0
eors r0, r1
b __aeabi_cfcmple_guts
// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
push {r0-r2, lr}
__aeabi_cfcmple_guts:
// --- normalise first operand (r0) ---
// r2 = exponent field; 0 -> flush mantissa, 0xff -> inf or NaN, else leave as is
lsls r2,r0,#1
lsrs r2,#24
beq 1f
cmp r2,#0xff
bne 2f
// exponent all ones: shifting out sign+exponent leaves C set, and Z clear only
// for a non-zero mantissa (NaN); in that case exit with those flags (C=1, Z=0)
lsls r2, r0, #9
bhi 3f
1:
lsrs r0,#23 @ clear mantissa if denormal or infinite
lsls r0,#23
2:
// --- normalise second operand (r1) the same way ---
lsls r2,r1,#1
lsrs r2,#24
beq 1f
cmp r2,#0xff
bne 2f
lsls r2, r1, #9
bhi 3f
1:
lsrs r1,#23 @ clear mantissa if denormal or infinite
lsls r1,#23
2:
movs r2,#1 @ initialise result
eors r1,r0
bmi 2f @ opposite signs? then can proceed on basis of sign of x
eors r1,r0 @ restore y
// same sign: an integer compare of the bit patterns orders the values, with the
// operand order reversed when both are negative
bpl 1f
cmp r1,r0
pop {r0-r2, pc}
1:
cmp r0,r1
pop {r0-r2, pc}
2:
orrs r1, r0 @ handle 0/-0
adds r1, r1 @ note this always sets C
// Z set here means both operands were zero (of either sign): report equal
beq 3f
mvns r0, r0 @ carry inverse of r0 sign
adds r0, r0
3:
// pop does not alter the flags, so they are returned to the caller as the result
pop {r0-r2, pc}
// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
push {lr}
// flags-only compare; Z set means equal
bl __aeabi_cfcmpeq
beq 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
push {lr}
bl __aeabi_cfcmple
// r0 - r0 - !C: 0 when carry is set, all ones when carry is clear.
// NOTE(review): the "true" value produced here is 0xffffffff rather than 1 - confirm
// callers only test for non-zero.
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
push {lr}
bl __aeabi_cfcmple
// "lower or same" (C clear or Z set) means x <= y
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cfrcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cfrcmple
// same carry-to-mask trick as __aeabi_fcmplt: 0 or 0xffffffff
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
// r3 = 0x01000000; (v << 1) + r3 carries out with a non-zero result only when v is a NaN
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1f
lsls r2, r1, #1
adds r2, r3
bhi 1f
// neither operand is a NaN: ordered
movs r0, #0
bx lr
1:
// at least one NaN: unordered
movs r0, #1
bx lr
// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
// r1 = 0 is the (positive) sign word expected by the shared conversion code
subs r1, r1
cmp r0, #0
bne __aeabi_i2f_main
// input was zero: return +0.0
mov r0, r1
bx lr
float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
wrapper_func __aeabi_i2f
// r1 = sign bit only (0 or 0x80000000); negate r0 when negative so it holds the magnitude
lsrs r1, r0, #31
lsls r1, #31
bpl 1f
rsbs r0, #0
1:
cmp r0, #0
beq 7f
// Shared tail. In: r0 = non-zero magnitude, r1 = sign word. Returns via ip.
__aeabi_i2f_main:
mov ip, lr
push {r0, r1}
// call the function whose address is stored in sf_clz_func (set up by
// __aeabi_float_init); its result in r0 is used as the normalising shift
ldr r3, =sf_clz_func
ldr r3, [r3]
blx r3
// r1 = magnitude, r2 = sign word
pop {r1, r2}
lsls r1, r0
// r0 = 158 - shift: biased exponent for a value whose leading 1 is at bit 31
subs r0, #158
rsbs r0, #0
adds r1,#0x80 @ rounding
bcs 5f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
lsls r3,r1,#24 @ check bottom 8 bits of r1
beq 6f @ in rounding-tie case?
lsls r1,#1 @ remove leading 1
3:
lsrs r1,#9 @ align mantissa
lsls r0,#23 @ align exponent
orrs r0,r2 @ apply sign (r2 holds the sign word)
4:
orrs r0,r1 @ merge in the aligned mantissa
1:
bx ip
5:
adds r0,#1 @ correct exponent offset
b 3b
6:
lsrs r1,#9 @ ensure even result
lsls r1,#10
b 3b
7:
// zero input: r0 is already 0, which is also +0.0
bx lr
// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
// Truncates toward zero. Values with |x| < 1 give 0; values whose exponent is too
// large for a 32-bit result (including infinities and NaNs) saturate to
// 0x7fffffff for a clear sign bit and 0x80000000 for a set sign bit.
// Clobbers r1-r3. An unreachable older implementation that used to follow the
// final "bx lr" has been removed; it could never execute but still referenced
// float2int and __aeabi_f2uiz.
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
// r2 = biased exponent, r3 = 0x80000000
lsls r1, r0, #1
lsrs r2, r1, #24
movs r3, #0x80
lsls r3, #24
// exponent <= 126 means |x| < 1
cmp r2, #126
ble 1f
// r2 = exponent - 158; non-negative means |x| >= 2^31
subs r2, #158
bge 2f
// r1 = 0 or -1 according to sign
asrs r1, r0, #31
// r0 = mantissa with the implicit leading 1 placed at bit 31
lsls r0, #9
lsrs r0, #1
orrs r0, r3
// shift right by (158 - exponent) to leave the integer magnitude
negs r2, r2
lsrs r0, r2
// r1 = +1 or -1; multiply to apply the sign
lsls r1, #1
adds r1, #1
muls r0, r1
bx lr
1:
movs r0, #0
bx lr
2:
// saturate: sign(0/1) + 0x80000000 - 1 gives 0x7fffffff or 0x80000000
lsrs r0, #31
adds r0, r3
subs r0, #1
bx lr
// The routines below are thin stubs: each lives in its own section and simply
// tail-calls the matching sf_table entry. The "shimmable" ones additionally carry
// the shim reference used when PICO_FLOAT_SUPPORT_ROM_V1 is set.
float_section float2int
regular_func float2int
shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
float_section float2fix
regular_func float2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
float_section float2ufix
regular_func float2ufix
table_tail_call SF_TABLE_FLOAT2UFIX
// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
table_tail_call SF_TABLE_FLOAT2UINT
float_section fix2float
regular_func fix2float
table_tail_call SF_TABLE_FIX2FLOAT
float_section ufix2float
regular_func ufix2float
table_tail_call SF_TABLE_UFIX2FLOAT
float_section fix642float
regular_func fix642float
shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
float_section ufix642float
regular_func ufix642float
shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
float_wrapper_section __aeabi_l2f
1:
// fast path: the value fits in 32 bits, so hand the low word to __aeabi_i2f
ldr r2, =__aeabi_i2f
bx r2
wrapper_func __aeabi_l2f
// the high word (r1) equals the sign extension of the low word (r0) exactly
// when the 64-bit value is representable as a 32-bit int
asrs r2, r0, #31
cmp r1, r2
beq 1b
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion
float_wrapper_section __aeabi_ul2f
1:
// fast path: high word is zero, so hand the low word to __aeabi_ui2f
ldr r2, =__aeabi_ui2f
bx r2
wrapper_func __aeabi_ul2f
cmp r1, #0
beq 1b
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
// r0 + r0 carries out exactly when the sign bit is set
cmn r0, r0
// sign bit clear: float2int64 handles the value directly
bcc float2int64
push {lr}
// clear the sign bit, convert the magnitude with float2ufix64 (r1 = 0 passed as
// its second argument), then negate the 64-bit result
lsls r0, #1
lsrs r0, #1
movs r1, #0
bl float2ufix64
// top bit of the unsigned result set: magnitude does not fit, so saturate
cmp r1, #0
bmi 1f
// 64-bit negate: r1:r0 = 0 - r1:r0
movs r2, #0
rsbs r0, #0
sbcs r2, r1
mov r1, r2
pop {pc}
1:
// return 0x8000000000000000
movs r1, #128
lsls r1, #24
movs r0, #0
pop {pc}
// Thin sf_table stubs, one per section.
float_section float2int64
regular_func float2int64
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
float_section float2fix64
regular_func float2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
float_section float2ufix64
regular_func float2ufix64
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
// double FUNC_NAME(__aeabi_f2d)(float) float to double conversion; the result is
// returned in r1 (high word) : r0 (low word).
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
// NaN input: build a double NaN by hand instead of calling the table routine
// copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
// r1 = float bits shifted right arithmetically by 3, with bits 27..30 forced to 1
//      so the 11-bit double exponent field is all ones
// r0 = float bits shifted left by 25
// NOTE(review): the low 3 mantissa bits would line up with the top of the low word
// after a shift of 29, not 25, and no explicit "high id bit" is ORed in - the
// result is still a NaN, but confirm the intended payload layout.
asrs r1, r0, #3
movs r2, #0xf
lsls r2, #27
orrs r1, r2
lsls r0, #25
bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
// same NaN test as __check_nan_f1: carry out with a non-zero result means NaN
movs r3, #1
lsls r3, #24
lsls r2, r0, #1
adds r2, r3
bhi 1b
#endif
shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
// float sqrtf(float) square root via the sf_table FSQRT entry.
// The section is named after the wrapped symbol like every other wrapper in this
// file (the name was previously misspelled "srqtf").
float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1
// check for negative
// r1 = sign-extended sign+exponent; negative when the sign bit is set
asrs r1, r0, #23
bmi 1f
#endif
table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1
1:
// ~r1 equals 255 exactly when the sign is set and the exponent field is zero
mvns r0, r1
cmp r0, #255
bne 2f
// -0 or -Denormal return -0 (0x80000000)
lsls r0, #31
bx lr
2:
// return -Inf (0xff800000)
asrs r0, r1, #31
lsls r0, #23
bx lr
#endif
// float cosf(float) cosine via the sf_table FCOS entry, with range reduction for
// large arguments.
float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
// rom version only works for -128 < angle < 128
// r1 = biased exponent; 127 + 7 and above means |x| >= 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
// (setting bit 22 also leaves an input that is already a NaN as a NaN)
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
// reduce the argument with remainderf(x, 2*pi), restore lr, then retry the table call
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
// float sinf(float) sine via the sf_table FSIN entry, with range reduction for
// large arguments.
float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
// rom version only works for -128 < angle < 128
// r1 = biased exponent; 127 + 7 and above means |x| >= 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
// (setting bit 22 also leaves an input that is already a NaN as a NaN)
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
// reduce the argument with remainderf(x, 2*pi), restore lr, then retry the table call
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
// void sincosf(float x, float *a, float *b): r0 = x, r1 and r2 = output pointers.
// Calls the sf_table FSIN entry and stores its r0 result through the first pointer
// and its r1 result through the second.
// NOTE(review): this relies on the FSIN table routine leaving the cosine in r1 -
// confirm against the ROM documentation.
float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sincosf
// keep both output pointers and the return address across the calls below
push {r1, r2, lr}
// rom version only works for -128 < angle < 128
lsls r3, r0, #1
lsrs r3, #24
cmp r3, #127 + 7
bge 3f
2:
ldr r3, =sf_table
ldr r3, [r3, #SF_TABLE_FSIN]
blx r3
// r2 = first output pointer, r3 = second output pointer
pop {r2, r3}
str r0, [r2]
str r1, [r3]
pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
// NOTE(review): nothing visible branches to this aligned "pop {pc}" - it looks like
// padding or a leftover; confirm before removing.
.align 2
pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r3, #255
bne 4f
// infinite to nan
movs r3, #1
lsls r3, #22
orrs r0, r3
// r1/r2 still hold the caller's pointers: store the NaN through both, then drop
// the three words pushed on entry and return via the untouched lr
str r0, [r1]
str r0, [r2]
add sp, #12
bx lr
4:
#endif
// reduce the argument with remainderf(x, 2*pi), restore lr, then retry the table call
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
// float tanf(float) tangent via the sf_table FTAN entry, with range reduction for
// large arguments.
float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
// rom version only works for -128 < angle < 128
// r1 = biased exponent; 127 + 7 and above means |x| >= 128
lsls r1, r0, #1
lsrs r1, #24
cmp r1, #127 + 7
bge 1f
2:
table_tail_call SF_TABLE_FTAN
1:
#if PICO_FLOAT_PROPAGATE_NANS
// also check for infinites
cmp r1, #255
bne 3f
// infinite to nan
// (setting bit 22 also leaves an input that is already a NaN as a NaN)
movs r1, #1
lsls r1, #22
orrs r0, r1
bx lr
3:
#endif
// reduce the argument with remainderf(x, 2*pi), restore lr, then retry the table call
ldr r1, =0x40c90fdb // 2 * M_PI
push {lr}
bl remainderf
pop {r1}
mov lr, r1
b 2b
// float atan2f(float, float): NaN-guarded two-argument wrapper around the
// sf_table FATAN2 entry (shimmable when PICO_FLOAT_SUPPORT_ROM_V1 is set).
float_wrapper_section atan2f
wrapper_func_f2 atan2f
shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
// float expf(float): NaN-guarded wrapper around the sf_table FEXP entry.
float_wrapper_section expf
wrapper_func_f1 expf
table_tail_call SF_TABLE_FEXP
// float logf(float): NaN-guarded wrapper around the sf_table FLN entry.
float_wrapper_section logf
wrapper_func_f1 logf
table_tail_call SF_TABLE_FLN

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <string.h>
#include "pico/bootrom.h"
#include "pico/bootrom/sf_table.h"
// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE...
// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE
uint32_t sf_table[SF_TABLE_V2_SIZE / 2];
void *sf_clz_func;
#if !PICO_FLOAT_SUPPORT_ROM_V1
// Placeholder installed into empty sf_table slots when ROM V1 support is not
// compiled in: calling it panics (with an empty message) and never returns.
static __attribute__((noreturn)) void missing_float_func_shim() {
panic("");
}
#endif
// Populates sf_table and sf_clz_func from the boot ROM.
// The 'S','F' data table is copied into sf_table according to the ROM version:
//  - version 1: only the first SF_TABLE_V1_SIZE bytes are copied; the remaining
//    entries are pointed either at float_table_shim_on_use_helper (when
//    PICO_FLOAT_SUPPORT_ROM_V1 is set) or, for entries still zero, at
//    missing_float_func_shim;
//  - version 2 or later: SF_TABLE_V2_SIZE bytes are copied.
// sf_clz_func is set from the ROM function looked up with code 'L','3'.
// The assembly file registers this function with "__pre_init ... 00020".
void __aeabi_float_init() {
int rom_version = rp2040_rom_version();
void *rom_table = rom_data_lookup(rom_table_code('S', 'F'));
#if PICO_FLOAT_SUPPORT_ROM_V1
if (rom_version == 1) {
memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
extern void float_table_shim_on_use_helper();
// todo replace NDEBUG with a more exclusive assertion guard
#ifndef NDEBUG
// sanity-check fixed ROM addresses that the shim code presumably relies on;
// panic if any of them holds an unexpected value
if (*(uint16_t *)0x29ee != 0x0fc4 || // this is packx
*(uint16_t *)0x29c0 != 0x0dc2 || // this is upackx
*(uint16_t *)0x2b96 != 0xb5c0 || // this is cordic_vec
*(uint16_t *)0x2b18 != 0x2500 || // this is packretns
*(uint16_t *)0x2acc != 0xb510 || // this is float2fix
*(uint32_t *)0x2cfc != 0x6487ed51 // pi_q29
) {
panic("");
}
#endif
// this is a little tricky.. we only want to pull in a shim if the corresponding function
// is called. to that end we include a SVC instruction with the table offset as the call number
// followed by the shim function pointer inside the actual wrapper function. that way if the wrapper
// function is garbage collected, so is the shim function.
//
// float_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address
// pointed to by IP and patches the float_table entry with the real shim the first time the function is called.
// every entry beyond the V1 table is routed through the on-use helper
for(uint i=SF_TABLE_V1_SIZE/4; i<SF_TABLE_V2_SIZE/4; i++) {
sf_table[i] = (uintptr_t)float_table_shim_on_use_helper;
}
// we shim these for -0 and -denormal handling
sf_table[SF_TABLE_FLOAT2INT/4] = sf_table[SF_TABLE_FLOAT2FIX/4] = (uintptr_t)float_table_shim_on_use_helper;
}
#else
if (rom_version == 1) {
memcpy(&sf_table, rom_table, SF_TABLE_V1_SIZE);
// opting for soft failure for now - you'll get a panic at runtime if you call any of the missing methods
// any entry that is still zero after the copy gets the panicking placeholder
for(uint i=0;i<SF_TABLE_V2_SIZE/4;i++) {
if (!sf_table[i]) sf_table[i] = (uintptr_t)missing_float_func_shim;
}
}
#endif
if (rom_version >= 2) {
// the byte two before the table, times 4, must cover the V2 table size
// (presumably a word count stored by the ROM - confirm)
assert(*((uint8_t *)(rom_table-2)) * 4 >= SF_TABLE_V2_SIZE);
memcpy(&sf_table, rom_table, SF_TABLE_V2_SIZE);
}
sf_clz_func = rom_func_lookup(rom_table_code('L', '3'));
}

View File

@ -0,0 +1,565 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/types.h"
#include "pico/float.h"
#include "pico/platform.h"
typedef uint32_t ui32;
typedef int32_t i32;
#define PINF ( HUGE_VAL)
#define MINF (-HUGE_VAL)
#define NANF ((float)NAN)
#define PZERO (+0.0)
#define MZERO (-0.0)
#define PI 3.14159265358979323846
#define LOG2 0.69314718055994530941
// Unfortunately in double precision ln(10) is very close to half-way between two representable numbers
#define LOG10 2.30258509299404568401
#define LOG2E 1.44269504088896340737
#define LOG10E 0.43429448190325182765
#define ONETHIRD 0.33333333333333333333
#define PIf 3.14159265358979323846f
#define LOG2f 0.69314718055994530941f
#define LOG2Ef 1.44269504088896340737f
#define LOG10Ef 0.43429448190325182765f
#define ONETHIRDf 0.33333333333333333333f
#define FUNPACK(x,e,m) e=((x)>>23)&0xff,m=((x)&0x007fffff)|0x00800000
#define FUNPACKS(x,s,e,m) s=((x)>>31),FUNPACK((x),(e),(m))
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
// True when x is a NaN: once the sign bit has been shifted out, a NaN's bit
// pattern is strictly above the pattern of infinity (0xff000000).
static inline bool fisnan(float x) {
    const ui32 bits = *(i32 *)&x;
    const ui32 without_sign = bits << 1;
    return without_sign > 0xff000000u;
}
// NaN propagation guards, used as statements at the top of float-returning functions.
// With PICO_FLOAT_PROPAGATE_NANS set they make the enclosing function return the
// first NaN argument (x is tested before y); otherwise they expand to a no-op.
// Each guard is wrapped in do { } while (0) so that it behaves as exactly one
// statement: the caller's trailing ';' is consumed and a following 'else' cannot
// bind to the macro's internal 'if'.
#if PICO_FLOAT_PROPAGATE_NANS
#define check_nan_f1(x) do { if (fisnan((x))) return (x); } while (0)
#define check_nan_f2(x,y) do { if (fisnan((x))) return (x); if (fisnan((y))) return (y); } while (0)
#else
#define check_nan_f1(x) ((void)0)
#define check_nan_f2(x,y) ((void)0)
#endif
// Returns the 9-bit field made of the sign bit and the 8 exponent bits of x
// (bits 31..23 of its representation).
static inline int fgetsignexp(float x) {
    ui32 bits = *(ui32 *)&x;
    bits >>= 23;
    return bits & 0x1ff;
}
// Returns the biased 8-bit exponent field of x (bits 30..23 of its representation).
static inline int fgetexp(float x) {
    ui32 bits = *(ui32 *)&x;
    bits >>= 23;
    return bits & 0xff;
}
static inline float fldexp(float x,int de) {
ui32 ix=*(ui32*)&x,iy;
int e;
e=fgetexp(x);
if(e==0||e==0xff) return x;
e+=de;
if(e<=0) iy=ix&0x80000000; // signed zero for underflow
else if(e>=0xff) iy=(ix&0x80000000)|0x7f800000ULL; // signed infinity on overflow
else iy=ix+((ui32)de<<23);
return *(float*)&iy;
}
// ldexpf wrapper: x * 2^de, computed by fldexp after the optional NaN guard.
float WRAPPER_FUNC(ldexpf)(float x, int de) {
    check_nan_f1(x);
    const float scaled = fldexp(x, de);
    return scaled;
}
static inline float fcopysign(float x,float y) {
ui32 ix=*(ui32*)&x,iy=*(ui32*)&y;
ix=((ix&0x7fffffff)|(iy&0x80000000));
return *(float*)&ix;
}
// copysignf wrapper: magnitude of x with the sign of y, after the optional NaN guard.
float WRAPPER_FUNC(copysignf)(float x, float y) {
    check_nan_f2(x,y);
    const float result = fcopysign(x, y);
    return result;
}
// Classification helpers working purely on the exponent (and sign) fields.
// Zero tests: exponent field 0, so denormals also count as zero.
static inline int fiszero(float x) {
    return fgetexp(x) == 0;
}
// Positive zero: sign clear and exponent field 0.
static inline int fispzero(float x) {
    return fgetsignexp(x) == 0;
}
// Negative zero: sign set and exponent field 0.
static inline int fismzero(float x) {
    return fgetsignexp(x) == 0x100;
}
// Infinity tests: exponent field 0xff, so NaNs also match.
static inline int fisinf(float x) {
    return fgetexp(x) == 0xff;
}
// Positive infinity: sign clear and exponent field 0xff.
static inline int fispinf(float x) {
    return fgetsignexp(x) == 0xff;
}
// Negative infinity: sign set and exponent field 0xff.
static inline int fisminf(float x) {
    return fgetsignexp(x) == 0x1ff;
}
static inline int fisint(float x) {
ui32 ix=*(ui32*)&x,m;
int e=fgetexp(x);
if(e==0) return 1; // 0 is an integer
e-=0x7f; // remove exponent bias
if(e<0) return 0; // |x|<1
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return 1; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bits of significance <1
if(ix&m) return 0; // not an integer
return 1;
}
static inline int fisoddint(float x) {
ui32 ix=*(ui32*)&x,m;
int e=fgetexp(x);
e-=0x7f; // remove exponent bias
if(e<0) return 0; // |x|<1; 0 is not odd
e=23-e; // bit position in mantissa with significance 1
if(e<0) return 0; // |x| large, so must be even
m=(1<<e)-1; // mask for bits of significance <1 (if any)
if(ix&m) return 0; // not an integer
if(e==23) return 1; // value is exactly 1
return (ix>>e)&1;
}
// Returns 1 when x has its sign bit set and is not zero (per fiszero), else 0.
static inline int fisstrictneg(float x) {
    const ui32 bits = *(ui32 *)&x;
    return fiszero(x) ? 0 : (int)(bits >> 31);
}
static inline int fisneg(float x) {
ui32 ix=*(ui32*)&x;
return ix>>31;
}
// Returns x with its sign bit flipped.
static inline float fneg(float x) {
    ui32 flipped = *(ui32 *)&x ^ 0x80000000;
    return *(float *)&flipped;
}
// Returns 1 when x is a finite, non-zero value whose mantissa field is zero,
// i.e. its magnitude is an exact power of two.
static inline int fispo2(float x) {
    const ui32 bits = *(ui32 *)&x;
    if (fiszero(x) || fisinf(x)) return 0;
    return (bits & 0x007fffff) == 0;
}
// Result selector for domain errors: a NaN when PICO_FLOAT_PROPAGATE_NANS is
// set, otherwise the caller-supplied fallback x.
static inline float fnan_or(float x) {
#if !PICO_FLOAT_PROPAGATE_NANS
    return x;
#else
    return NANF;
#endif
}
float WRAPPER_FUNC(truncf)(float x) {
check_nan_f1(x);
ui32 ix=*(ui32*)&x,m;
int e=fgetexp(x);
e-=0x7f; // remove exponent bias
if(e<0) { // |x|<1
ix&=0x80000000;
return *(float*)&ix;
}
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bits of significance <1
ix&=~m;
return *(float*)&ix;
}
float WRAPPER_FUNC(roundf)(float x) {
check_nan_f1(x);
ui32 ix=*(ui32*)&x,m;
int e=fgetexp(x);
e-=0x7f; // remove exponent bias
if(e<-1) { // |x|<0.5
ix&=0x80000000;
return *(float*)&ix;
}
if(e==-1) { // 0.5<=|x|<1
ix&=0x80000000;
ix|=0x3f800000; // ±1
return *(float*)&ix;
}
e=23-e; // bit position in mantissa with significance 1, <=23
if(e<=0) return x; // |x| large, so must be an integer
m=1<<(e-1); // mask for bit of significance 0.5
ix+=m;
m=m+m-1; // mask for bits of significance <1
ix&=~m;
return *(float*)&ix;
}
float WRAPPER_FUNC(floorf)(float x) {
check_nan_f1(x);
ui32 ix=*(ui32*)&x,m;
int e=fgetexp(x);
if(e==0) { // x==0
ix&=0x80000000;
return *(float*)&ix;
}
e-=0x7f; // remove exponent bias
if(e<0) { // |x|<1, not zero
if(fisneg(x)) return -1;
return PZERO;
}
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bit of significance <1
if(fisneg(x)) ix+=m; // add 1-ε to magnitude if negative
ix&=~m; // truncate
return *(float*)&ix;
}
float WRAPPER_FUNC(ceilf)(float x) {
check_nan_f1(x);
ui32 ix=*(ui32*)&x,m;
int e=fgetexp(x);
if(e==0) { // x==0
ix&=0x80000000;
return *(float*)&ix;
}
e-=0x7f; // remove exponent bias
if(e<0) { // |x|<1, not zero
if(fisneg(x)) return MZERO;
return 1;
}
e=23-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1<<e)-1; // mask for bit of significance <1
if(!fisneg(x)) ix+=m; // add 1-ε to magnitude if positive
ix&=~m; // truncate
return *(float*)&ix;
}
// asinf wrapper: atan2(x, sqrt(1 - x^2)). When 1 - x^2 is strictly negative the
// result is fnan_or(PINF).
float WRAPPER_FUNC(asinf)(float x) {
    check_nan_f1(x);
    const float one_minus_x2 = (1.0f - x) * (1.0f + x);
    if (fisstrictneg(one_minus_x2)) {
        return fnan_or(PINF);
    }
    return atan2f(x, sqrtf(one_minus_x2));
}
// acosf wrapper: atan2(sqrt(1 - x^2), x). When 1 - x^2 is strictly negative the
// result is fnan_or(PINF).
float WRAPPER_FUNC(acosf)(float x) {
    check_nan_f1(x);
    const float one_minus_x2 = (1.0f - x) * (1.0f + x);
    if (fisstrictneg(one_minus_x2)) {
        return fnan_or(PINF);
    }
    return atan2f(sqrtf(one_minus_x2), x);
}
// atanf wrapper: +-pi/2 for +-infinity, otherwise atan2(x, 1).
float WRAPPER_FUNC(atanf)(float x) {
    check_nan_f1(x);
    if (fispinf(x)) {
        return (float)( PIf/2);
    }
    if (fisminf(x)) {
        return (float)(-PIf/2);
    }
    return atan2f(x, 1.0f);
}
// sinhf wrapper: (e^x - e^-x) / 2, the halving done by fldexp.
float WRAPPER_FUNC(sinhf)(float x) {
    check_nan_f1(x);
    const float grow = expf(x);
    const float decay = expf(fneg(x));
    return fldexp(grow - decay, -1);
}
// coshf wrapper: (e^x + e^-x) / 2, the halving done by fldexp.
float WRAPPER_FUNC(coshf)(float x) {
    check_nan_f1(x);
    const float grow = expf(x);
    const float decay = expf(fneg(x));
    return fldexp(grow + decay, -1);
}
// tanhf wrapper: (e^2x - 1) / (e^2x + 1), short-circuited to +-1 for |x| >= 16
// so that no infinities are generated.
float WRAPPER_FUNC(tanhf)(float x) {
    check_nan_f1(x);
    if (fgetexp(x) >= 4+0x7f) {     // |x| >= 16
        if (fisneg(x)) return -1;
        return 1;
    }
    const float e2x = expf(fldexp(x, 1));
    return (e2x - 1.0f) / (e2x + 1.0f);
}
// asinhf wrapper. For |x| >= 2^16 uses sign(x) * (ln|x| + ln 2); otherwise
// evaluates ln(sqrt(x^2 + 1) + |x|) in double precision and applies the sign.
float WRAPPER_FUNC(asinhf)(float x) {
    check_nan_f1(x);
    if (fgetexp(x) >= 16+0x7f) {    // |x| >= 2^16: 1/x^2 is negligible against 1
        if (fisneg(x)) return fneg(logf(fneg(x))+LOG2f);
        return logf( x )+LOG2f;
    }
    const double xd = (double)x;
    const double root = sqrt(xd*xd+1.0);
    if (x > 0) {
        return (float)log(root+xd);
    }
    return fneg((float)log(root-xd));
}
// acoshf wrapper. The sign of x is discarded first; for |x| >= 2^16 the result is
// ln|x| + ln 2, otherwise ln(sqrt((x+1)(x-1)) + x) evaluated in double precision.
float WRAPPER_FUNC(acoshf)(float x) {
    check_nan_f1(x);
    if (fisneg(x)) {
        x = fneg(x);
    }
    if (fgetexp(x) >= 16+0x7f) {    // |x| >= 2^16
        return logf(x)+LOG2f;
    }
    const double xd = (double)x;
    return (float)log(sqrt((xd+1.0)*(xd-1.0))+xd);
}
// atanhf wrapper: ln((1 + x) / (1 - x)) / 2, the halving done by fldexp.
float WRAPPER_FUNC(atanhf)(float x) {
    check_nan_f1(x);
    const float ratio = (1.0f+x)/(1.0f-x);
    return fldexp(logf(ratio), -1);
}
// exp2f wrapper: 2^x as exp(x * ln 2), evaluated in double precision.
float WRAPPER_FUNC(exp2f)(float x) {
    check_nan_f1(x);
    return (float)exp((double)x*LOG2);
}
// log2f wrapper: ln(x) * log2(e) in single precision.
float WRAPPER_FUNC(log2f)(float x) {
    check_nan_f1(x);
    return logf(x)*LOG2Ef;
}
// exp10f wrapper: 10^x as exp(x * ln 10), evaluated in double precision.
float WRAPPER_FUNC(exp10f)(float x) {
    check_nan_f1(x);
    return (float)exp((double)x*LOG10);
}
// log10f wrapper: ln(x) * log10(e) in single precision.
float WRAPPER_FUNC(log10f)(float x) {
    check_nan_f1(x);
    return logf(x)*LOG10Ef;
}
// expm1f wrapper: e^x - 1, evaluated in double precision.
float WRAPPER_FUNC(expm1f)(float x) {
    check_nan_f1(x);
    return (float)(exp((double)x)-1);
}
// log1pf wrapper: ln(1 + x), evaluated in double precision.
float WRAPPER_FUNC(log1pf)(float x) {
    check_nan_f1(x);
    return (float)(log(1+(double)x));
}
// fmaf wrapper: x*y + z computed in double precision and then rounded to float.
// The result is rounded twice (once in double, once to float), so it is not an
// exact fused multiply-add.
float WRAPPER_FUNC(fmaf)(float x,float y,float z) {
    check_nan_f2(x,y);
    check_nan_f1(z);
    const double product = (double)x*(double)y;
    return (float)(product+(double)z);
}
// General power for x > 0: exp(y * ln x), with double-precision intermediates
// for better accuracy.
static inline float fpow_1(float x,float y) {
    const double log_x = log((double)x);
    return (float)exp(log_x*(double)y);
}
// x raised to the integer power y by recursive square-and-multiply.
// The recursion only terminates for y >= 1; callers must ensure that.
static float fpow_int2(float x,int y) {
    if (y != 1) {
        float acc = fpow_int2(x, y / 2);    // x^(y/2)
        acc *= acc;                         // squared
        if (y & 1) {
            acc *= x;                       // extra factor for an odd exponent
        }
        return acc;
    }
    return x;
}
// Integer power for the case where x is neither zero nor infinity and y is small
// and non-zero. A negative exponent is handled by inverting x first.
static inline float fpowint_1(float x,int y) {
    if (y < 0) {
        x = 1.0f/x;
        y = -y;
    }
    return fpow_int2(x, y);
}
// Integer power for the case where x is neither zero nor infinity.
//   x - base (may be negative)
//   y - integer exponent
// Negative bases are handled by computing the power of |x| and negating the
// result when y is odd. Exact powers of two are handled by exponent arithmetic,
// small exponents by repeated multiplication, everything else by exp/log.
static float fpowint_0(float x,int y) {
    int e;
    if(fisneg(x)) {
        // Test the parity of the integer directly. Converting y to float (as
        // fisoddint() would require) rounds away the low bit once |y| exceeds
        // 2^24, which would make large odd exponents look even.
        if(y&1) return fneg(fpowint_0(fneg(x),y));
        else return fpowint_0(fneg(x),y);
    }
    if(fispo2(x)) {
        e=fgetexp(x)-0x7f;
        if(y>=256) y= 255; // avoid overflow
        if(y<-256) y=-256;
        y*=e;
        return fldexp(1,y);
    }
    if(y==0) return 1;
    if(y>=-32&&y<=32) return fpowint_1(x,y);
    return fpow_1(x,y);
}
// powintf wrapper: x raised to the integer power y.
// Special cases are resolved here (x == 1 or y == 0, zero base, NaN, infinite
// base); everything else is delegated to fpowint_0.
float WRAPPER_FUNC(powintf)(float x,int y) {
    const int y_odd = y & 1;
    _Pragma("GCC diagnostic push")
    _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
    if (x == 1.0f || y == 0) {
        return 1;
    }
    if (x == 0.0f) {
        if (y > 0) {
            // positive power of zero: keeps the sign of x only for odd y
            if (y_odd) return x;
            return 0;
        }
        // negative power of zero: infinity, signed like x only for odd y
        if (y_odd) return fcopysign(PINF, x);
        return PINF;
    }
    _Pragma("GCC diagnostic pop")
    check_nan_f1(x);
    if (fispinf(x)) {
        if (y < 0) return 0;
        return PINF;
    }
    if (fisminf(x)) {
        if (y > 0) {
            if (y_odd) return MINF;
            return PINF;
        }
        if (y_odd) return MZERO;
        return PZERO;
    }
    return fpowint_0(x, y);
}
// Power for the case where y is guaranteed to be a finite integer value and x is
// neither zero nor infinity.
// NOTE(review): (int)y is evaluated even when y is outside the int range; this
// presumably relies on the platform's float-to-int conversion saturating - confirm.
static float fpow_0(float x,float y) {
    if (fisneg(x)) {
        // work on |x| and restore the sign for odd exponents
        const float magnitude = fpow_0(fneg(x), y);
        if (fisoddint(y)) return fneg(magnitude);
        return magnitude;
    }
    int p = (int)y;
    if (fispo2(x)) {
        // exact power of two: multiply the exponents instead
        const int e = fgetexp(x) - 0x7f;
        if (p >= 256) p = 255;  // avoid overflow
        if (p < -256) p = -256;
        return fldexp(1, p * e);
    }
    if (p == 0) return 1;
    if (p < -32 || p > 32) return fpow_1(x, y);
    return fpowint_1(x, p);
}
// powf(x,y): x raised to the power y.
// Special operands (1, zeros, infinities) are resolved first; integer y goes to
// fpow_0 and any remaining positive x goes through exp/log in fpow_1.
float WRAPPER_FUNC(powf)(float x,float y) {
    _Pragma("GCC diagnostic push")
    _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
    if(x==1.0f||fiszero(y)) return 1;
    check_nan_f2(x,y);
    if(x==-1.0f&&fisinf(y)) return 1;
    _Pragma("GCC diagnostic pop")
    if(fiszero(x)) {
        if(fisneg(y)) return fisoddint(y)?fcopysign(PINF,x):PINF;
        return fisoddint(y)?x:0; // odd powers return x itself (keeps its sign)
    }
    if(fispinf(x)) return fisneg(y)?0:PINF;
    if(fisminf(x)) {
        if(fisneg(y)) return fisoddint(y)?MZERO:PZERO;
        return fisoddint(y)?MINF:PINF;
    }
    // infinite y: the outcome depends only on whether x has an exponent below the bias
    if(fispinf(y)) return fgetexp(x)<0x7f?PZERO:PINF;
    if(fisminf(y)) return fgetexp(x)<0x7f?PINF:PZERO;
    if(fisint(y)) return fpow_0(x,y);
    if(fisneg(x)) return PINF; // negative x with non-integer y
    return fpow_1(x,y);
}
// hypotf(x,y) = sqrt(x*x+y*y); the operands are pre-scaled by 2^-70 or 2^70 when
// their exponents are large or small enough that the squares could overflow or underflow
float WRAPPER_FUNC(hypotf)(float x,float y) {
    check_nan_f2(x,y);
    const int ex=fgetexp(x),ey=fgetexp(y);
    int scale=0; // power of two restored after the square root
    if(ex>=0x7f+50||ey>=0x7f+50) scale=70;        // overflow, or nearly so
    else if(ex<=0x7f-50&&ey<=0x7f-50) scale=-70;  // underflow, or nearly so
    if(scale==0) return sqrtf(x*x+y*y);
    x=fldexp(x,-scale);
    y=fldexp(y,-scale);
    return fldexp(sqrtf(x*x+y*y),scale);
}
// cbrtf(x): cube root. About a third of the exponent is split off first and the
// cube root of the rescaled value is then taken as exp(ln(v)/3).
float WRAPPER_FUNC(cbrtf)(float x) {
    check_nan_f1(x);
    if(fisneg(x)) return fneg(cbrtf(fneg(x))); // odd function: reuse the positive case
    if(fiszero(x)) return fcopysign(PZERO,x);
    const int unbiased=fgetexp(x)-0x7f;
    const int third=(unbiased*0x5555+0x8000)>>16; // ~e/3, rounded
    float v=fldexp(x,-third*3);
    v=expf(logf(v)*ONETHIRDf);
    return fldexp(v,third);
}
// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo
// 2^23<=|mx|,my<2^24, e>=0; 0<=result<my
// mx:   dividend mantissa (the mx<0 corrections below allow for a negative value)
// my:   divisor mantissa
// e:    number of bits by which mx must still be scaled up
// pquo: where to store the accumulated quotient; may be null, in which case nothing is stored
static i32 frem_0(i32 mx,i32 my,int e,int*pquo) {
int quo=0,q,r=0,s;
if(e>0) {
r=0xffffffffU/(ui32)(my>>7); // reciprocal estimate Q16
}
// each pass shifts mx up by s bits and subtracts q*my, where q is the
// reciprocal-based estimate of the next s quotient bits
while(e>0) {
s=e; if(s>12) s=12; // gain up to 12 bits on each iteration
q=(mx>>9)*r; // Q30
q=((q>>(29-s))+1)>>1; // Q(s), rounded
mx=(mx<<s)-my*q;
quo=(quo<<s)+q;
e-=s;
}
// final correction: bring mx into 0<=mx<my by adjusting it (and quo) by up to
// two multiples of my in either direction
if(mx>=my) mx-=my,quo++; // when e==0 mx can be nearly as big as 2my
if(mx>=my) mx-=my,quo++;
if(mx<0) mx+=my,quo--;
if(mx<0) mx+=my,quo--;
if(pquo) *pquo=quo;
return mx;
}
// fmodf(x,y): remainder of x/y carrying the sign of x.
// Works on the unpacked sign/exponent/mantissa fields and delegates the actual
// reduction to frem_0.
float WRAPPER_FUNC(fmodf)(float x,float y) {
    check_nan_f2(x,y);
    ui32 ix=*(ui32*)&x,iy=*(ui32*)&y;
    int sx,ex,ey;
    i32 mx,my;
    FUNPACKS(ix,sx,ex,mx);
    FUNPACK(iy,ey,my);
    if(ex==0xff) return fnan_or(PINF);      // x has the maximum exponent field
    if(ey==0) return PINF;                  // y has a zero exponent field
    if(ex==0) return fisneg(x)?MZERO:PZERO; // x has a zero exponent field: signed zero
    if(ex<ey) return x;                     // |x|<|y|
    i32 rem=frem_0(mx,my,ex-ey,0);
    if(sx) rem=-rem;                        // result takes the sign of x
    return fix2float(rem,0x7f-ey+23);
}
// remquof(x,y,quo): remainder of x/y with the quotient rounded to nearest
// (a tie is resolved towards an even quotient, see the test after frem_0 below).
// If quo is non-null it receives the quotient bits computed here, negated when
// y is negative; it is set to 0 on every early-return path.
float WRAPPER_FUNC(remquof)(float x,float y,int*quo) {
check_nan_f2(x,y);
ui32 ix=*(ui32*)&x,iy=*(ui32*)&y;
int sx,sy,ex,ey,q;
i32 mx,my;
FUNPACKS(ix,sx,ex,mx);
FUNPACKS(iy,sy,ey,my);
if(quo) *quo=0;
// special operands, decided on the exponent fields alone
if(ex==0xff) return PINF;
if(ey==0) return PINF;
if(ex==0) return PZERO;
if(ey==0xff) return x;
if(ex<ey-1) return x; // |x|<|y|/2
if(ex==ey-1) {
if(mx<=my) return x; // |x|<=|y|/2, even quotient
// here |y|/2<|x|<|y|
// the remainder is formed at the exponent of x, hence ey-- in both branches
if(!sx) { // x>|y|/2
mx-=my+my;
ey--;
q=1;
} else { // x<-|y|/2
mx=my+my-mx;
ey--;
q=-1;
}
}
else {
if(sx) mx=-mx;
mx=frem_0(mx,my,ex-ey,&q);
// frem_0 leaves 0<=mx<my; move to the nearest multiple of y
if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient?
mx-=my;
q++;
}
}
if(sy) q=-q;
if(quo) *quo=q;
return fix2float(mx,0x7f-ey+23);
}
// dremf(x,y): forwards to remquof and discards the quotient
float WRAPPER_FUNC(dremf)(float x,float y) {
    check_nan_f2(x,y);
    return remquof(x,y,0);
}
// remainderf(x,y): forwards to remquof and discards the quotient
float WRAPPER_FUNC(remainderf)(float x,float y) {
    check_nan_f2(x,y);
    return remquof(x,y,0);
}
_Pragma("GCC diagnostic pop") // strict-aliasing

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
.syntax unified
.cpu cortex-m0plus
.thumb
// Every float routine that can be replaced is declared below, one wrapper_func
// per name, with no code in between, so all of the names refer to the single
// panic stub at the end of the list (wrapper_func is assumed to emit a global
// label for the wrapped name).
// Each name must appear exactly once: a repeated entry would define the same
// label twice.
wrapper_func __aeabi_fadd
wrapper_func __aeabi_fdiv
wrapper_func __aeabi_fmul
wrapper_func __aeabi_frsub
wrapper_func __aeabi_fsub
wrapper_func __aeabi_cfcmpeq
wrapper_func __aeabi_cfrcmple
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_fcmpeq
wrapper_func __aeabi_fcmplt
wrapper_func __aeabi_fcmple
wrapper_func __aeabi_fcmpge
wrapper_func __aeabi_fcmpgt
wrapper_func __aeabi_fcmpun
wrapper_func __aeabi_i2f
wrapper_func __aeabi_l2f
wrapper_func __aeabi_ui2f
wrapper_func __aeabi_ul2f
wrapper_func __aeabi_f2iz
wrapper_func __aeabi_f2lz
wrapper_func __aeabi_f2uiz
wrapper_func __aeabi_f2ulz
wrapper_func __aeabi_f2d
wrapper_func sqrtf
wrapper_func cosf
wrapper_func sinf
wrapper_func tanf
wrapper_func atan2f
wrapper_func expf
wrapper_func logf
wrapper_func ldexpf
wrapper_func copysignf
wrapper_func truncf
wrapper_func floorf
wrapper_func ceilf
wrapper_func roundf
wrapper_func sincosf
wrapper_func asinf
wrapper_func acosf
wrapper_func atanf
wrapper_func sinhf
wrapper_func coshf
wrapper_func tanhf
wrapper_func asinhf
wrapper_func acoshf
wrapper_func atanhf
wrapper_func exp2f
wrapper_func log2f
wrapper_func exp10f
wrapper_func log10f
wrapper_func powf
wrapper_func powintf
wrapper_func hypotf
wrapper_func cbrtf
wrapper_func fmodf
wrapper_func dremf
wrapper_func remainderf
wrapper_func remquof
wrapper_func expm1f
wrapper_func log1pf
wrapper_func fmaf
// common body: report that float support was compiled out
push {lr} // keep stack trace sane
ldr r0, =str
bl panic
// the message follows the call directly; panic is presumably not expected to return
str:
.asciz "float support is disabled"

View File

@ -0,0 +1,347 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#if PICO_FLOAT_SUPPORT_ROM_V1
.syntax unified
.cpu cortex-m0plus
.thumb
#ifndef PICO_FLOAT_IN_RAM
#define PICO_FLOAT_IN_RAM 0
#endif
// float_section <name>: switch to an allocatable, executable ("ax") section for
// the code that follows. The section name is built with RAM_SECTION_NAME when
// PICO_FLOAT_IN_RAM is non-zero and with SECTION_NAME otherwise.
.macro float_section name
// todo separate flag for shims?
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
float_section float_table_shim_on_use_helper
// Entered with ip pointing into the code of the caller. The helper scans forward
// from ip for a halfword whose top byte is 0xdf; the low byte of that halfword is
// an offset into sf_table and the 32-bit word that follows it is a function
// address. The address is stored into sf_table at that offset and then jumped to
// with r0-r2 restored.
// NOTE(review): 0xdfXX looks like it is used purely as an inline marker emitted
// by the calling macro - confirm against the table_tail_call macros.
regular_func float_table_shim_on_use_helper
push {r0-r2, lr}
mov r0, ip
#ifndef NDEBUG
// sanity check to make sure we weren't called by non (shimmable_) table_tail_call macro
cmp r0, #0
bne 1f
bkpt #0
#endif
1:
// scan forward one halfword at a time until the 0xdf marker is found
ldrh r1, [r0]
lsrs r2, r1, #8
adds r0, #2
cmp r2, #0xdf
bne 1b
uxtb r1, r1 // r1 holds table offset
// r0 now points just past the marker; carry receives bit 1 of that address
lsrs r2, r0, #2
bcc 1f
// unaligned
// not word aligned: assemble the 32-bit value from two halfword loads
ldrh r2, [r0, #0]
ldrh r0, [r0, #2]
lsls r0, #16
orrs r0, r2
b 2f
1:
ldr r0, [r0]
2:
ldr r2, =sf_table
// store the address in the table (presumably so that later calls bypass this helper)
str r0, [r2, r1]
// overwrite the stacked lr so that the pop below loads pc with the address
str r0, [sp, #12]
pop {r0-r2, pc}
float_section 642float_shims
@ Four entry points share one body: the plain integer entries clear r2 (no bits
@ after the point) and fall through into the corresponding fixed-point entry.
@ The 64-bit input is in r0:r1 (r0 low word, r1 high word).
@ convert uint64 to float, rounding
regular_func uint642float_shim
movs r2,#0 @ fall through
@ convert unsigned 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
regular_func ufix642float_shim
push {r4,r5,r14}
cmp r1,#0
bpl 3f @ positive? we can use signed code
@ top bit set: keep the high word shifted down one place in r0 (r2 adjusted to
@ match) and gather every bit that was dropped into r5
lsls r5,r1,#31 @ contribution to sticky bits
orrs r5,r0
lsrs r0,r1,#1
subs r2,#1
b 4f
@ convert int64 to float, rounding
regular_func int642float_shim
movs r2,#0 @ fall through
@ convert signed 64-bit fix to float, rounding; number of r0:r1 bits after point in r2
regular_func fix642float_shim
push {r4,r5,r14}
3:
movs r5,r0
orrs r5,r1
beq ret_pop45 @ zero? return +0
asrs r5,r1,#31 @ sign bits
2:
@ normalise: shift r0:r1 left while the top byte of r1 is only sign extension
asrs r4,r1,#24 @ try shifting 7 bits at a time
cmp r4,r5
bne 1f @ next shift will overflow?
lsls r1,#7
lsrs r4,r0,#25
orrs r1,r4
lsls r0,#7
adds r2,#7
b 2b
1:
@ high word becomes the mantissa in r0, low word is kept in r5
movs r5,r0
movs r0,r1
4:
@ r2 = 61 - r2, then hand over to the ROM routine
rsbs r2,#0
adds r2,#32+29
// bl packx
ldr r1, =0x29ef // packx
blx r1
ret_pop45:
pop {r4,r5,r15}
float_section fatan2_shim
@ atan2 shim built on ROM helpers that are called by absolute address.
@ After the two unpack calls and the register swaps below, the comments in this
@ routine treat r0 as x and r1 as y, i.e. the incoming r0 is y and r1 is x.
@ NOTE(review): the ROM addresses (0x29c1, 0x2b97, 0x2cfc, 0x2b19) are presumably
@ specific to the V1 bootrom selected by PICO_FLOAT_SUPPORT_ROM_V1 - confirm.
regular_func fatan2_shim
push {r4,r5,r14}
ldr r4, =0x29c1 // unpackx
mov ip, r4
@ unpack arguments and shift one down to have common exponent
blx ip
@ swap r0 with r1 and r2 with r3, then unpack the other argument
mov r4,r0
mov r0,r1
mov r1,r4
mov r4,r2
mov r2,r3
mov r3,r4
blx ip
lsls r0,r0,#5 @ Q28
lsls r1,r1,#5 @ Q28
adds r4,r2,r3 @ this is -760 if both arguments are 0 and at least -380-126=-506 otherwise
asrs r4,#9
adds r4,#1
bmi 2f @ force y to 0 proper, so result will be zero
subs r4,r2,r3 @ calculate shift
bge 1f @ ex>=ey?
rsbs r4,#0 @ make shift positive
asrs r0,r4
cmp r4,#28
blo 3f
asrs r0,#31
b 3f
1:
asrs r1,r4
cmp r4,#28
blo 3f
2:
@ here |x|>>|y| or both x and y are ±0
cmp r0,#0
bge 4f @ x positive, return signed 0
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
ldr r0,[r3] @ x negative, return +/- pi
asrs r1,#31
eors r0,r1
b 7f
4:
asrs r0,r1,#31
b 7f
3:
movs r2,#0 @ initial angle
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
cmp r0,#0 @ x negative
bge 5f
rsbs r0,#0 @ rotate to 1st/4th quadrants
rsbs r1,#0
ldr r2,[r3] @ pi Q29
5:
movs r4,#1 @ m=1
ldr r5, =0x2b97 @ cordic_vec
blx r5 @ also produces magnitude (with scaling factor 1.646760119), which is discarded
mov r0,r2 @ result here is -pi/2..3pi/2 Q29
@ asrs r2,#29
@ subs r0,r2
ldr r3, =0x2cfc @ &pi_q29, circular coefficients
ldr r2,[r3] @ pi Q29
adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case
bcs 6f @ -pi/2..0? leave result as is
subs r4,r0,r2 @ <pi? leave as is
bmi 6f
subs r0,r4,r2 @ >pi: take off 2pi
6:
subs r0,#1 @ fiddle factor so atan2(0,1)==0
7:
movs r2,#0 @ exponent for pack
@ tail-jump into the ROM; r4, r5 and the return address pushed on entry are
@ presumably popped by the code at that address - confirm
ldr r3, =0x2b19
bx r3
float_section float232_shims
@ float to 32-bit integer / fixed point. These shims only intercept inputs whose
@ sign bit is set and whose exponent field is zero, returning 0 for them; every
@ other input is passed unchanged to the ROM routine at 0x2acd.
regular_func float2int_shim
movs r1,#0 @ fall through
regular_func float2fix_shim
// check for -0 or -denormal upfront
// r2 = sign and exponent field, sign extended: this is -256 exactly when the
// sign bit is 1 and the exponent field is 0, so adding 256 (as two steps of 128)
// gives zero only in that case
asrs r2, r0, #23
adds r2, #128
adds r2, #128
beq 1f
// call original
ldr r2, =0x2acd
bx r2
1:
movs r0, #0
bx lr
float_section float264_shims
@ float to 64-bit integer / fixed point conversions, result in r0:r1.
@ The plain integer entries clear r1 (no places after the point) and fall
@ through into the fixed-point entries.
regular_func float2int64_shim
movs r1,#0 @ and fall through
regular_func float2fix64_shim
push {r14}
bl f2fix
@ d2f64_a checks the result against the sign extension in r3 and returns by
@ popping the address pushed above
b d2f64_a
regular_func float2uint64_shim
movs r1,#0 @ and fall through
regular_func float2ufix64_shim
asrs r3,r0,#23 @ negative? return 0
bmi ret_dzero
@ and fall through
@ (the unsigned entry runs f2fix inline, so the pop at the end of f2fix or of
@ d2fix_a returns straight to the caller of the shim)
@ convert float in r0 to signed fixed point in r0:r1:r3, r1 places after point, rounding towards -Inf
@ result clamped so that r3 can only be 0 or -1
@ trashes r12
.thumb_func
f2fix:
push {r4,r14}
mov r12,r1
asrs r3,r0,#31
lsls r0,#1
lsrs r2,r0,#24
beq 1f @ zero?
cmp r2,#0xff @ Inf?
beq 2f
subs r1,r2,#1
subs r2,#0x7f @ remove exponent bias
lsls r1,#24
subs r0,r1 @ insert implied 1
eors r0,r3
subs r0,r3 @ top two's complement
asrs r1,r0,#4 @ convert to double format
lsls r0,#28
@ continue in d2fix_a, which pops r4 and the return address pushed above
ldr r4, =d2fix_a
bx r4
1:
@ exponent field is zero: result is 0
movs r0,#0
movs r1,r0
movs r3,r0
pop {r4,r15}
2:
mvns r0,r3 @ return max/min value
mvns r1,r3
pop {r4,r15}
ret_dzero:
movs r0,#0
movs r1,#0
bx r14
float_section d2fix_a_float
.weak d2fix_a // weak because it exists in float shims too
.thumb_func
d2fix_a:
@ here
@ r0:r1 two's complement mantissa
@ r2 unbiased exponent
@ r3 mantissa sign extension bits
@ r12 number of places after the point that the caller asked for
@ Shifts the mantissa into position and returns through pop {r4,r15}, so the
@ caller must have pushed r4 and a return address beforehand (f2fix does).
add r2,r12 @ exponent plus offset for required binary point position
subs r2,#52 @ required shift
bmi 1f @ shift down?
@ here a shift up by r2 places
cmp r2,#12 @ will clamp?
bge 2f
movs r4,r0
lsls r1,r2
lsls r0,r2
rsbs r2,#0
adds r2,#32 @ complementary shift
lsrs r4,r2
orrs r1,r4
pop {r4,r15}
2:
mvns r0,r3
mvns r1,r3 @ overflow: clamp to extreme fixed-point values
pop {r4,r15}
1:
@ here a shift down by -r2 places
adds r2,#32
bmi 1f @ long shift?
mov r4,r1
lsls r4,r2
rsbs r2,#0
adds r2,#32 @ complementary shift
asrs r1,r2
lsrs r0,r2
orrs r0,r4
pop {r4,r15}
1:
@ here a long shift down
movs r0,r1
asrs r1,#31 @ shift down 32 places
adds r2,#32
bmi 1f @ very long shift?
rsbs r2,#0
adds r2,#32
asrs r0,r2
pop {r4,r15}
1:
movs r0,r3 @ result very near zero: use sign extension bits
movs r1,r3
pop {r4,r15}
@ d2f64_a: final range check for the signed 64-bit conversions.
@ Returns r0:r1 unchanged when the sign of r1 agrees with r3, otherwise replaces
@ the result with an extreme value. Returns through pop {r15}, matching the
@ push {r14} done by float2fix64_shim.
d2f64_a:
asrs r2,r1,#31
cmp r2,r3
bne 1f @ sign extension bits fail to match sign of result?
pop {r15}
1:
mvns r0,r3
movs r1,#1
lsls r1,#31
eors r1,r1,r0 @ generate extreme fixed-point values
pop {r15}
float_section float2double_shim
@ convert float in r0 to double in r0:r1 (r0 low word, r1 high word)
@ an exponent field of 0 gives a signed zero and an exponent field of 0xff gives
@ a signed infinity; in both of those cases the mantissa bits are discarded
regular_func float2double_shim
lsrs r3,r0,#31 @ sign bit
lsls r3,#31
lsls r1,r0,#1
lsrs r2,r1,#24 @ exponent
beq 1f @ zero?
cmp r2,#0xff @ Inf?
beq 2f
lsrs r1,#4 @ exponent and top 20 bits of mantissa
ldr r2,=#(0x3ff-0x7f)<<20 @ difference in exponent offsets
adds r1,r2
orrs r1,r3
lsls r0,#29 @ bottom 3 bits of mantissa
bx r14
1:
movs r1,r3 @ return signed zero
3:
movs r0,#0
bx r14
2:
ldr r1,=#0x7ff00000 @ return signed infinity
adds r1,r3
b 3b
#endif

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#ifndef _PICO_FLOAT_H
#define _PICO_FLOAT_H
#include <math.h>
#include <float.h>
#include "pico/types.h"
#include "pico/bootrom/sf_table.h"
#ifdef __cplusplus
extern "C" {
#endif
/** \file float.h
* \defgroup pico_float pico_float
*
* Optimized single-precision floating point functions
*
* (Replacement) optimized implementations are provided of the following compiler built-ins
* and math library functions:
*
* - __aeabi_fadd, __aeabi_fdiv, __aeabi_fmul, __aeabi_frsub, __aeabi_fsub, __aeabi_cfcmpeq, __aeabi_cfrcmple, __aeabi_cfcmple, __aeabi_fcmpeq, __aeabi_fcmplt, __aeabi_fcmple, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmpun, __aeabi_i2f, __aeabi_l2f, __aeabi_ui2f, __aeabi_ul2f, __aeabi_f2iz, __aeabi_f2lz, __aeabi_f2uiz, __aeabi_f2ulz, __aeabi_f2d, sqrtf, cosf, sinf, tanf, atan2f, expf, logf
* - ldexpf, copysignf, truncf, floorf, ceilf, roundf, asinf, acosf, atanf, sinhf, coshf, tanhf, asinhf, acoshf, atanhf, exp2f, log2f, exp10f, log10f, powf, hypotf, cbrtf, fmodf, dremf, remainderf, remquof, expm1f, log1pf, fmaf
* - powintf, sincosf (GNU extensions)
*
* The following additional optimized functions are also provided:
*
* - fix2float, ufix2float, fix642float, ufix642float, float2fix, float2ufix, float2fix64, float2ufix64, float2int, float2int64, float2int_z, float2int64_z
*/
// Fixed-point to float conversions.
// NOTE(review): e appears to be the number of bits after the binary point in m,
// i.e. the result is m * 2^-e; the shim assembly describes the 64-bit variants
// that way. Confirm for the 32-bit variants.
float fix2float(int32_t m, int e);
float ufix2float(uint32_t m, int e);
float fix642float(int64_t m, int e);
float ufix642float(uint64_t m, int e);
// Float to fixed-point / integer conversions.
// NOTE(review): e is presumably the number of bits after the binary point in the
// result - confirm against the implementation.
// These methods round towards -Infinity.
int32_t float2fix(float f, int e);
uint32_t float2ufix(float f, int e);
int64_t float2fix64(float f, int e);
uint64_t float2ufix64(float f, int e);
int32_t float2int(float f);
int64_t float2int64(float f);
// These methods round towards 0.
int32_t float2int_z(float f);
int64_t float2int64_z(float f);
// Functions listed in the group documentation of this header that are declared
// here explicitly (presumably because <math.h> does not always declare them).
// exp10f: presumably 10 raised to the power x - confirm.
float exp10f(float x);
// sincosf: stores results through sinx and cosx (presumably the sine and cosine of x).
void sincosf(float x, float *sinx, float *cosx);
// powintf: x raised to the integer power y.
float powintf(float x, int y);
#ifdef __cplusplus
}
#endif
#endif