Initial Release

This commit is contained in:
graham sanderson
2021-01-20 10:44:27 -06:00
commit 26653ea81e
404 changed files with 135614 additions and 0 deletions

View File

@ -0,0 +1,127 @@
if (NOT TARGET pico_double)
# library to be depended on - we make this depend on particular implementations using per target generator expressions
add_library(pico_double INTERFACE)
# no custom implementation; falls thru to compiler
add_library(pico_double_compiler INTERFACE)
# PICO_BUILD_DEFINE: PICO_DOUBLE_COMPILER, whether compiler provided double support is being used, type=bool, default=0, but dependent on CMake options, group=pico_double
target_compile_definitions(pico_double_compiler INTERFACE
PICO_DOUBLE_COMPILER=1
)
add_library(pico_double_headers INTERFACE)
target_include_directories(pico_double_headers INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include)
# add alias "default" which is just pico.
add_library(pico_double_default INTERFACE)
target_link_libraries(pico_double_default INTERFACE pico_double_pico)
set(PICO_DEFAULT_DOUBLE_IMPL pico_double_default)
target_link_libraries(pico_double INTERFACE
$<IF:$<BOOL:$<TARGET_PROPERTY:PICO_TARGET_DOUBLE_IMPL>>,$<TARGET_PROPERTY:PICO_TARGET_DOUBLE_IMPL>,${PICO_DEFAULT_DOUBLE_IMPL}>)
add_library(pico_double_pico INTERFACE)
target_sources(pico_double_pico INTERFACE
${CMAKE_CURRENT_LIST_DIR}/double_aeabi.S
${CMAKE_CURRENT_LIST_DIR}/double_init_rom.c
${CMAKE_CURRENT_LIST_DIR}/double_math.c
${CMAKE_CURRENT_LIST_DIR}/double_v1_rom_shim.S
)
# PICO_BUILD_DEFINE: PICO_DOUBLE_PICO, whether optimized pico/bootrom provided double support is being used, type=bool, default=1, but dependent on CMake options, group=pico_double
target_compile_definitions(pico_double_pico INTERFACE
PICO_DOUBLE_PICO=1
)
target_link_libraries(pico_double_pico INTERFACE pico_bootrom pico_double_headers)
add_library(pico_double_none INTERFACE)
target_sources(pico_double_none INTERFACE
${CMAKE_CURRENT_LIST_DIR}/double_none.S
)
target_link_libraries(pico_double_none INTERFACE pico_double_headers)
# PICO_BUILD_DEFINE: PICO_DOUBLE_NONE, whether double support is disabled and functions will panic, type=bool, default=0, but dependent on CMake options, group=pico_double
target_compile_definitions(pico_double_none INTERFACE
PICO_DOUBLE_NONE=1
PICO_PRINTF_SUPPORT_FLOAT=0 # printing floats/doubles won't work, so we can save space by removing it
)
function(wrap_double_functions TARGET)
pico_wrap_function(${TARGET} __aeabi_dadd)
pico_wrap_function(${TARGET} __aeabi_ddiv)
pico_wrap_function(${TARGET} __aeabi_dmul)
pico_wrap_function(${TARGET} __aeabi_drsub)
pico_wrap_function(${TARGET} __aeabi_dsub)
pico_wrap_function(${TARGET} __aeabi_cdcmpeq)
pico_wrap_function(${TARGET} __aeabi_cdrcmple)
pico_wrap_function(${TARGET} __aeabi_cdcmple)
pico_wrap_function(${TARGET} __aeabi_dcmpeq)
pico_wrap_function(${TARGET} __aeabi_dcmplt)
pico_wrap_function(${TARGET} __aeabi_dcmple)
pico_wrap_function(${TARGET} __aeabi_dcmpge)
pico_wrap_function(${TARGET} __aeabi_dcmpgt)
pico_wrap_function(${TARGET} __aeabi_dcmpun)
pico_wrap_function(${TARGET} __aeabi_i2d)
pico_wrap_function(${TARGET} __aeabi_l2d)
pico_wrap_function(${TARGET} __aeabi_ui2d)
pico_wrap_function(${TARGET} __aeabi_ul2d)
pico_wrap_function(${TARGET} __aeabi_d2iz)
pico_wrap_function(${TARGET} __aeabi_d2lz)
pico_wrap_function(${TARGET} __aeabi_d2uiz)
pico_wrap_function(${TARGET} __aeabi_d2ulz)
pico_wrap_function(${TARGET} __aeabi_d2f)
pico_wrap_function(${TARGET} sqrt)
pico_wrap_function(${TARGET} cos)
pico_wrap_function(${TARGET} sin)
pico_wrap_function(${TARGET} tan)
pico_wrap_function(${TARGET} atan2)
pico_wrap_function(${TARGET} exp)
pico_wrap_function(${TARGET} log)
pico_wrap_function(${TARGET} ldexp)
pico_wrap_function(${TARGET} copysign)
pico_wrap_function(${TARGET} trunc)
pico_wrap_function(${TARGET} floor)
pico_wrap_function(${TARGET} ceil)
pico_wrap_function(${TARGET} round)
pico_wrap_function(${TARGET} sincos) # gnu
pico_wrap_function(${TARGET} asin)
pico_wrap_function(${TARGET} acos)
pico_wrap_function(${TARGET} atan)
pico_wrap_function(${TARGET} sinh)
pico_wrap_function(${TARGET} cosh)
pico_wrap_function(${TARGET} tanh)
pico_wrap_function(${TARGET} asinh)
pico_wrap_function(${TARGET} acosh)
pico_wrap_function(${TARGET} atanh)
pico_wrap_function(${TARGET} exp2)
pico_wrap_function(${TARGET} log2)
pico_wrap_function(${TARGET} exp10)
pico_wrap_function(${TARGET} log10)
pico_wrap_function(${TARGET} pow)
pico_wrap_function(${TARGET} powint) #gnu
pico_wrap_function(${TARGET} hypot)
pico_wrap_function(${TARGET} cbrt)
pico_wrap_function(${TARGET} fmod)
pico_wrap_function(${TARGET} drem)
pico_wrap_function(${TARGET} remainder)
pico_wrap_function(${TARGET} remquo)
pico_wrap_function(${TARGET} expm1)
pico_wrap_function(${TARGET} log1p)
pico_wrap_function(${TARGET} fma)
endfunction()
wrap_double_functions(pico_double_pico)
wrap_double_functions(pico_double_none)
macro(pico_set_double_implementation TARGET IMPL)
get_target_property(target_type ${TARGET} TYPE)
if ("EXECUTABLE" STREQUAL "${target_type}")
set_target_properties(${TARGET} PROPERTIES PICO_TARGET_DOUBLE_IMPL "pico_double_${IMPL}")
else()
message(FATAL_ERROR "double implementation must be set on executable not library")
endif()
endmacro()
endif()

View File

@ -0,0 +1,801 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
__pre_init __aeabi_double_init, 00020
.syntax unified
.cpu cortex-m0plus
.thumb
.macro double_section name
#if PICO_DOUBLE_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
.macro _double_wrapper_func x
wrapper_func \x
.endm
.macro wrapper_func_d1 x
_double_wrapper_func \x
#if PICO_DOUBLE_PROPAGATE_NANS
mov ip, lr
bl __check_nan_d1
mov lr, ip
#endif
.endm
.macro wrapper_func_d2 x
_double_wrapper_func \x
#if PICO_DOUBLE_PROPAGATE_NANS
mov ip, lr
bl __check_nan_d2
mov lr, ip
#endif
.endm
.section .text
#if PICO_DOUBLE_PROPAGATE_NANS
.thumb_func
__check_nan_d1:
movs r3, #1
lsls r3, #21
lsls r2, r1, #1
adds r2, r3
bhi 1f
bx lr
1:
bx ip
.thumb_func
__check_nan_d2:
push {r0, r2}
movs r2, #1
lsls r2, #21
lsls r0, r1, #1
adds r0, r2
bhi 1f
lsls r0, r3, #1
adds r0, r2
bhi 2f
pop {r0, r2}
bx lr
2:
pop {r0, r2}
mov r0, r2
mov r1, r3
bx ip
1:
pop {r0, r2}
bx ip
#endif
.macro table_tail_call SF_TABLE_OFFSET
push {r3, r4}
#if PICO_DOUBLE_SUPPORT_ROM_V1
#ifndef NDEBUG
movs r3, #0
mov ip, r3
#endif
#endif
ldr r3, =sd_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
str r3, [sp, #4]
pop {r3, pc}
.endm
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
push {r3, r4}
ldr r3, =sd_table
ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_DOUBLE_SUPPORT_ROM_V1
mov ip, pc
#endif
str r3, [sp, #4]
pop {r3, pc}
#if PICO_DOUBLE_SUPPORT_ROM_V1
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm
.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm
double_section push_r8_r11
regular_func push_r8_r11
mov r4,r8
mov r5,r9
mov r6,r10
mov r7,r11
push {r4-r7}
bx r14
double_section pop_r8_r11
regular_func pop_r8_r11
pop {r4-r7}
mov r8,r4
mov r9,r5
mov r10,r6
mov r11,r7
bx r14
# note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
# note functions are word aligned except where they are an odd number of linear instructions
// double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition
double_wrapper_section __aeabi_darithmetic
// double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x
# frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_drsub
eors r0, r1
eors r1, r0
eors r0, r1
// fall thru
// double FUNC_NAME(__aeabi_dsub)(double x, double y) double-precision subtraction, x - y
wrapper_func_d2 __aeabi_dsub
#if PICO_DOUBLE_PROPAGATE_NANS
// we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
mov ip, r0
mov r0, r1
eors r0, r3
bmi 1f // different signs
mov r0, ip
push {r0-r3, lr}
bl 2f
b ddiv_dsub_nan_helper
1:
mov r0, ip
2:
#endif
shimmable_table_tail_call SF_TABLE_FSUB dsub_shim
wrapper_func_d2 __aeabi_dadd
shimmable_table_tail_call SF_TABLE_FADD dadd_shim
// double FUNC_NAME(__aeabi_ddiv)(double n, double d) double-precision division, n / d
wrapper_func_d2 __aeabi_ddiv
#if PICO_DOUBLE_PROPAGATE_NANS
push {r0-r3, lr}
bl 1f
b ddiv_dsub_nan_helper
1:
#endif
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
ddiv_dsub_nan_helper:
#if PICO_DOUBLE_PROPAGATE_NANS
// check for infinite op infinite (or rather check for infinite result with both
// operands being infinite)
lsls r2, r1, #1
asrs r2, r2, #21
adds r2, #1
beq 2f
add sp, #16
pop {pc}
2:
ldr r2, [sp, #4]
ldr r3, [sp, #12]
lsls r2, #1
asrs r2, r2, #21
lsls r3, #1
asrs r3, r3, #24
ands r2, r3
adds r2, #1
bne 3f
// infinite to nan
movs r2, #1
lsls r2, #19
orrs r1, r2
3:
add sp, #16
pop {pc}
#endif
// double FUNC_NAME(__aeabi_dmul)(double, double) double-precision multiplication
wrapper_func_d2 __aeabi_dmul
#if PICO_DOUBLE_PROPAGATE_NANS
push {r0-r3, lr}
bl 1f
// check for multiplication of infinite by zero (or rather check for infinite result with either
// operand 0)
lsls r3, r1, #1
asrs r3, r3, #21
adds r3, #1
beq 2f
add sp, #16
pop {pc}
2:
ldr r2, [sp, #4]
ldr r3, [sp, #12]
ands r2, r3
bne 3f
// infinite to nan
movs r2, #1
lsls r2, #19
orrs r1, r2
3:
add sp, #16
pop {pc}
1:
#endif
shimmable_table_tail_call SF_TABLE_FMUL dmul_shim
// void FUNC_NAME(__aeabi_cdrcmple)(double, double) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
double_wrapper_section __aeabi_cdcmple
wrapper_func __aeabi_cdrcmple
push {r0-r7,r14}
eors r0, r2
eors r2, r0
eors r0, r2
eors r1, r3
eors r3, r1
eors r1, r3
b __aeabi_dfcmple_guts
// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cdcmple)(double, double) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cdcmpeq)(double, double) non-excepting equality comparison [1], result in PSR ZC flags
@ compare r0:r1 against r2:r3, returning -1/0/1 for <, =, >
@ also set flags accordingly
.align 2
wrapper_func __aeabi_cdcmple
wrapper_func __aeabi_cdcmpeq
push {r0-r7,r14}
__aeabi_dfcmple_guts:
ldr r7,=#0x7ff @ flush NaNs and denormals
lsls r4,r1,#1
lsrs r4,#21
beq 1f
cmp r4,r7
bne 2f
lsls r4, r1, #12
bhi 7f
1:
movs r0,#0
lsrs r1,#20
lsls r1,#20
2:
lsls r4,r3,#1
lsrs r4,#21
beq 1f
cmp r4,r7
bne 2f
lsls r4, r3, #12
bhi 7f
1:
movs r2,#0
lsrs r3,#20
lsls r3,#20
2:
movs r6,#1
eors r3,r1
bmi 4f @ opposite signs? then can proceed on basis of sign of x
eors r3,r1 @ restore r3
bpl 2f
cmp r3,r1
bne 7f
1:
cmp r2,r0
7:
pop {r0-r7,r15}
2:
cmp r1,r3
bne 7b
1:
cmp r0,r2
pop {r0-r7,r15}
4:
orrs r3,r1 @ make -0==+0
adds r3,r3
orrs r3,r0
orrs r3,r2
beq 7b
mvns r1, r1 @ carry inverse of r1 sign
adds r1, r1
pop {r0-r7,r15}
// int FUNC_NAME(__aeabi_dcmpeq)(double, double) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
double_wrapper_section __aeabi_dcmpeq
.align 2
wrapper_func __aeabi_dcmpeq
push {lr}
bl __aeabi_cdcmpeq
beq 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_dcmplt)(double, double) result (1, 0) denotes (<, ?>=) [2], use for C <
double_wrapper_section __aeabi_dcmplt
.align 2
wrapper_func __aeabi_dcmplt
push {lr}
bl __aeabi_cdcmple
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_dcmple)(double, double) result (1, 0) denotes (<=, ?>) [2], use for C <=
double_wrapper_section __aeabi_dcmple
.align 2
wrapper_func __aeabi_dcmple
push {lr}
bl __aeabi_cdcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_dcmpge)(double, double) result (1, 0) denotes (>=, ?<) [2], use for C >=
double_wrapper_section __aeabi_dcmpge
.align 2
wrapper_func __aeabi_dcmpge
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cdrcmple
bls 1f
movs r0, #0
pop {pc}
1:
movs r0, #1
pop {pc}
// int FUNC_NAME(__aeabi_dcmpgt)(double, double) result (1, 0) denotes (>, ?<=) [2], use for C >
double_wrapper_section __aeabi_dcmpgt
wrapper_func __aeabi_dcmpgt
push {lr}
// because of NaNs it is better to reverse the args than the result
bl __aeabi_cdrcmple
sbcs r0, r0
pop {pc}
// int FUNC_NAME(__aeabi_dcmpun)(double, double) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
double_wrapper_section __aeabi_dcmpun
wrapper_func __aeabi_dcmpun
movs r0, #1
lsls r0, #21
lsls r2, r1, #1
adds r2, r0
bhi 1f
lsls r2, r3, #1
adds r2, r0
bhi 1f
movs r0, #0
bx lr
1:
movs r0, #1
bx lr
movs r0, #0
bx lr
// double FUNC_NAME(__aeabi_ui2d)(unsigned) unsigned to double (double precision) conversion
double_wrapper_section __aeabi_ui2d
shimmable_table_tail_call SF_TABLE_UINT2FLOAT uint2double_shim
double_wrapper_section __aeabi_i2d
wrapper_func __aeabi_ui2d
movs r1, #0
cmp r0, #0
bne 2f
1:
bx lr
// double FUNC_NAME(__aeabi_i2d)(int) integer to double (double precision) conversion
wrapper_func __aeabi_i2d
asrs r1, r0, #31
eors r0, r1
subs r0, r1
beq 1b
lsls r1, #31
2:
push {r0, r1, r4, lr}
ldr r3, =sf_clz_func
ldr r3, [r3]
blx r3
pop {r2, r3}
adds r4, r0, #1
lsls r2, r4
lsls r0, r2, #20
lsrs r2, #12
ldr r1,=#1055
subs r1, r4
lsls r1, #20
orrs r1, r3
orrs r1, r2
pop {r4, pc}
// int FUNC_NAME(__aeabi_d2iz)(double) double (double precision) to integer C-style conversion [3]
double_wrapper_section __aeabi_d2iz
wrapper_func __aeabi_d2iz
regular_func double2int_z
push {r4, lr}
lsls r4, r1, #1
lsrs r2, r4, #21
movs r3, #0x80
adds r2, r3
lsls r3, #3
subs r2, r3
lsls r3, #21
cmp r2, #126
ble 1f
subs r2, #158
bge 2f
asrs r4, r1, #31
lsls r1, #12
lsrs r1, #1
orrs r1, r3
negs r2, r2
lsrs r1, r2
lsls r4, #1
adds r4, #1
adds r2, #21
cmp r2, #32
bge 3f
lsrs r0, r2
orrs r0, r1
muls r0, r4
pop {r4, pc}
1:
movs r0, #0
pop {r4, pc}
3:
mov r0, r1
muls r0, r4
pop {r4, pc}
2:
// overflow
lsrs r0, r1, #31
adds r0, r3
subs r0, #1
pop {r4, pc}
double_section double2int
regular_func double2int
shimmable_table_tail_call SF_TABLE_FLOAT2INT double2int_shim
// unsigned FUNC_NAME(__aeabi_d2uiz)(double) double (double precision) to unsigned C-style conversion [3]
double_wrapper_section __aeabi_d2uiz
wrapper_func __aeabi_d2uiz
regular_func double2uint
shimmable_table_tail_call SF_TABLE_FLOAT2UINT double2uint_shim
double_section fix2double
regular_func fix2double
shimmable_table_tail_call SF_TABLE_FIX2FLOAT fix2double_shim
double_section ufix2double
regular_func ufix2double
shimmable_table_tail_call SF_TABLE_UFIX2FLOAT ufix2double_shim
double_section fix642double
regular_func fix642double
shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642double_shim
double_section ufix2double
regular_func ufix642double
shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642double_shim
// double FUNC_NAME(__aeabi_l2d)(long long) long long to double (double precision) conversion
double_wrapper_section __aeabi_l2d
wrapper_func __aeabi_l2d
shimmable_table_tail_call SF_TABLE_INT642FLOAT int642double_shim
// double FUNC_NAME(__aeabi_l2f)(long long) long long to double (double precision) conversion
double_wrapper_section __aeabi_ul2d
wrapper_func __aeabi_ul2d
shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642double_shim
// long long FUNC_NAME(__aeabi_d2lz)(double) double (double precision) to long long C-style conversion [3]
double_wrapper_section __aeabi_d2lz
wrapper_func __aeabi_d2lz
regular_func double2int64_z
cmn r1, r1
bcc double2int64
push {lr}
lsls r1, #1
lsrs r1, #1
movs r2, #0
bl double2ufix64
cmp r1, #0
bmi 1f
movs r2, #0
rsbs r0, #0
sbcs r2, r1
mov r1, r2
pop {pc}
1:
movs r1, #128
lsls r1, #24
movs r0, #0
pop {pc}
double_section double2int64
regular_func double2int64
shimmable_table_tail_call SF_TABLE_FLOAT2INT64 double2int64_shim
// unsigned long long FUNC_NAME(__aeabi_d2ulz)(double) double to unsigned long long C-style conversion [3]
double_wrapper_section __aeabi_d2ulz
wrapper_func __aeabi_d2ulz
shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 double2uint64_shim
double_section double2fix64
regular_func double2fix64
shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 double2fix64_shim
double_section double2ufix64
regular_func double2ufix64
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 double2ufix64_shim
double_section double2fix
regular_func double2fix
shimmable_table_tail_call SF_TABLE_FLOAT2FIX double2fix_shim
double_section double2ufix
regular_func double2ufix
shimmable_table_tail_call SF_TABLE_FLOAT2UFIX double2ufix_shim
double_wrapper_section __aeabi_d2f
1:
#if PICO_DOUBLE_PROPAGATE_NANS
// copy sign bit and 23 NAN id bits into sign bit and significant id bits, also set high id bit
lsrs r0, #30
lsls r2, r1, #12
lsrs r2, #9
asrs r1, #22
lsls r1, #22
orrs r0, r1
orrs r0, r2
bx lr
#endif
wrapper_func __aeabi_d2f
#if PICO_DOUBLE_PROPAGATE_NANS
movs r3, #1
lsls r3, #21
lsls r2, r1, #1
adds r2, r3
bhi 1b
#endif
// note double->float in double table at same index as float->double in double table
shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE double2float_shim
double_wrapper_section srqt
wrapper_func_d1 sqrt
shimmable_table_tail_call SF_TABLE_FSQRT dsqrt_shim
double_wrapper_section sincostan_remainder
regular_func sincostan_remainder
ldr r2, =0x54442D18 // 2 * M_PI
ldr r3, =0x401921FB
push {lr}
bl remainder
pop {pc}
double_wrapper_section cos
#don't use _d1 as we're doing a range check anyway and infinites/nans are bigger than 1024
wrapper_func cos
// rom version only works for -1024 < angle < 1024
lsls r2, r1, #2
bcc 1f
lsrs r2, #22
cmp r2, #9
bge 2f
1:
shimmable_table_tail_call SF_TABLE_FCOS dcos_shim
2:
#if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1
asrs r2, #21
adds r2, #1
bne 3f
// infinite to nan
movs r2, #1
lsls r2, #19
orrs r1, r2
bx lr
3:
#endif
push {lr}
bl sincostan_remainder
pop {r2}
mov lr, r2
b 1b
double_wrapper_section sin
#don't use _d1 as we're doing a range check anyway and infinites/nans are bigger than 1024
wrapper_func sin
// rom version only works for -1024 < angle < 1024
lsls r2, r1, #2
bcc 1f
lsrs r2, #22
cmp r2, #9
bge 2f
1:
shimmable_table_tail_call SF_TABLE_FSIN dsin_shim
2:
#if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1
asrs r2, #21
adds r2, #1
bne 3f
// infinite to nan
movs r2, #1
lsls r2, #19
orrs r1, r2
bx lr
3:
#endif
push {lr}
bl sincostan_remainder
pop {r2}
mov lr, r2
b 1b
double_wrapper_section sincos
// out of line remainder code for abs(angle)>=1024
2:
#if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1
asrs r2, #21
adds r2, #1
bne 3f
// infinite to nan
movs r2, #1
lsls r2, #19
orrs r1, r2
pop {r4-r5}
stmia r4!, {r0, r1}
stmia r5!, {r0, r1}
pop {r4, r5, pc}
3:
#endif
push {lr}
bl sincostan_remainder
pop {r2}
mov lr, r2
b 1f
wrapper_func sincos
push {r2-r5, lr}
// rom version only works for -1024 < angle < 1024
lsls r2, r1, #2
bcc 1f
lsrs r2, #22
cmp r2, #9
bge 2b
1:
bl 2f
pop {r4-r5}
stmia r4!, {r0, r1}
stmia r5!, {r2, r3}
pop {r4, r5, pc}
2:
shimmable_table_tail_call SF_TABLE_V3_FSINCOS sincos_shim_bootstrap
#if PICO_DOUBLE_PROPAGATE_NANS
.align 2
1:
pop {r2, r3}
stmia r2!, {r0, r1}
mov lr, r3
pop {r3}
stmia r3!, {r0, r1}
bx lr
#endif
.thumb_func
sincos_shim_bootstrap:
push {r2, r3, r4}
movs r3, #0x13
ldrb r3, [r3]
#if PICO_DOUBLE_SUPPORT_ROM_V1
cmp r3, #1
bne 1f
ldr r3, =dsincos_shim
b 2f
#endif
1:
ldr r3, =dsincos_shim_v2
2:
ldr r2, =sd_table
str r3, [r2, #SF_TABLE_V3_FSINCOS]
str r3, [sp, #8]
pop {r2, r3, pc}
.thumb_func
dsincos_shim_v2:
push {r4-r7,r14}
bl push_r8_r11
bl v2_rom_dsincos_internal
mov r12,r0 @ save ε
bl v2_rom_dcos_finish
push {r0,r1}
mov r0,r12
bl v2_rom_dsin_finish
pop {r2,r3}
bl pop_r8_r11
pop {r4-r7,r15}
.thumb_func
v2_rom_dsincos_internal:
push {r0, lr}
ldr r0, =0x3855
str r0, [sp, #4]
pop {r0, pc}
.thumb_func
v2_rom_dcos_finish:
push {r0, r1}
ldr r0, =0x389d
str r0, [sp, #4]
pop {r0, pc}
.thumb_func
v2_rom_dsin_finish:
push {r0, r1}
ldr r0, =0x38d9
str r0, [sp, #4]
pop {r0, pc}
double_wrapper_section tan
#don't use _d1 as we're doing a range check anyway and infinites/nans are bigger than 1024
wrapper_func tan
// rom version only works for -1024 < angle < 1024
lsls r2, r1, #2
bcc 1f
lsrs r2, #22
cmp r2, #9
bge 2f
1:
shimmable_table_tail_call SF_TABLE_FTAN dtan_shim
2:
#if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1
asrs r2, #21
adds r2, #1
bne 3f
// infinite to nan
movs r2, #1
lsls r2, #19
orrs r1, r2
bx lr
3:
#endif
push {lr}
bl sincostan_remainder
pop {r2}
mov lr, r2
b 1b
double_wrapper_section atan2
wrapper_func_d2 atan2
shimmable_table_tail_call SF_TABLE_FATAN2 datan2_shim
double_wrapper_section exp
wrapper_func_d1 exp
shimmable_table_tail_call SF_TABLE_FEXP dexp_shim
double_wrapper_section log
wrapper_func_d1 log
shimmable_table_tail_call SF_TABLE_FLN dln_shim

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <string.h>
#include "pico/bootrom.h"
#include "pico/bootrom/sf_table.h"
// NOTE THIS FUNCTION TABLE IS NOT PUBLIC OR NECESSARILY COMPLETE...
// IT IS ***NOT*** SAFE TO CALL THESE FUNCTION POINTERS FROM ARBITRARY CODE
uint32_t sd_table[SF_TABLE_V2_SIZE / 2];
#if !PICO_DOUBLE_SUPPORT_ROM_V1
static __attribute__((noreturn)) void missing_double_func_shim() {
panic("missing double function");
}
#endif
extern void double_table_shim_on_use_helper();
void __aeabi_double_init() {
int rom_version = rp2040_rom_version();
#if PICO_DOUBLE_SUPPORT_ROM_V1
if (rom_version == 1) {
// this is a little tricky.. we only want to pull in a shim if the corresponding function
// is called. to that end we include a SVC instruction with the table offset as the call number
// followed by the shim function pointer inside the actual wrapper function. that way if the wrapper
// function is garbage collected, so is the shim function.
//
// double_table_shim_on_use_helper expects this SVC instruction in the calling code soon after the address
// pointed to by IP and patches the double_table entry with the real shim the first time the function is called.
for(uint i=0; i<SF_TABLE_V2_SIZE/4; i++) {
sd_table[i] = (uintptr_t)double_table_shim_on_use_helper;
}
}
#else
if (rom_version == 1) {
// opting for soft failure for now - you'll get a panic at runtime if you call any of the missing methods
for(uint i=0;i<SF_TABLE_V2_SIZE/4;i++) {
sd_table[i] = (uintptr_t)missing_double_func_shim;
}
}
#endif
if (rom_version >= 2) {
void *rom_table = rom_data_lookup(rom_table_code('S', 'D'));
assert(*((uint8_t *)(((void *)rom_data_lookup(rom_table_code('S', 'F')))-2)) * 4 >= SF_TABLE_V2_SIZE);
memcpy(&sd_table, rom_table, SF_TABLE_V2_SIZE);
if (rom_version == 2) {
#ifndef NDEBUG
if (*(uint16_t *)0x3854 != 0xb500 || // this is dsincos(_internal)
*(uint16_t *)0x38d8 != 0x4649 || // this is dsin_finish
*(uint16_t *)0x389c != 0x4659 // this is dcos_finish
) {
panic(NULL);
}
#endif
}
}
if (rom_version < 3) {
// we use the unused entry for SINCOS
sd_table[SF_TABLE_V3_FSINCOS / 4] = (uintptr_t) double_table_shim_on_use_helper;
}
}

View File

@ -0,0 +1,607 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <math.h>
#include "pico/types.h"
#include "pico/double.h"
#include "pico/platform.h"
typedef uint64_t ui64;
typedef uint32_t ui32;
typedef int64_t i64;
#define PINF ( HUGE_VAL)
#define MINF (-HUGE_VAL)
#define PZERO (+0.0)
#define MZERO (-0.0)
#define PI 3.14159265358979323846
#define LOG2 0.69314718055994530941
// Unfortunately in double precision ln(10) is very close to half-way between to representable numbers
#define LOG10 2.30258509299404568401
#define LOG2E 1.44269504088896340737
#define LOG10E 0.43429448190325182765
#define ONETHIRD 0.33333333333333333333
#define PIf 3.14159265358979323846f
#define LOG2f 0.69314718055994530941f
#define LOG2Ef 1.44269504088896340737f
#define LOG10Ef 0.43429448190325182765f
#define ONETHIRDf 0.33333333333333333333f
#define DUNPACK(x,e,m) e=((x)>>52)&0x7ff,m=((x)&0x000fffffffffffffULL)|0x0010000000000000ULL
#define DUNPACKS(x,s,e,m) s=((x)>>63),DUNPACK((x),(e),(m))
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wstrict-aliasing\"")
static inline bool disnan(double x) {
ui64 ix=*(i64*)&x;
// checks the top bit of the low 32 bit of the NAN, but it I think that is ok
return ((uint32_t)(ix >> 31)) > 0xffe00000u;
}
#if PICO_DOUBLE_PROPAGATE_NANS
#define check_nan_d1(x) if (disnan((x))) return (x)
#define check_nan_d2(x,y) if (disnan((x))) return (x); else if (disnan((y))) return (y);
#else
#define check_nan_d1(x) ((void)0)
#define check_nan_d2(x,y) ((void)0)
#endif
static inline int dgetsignexp(double x) {
ui64 ix=*(ui64*)&x;
return (ix>>52)&0xfff;
}
static inline int dgetexp(double x) {
ui64 ix=*(ui64*)&x;
return (ix>>52)&0x7ff;
}
static inline double dldexp(double x,int de) {
ui64 ix=*(ui64*)&x,iy;
int e;
e=dgetexp(x);
if(e==0||e==0x7ff) return x;
e+=de;
if(e<=0) iy=ix&0x8000000000000000ULL; // signed zero for underflow
else if(e>=0x7ff) iy=(ix&0x8000000000000000ULL)|0x7ff0000000000000ULL; // signed infinity on overflow
else iy=ix+((ui64)de<<52);
return *(double*)&iy;
}
double WRAPPER_FUNC(ldexp)(double x, int de) {
check_nan_d1(x);
return dldexp(x, de);
}
static inline double dcopysign(double x,double y) {
ui64 ix=*(ui64*)&x,iy=*(ui64*)&y;
ix=((ix&0x7fffffffffffffffULL)|(iy&0x8000000000000000ULL));
return *(double*)&ix;
}
double WRAPPER_FUNC(copysign)(double x, double y) {
check_nan_d2(x,y);
return dcopysign(x, y);
}
static inline int diszero(double x) { return dgetexp (x)==0; }
static inline int dispzero(double x) { return dgetsignexp(x)==0; }
static inline int dismzero(double x) { return dgetsignexp(x)==0x800; }
static inline int disinf(double x) { return dgetexp (x)==0x7ff; }
static inline int dispinf(double x) { return dgetsignexp(x)==0x7ff; }
static inline int disminf(double x) { return dgetsignexp(x)==0xfff; }
static inline int disint(double x) {
ui64 ix=*(ui64*)&x,m;
int e=dgetexp(x);
if(e==0) return 1; // 0 is an integer
e-=0x3ff; // remove exponent bias
if(e<0) return 0; // |x|<1
e=52-e; // bit position in mantissa with significance 1
if(e<=0) return 1; // |x| large, so must be an integer
m=(1ULL<<e)-1; // mask for bits of significance <1
if(ix&m) return 0; // not an integer
return 1;
}
static inline int disoddint(double x) {
ui64 ix=*(ui64*)&x,m;
int e=dgetexp(x);
e-=0x3ff; // remove exponent bias
if(e<0) return 0; // |x|<1; 0 is not odd
e=52-e; // bit position in mantissa with significance 1
if(e<0) return 0; // |x| large, so must be even
m=(1ULL<<e)-1; // mask for bits of significance <1 (if any)
if(ix&m) return 0; // not an integer
if(e==52) return 1; // value is exactly 1
return (ix>>e)&1;
}
static inline int disstrictneg(double x) {
ui64 ix=*(ui64*)&x;
if(diszero(x)) return 0;
return ix>>63;
}
static inline int disneg(double x) {
ui64 ix=*(ui64*)&x;
return ix>>63;
}
static inline double dneg(double x) {
ui64 ix=*(ui64*)&x;
ix^=0x8000000000000000ULL;
return *(double*)&ix;
}
static inline int dispo2(double x) {
ui64 ix=*(ui64*)&x;
if(diszero(x)) return 0;
if(disinf(x)) return 0;
ix&=0x000fffffffffffffULL;
return ix==0;
}
static inline double dnan_or(double x) {
#if PICO_DOUBLE_PROPAGATE_NANS
return NAN;
#else
return x;
#endif
}
double WRAPPER_FUNC(trunc)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
int e=dgetexp(x);
e-=0x3ff; // remove exponent bias
if(e<0) { // |x|<1
ix&=0x8000000000000000ULL;
return *(double*)&ix;
}
e=52-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1ULL<<e)-1; // mask for bits of significance <1
ix&=~m;
return *(double*)&ix;
}
double WRAPPER_FUNC(round)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
int e=dgetexp(x);
e-=0x3ff; // remove exponent bias
if(e<-1) { // |x|<0.5
ix&=0x8000000000000000ULL;
return *(double*)&ix;
}
if(e==-1) { // 0.5<=|x|<1
ix&=0x8000000000000000ULL;
ix|=0x3ff0000000000000ULL; // ±1
return *(double*)&ix;
}
e=52-e; // bit position in mantissa with significance 1, <=52
if(e<=0) return x; // |x| large, so must be an integer
m=1ULL<<(e-1); // mask for bit of significance 0.5
ix+=m;
m=m+m-1; // mask for bits of significance <1
ix&=~m;
return *(double*)&ix;
}
double WRAPPER_FUNC(floor)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
int e=dgetexp(x);
if(e==0) { // x==0
ix&=0x8000000000000000ULL;
return *(double*)&ix;
}
e-=0x3ff; // remove exponent bias
if(e<0) { // |x|<1, not zero
if(disneg(x)) return -1;
return PZERO;
}
e=52-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1ULL<<e)-1; // mask for bit of significance <1
if(disneg(x)) ix+=m; // add 1-ε to magnitude if negative
ix&=~m; // truncate
return *(double*)&ix;
}
double WRAPPER_FUNC(ceil)(double x) {
check_nan_d1(x);
ui64 ix=*(ui64*)&x,m;
int e=dgetexp(x);
if(e==0) { // x==0
ix&=0x8000000000000000ULL;
return *(double*)&ix;
}
e-=0x3ff; // remove exponent bias
if(e<0) { // |x|<1, not zero
if(disneg(x)) return MZERO;
return 1;
}
e=52-e; // bit position in mantissa with significance 1
if(e<=0) return x; // |x| large, so must be an integer
m=(1ULL<<e)-1; // mask for bit of significance <1
if(!disneg(x)) ix+=m; // add 1-ε to magnitude if positive
ix&=~m; // truncate
return *(double*)&ix;
}
double WRAPPER_FUNC(asin)(double x) {
check_nan_d1(x);
double u;
u=(1-x)*(1+x);
if(disstrictneg(u)) return dnan_or(PINF);
return atan2(x,sqrt(u));
}
double WRAPPER_FUNC(acos)(double x) {
check_nan_d1(x);
double u;
u=(1-x)*(1+x);
if(disstrictneg(u)) return dnan_or(PINF);
return atan2(sqrt(u),x);
}
double WRAPPER_FUNC(atan)(double x) {
check_nan_d1(x);
if(dispinf(x)) return PI/2;
if(disminf(x)) return -PI/2;
return atan2(x,1);
}
double WRAPPER_FUNC(sinh)(double x) {
check_nan_d1(x);
return dldexp((exp(x)-exp(dneg(x))),-1);
}
double WRAPPER_FUNC(cosh)(double x) {
check_nan_d1(x);
return dldexp((exp(x)+exp(dneg(x))),-1);
}
double WRAPPER_FUNC(tanh)(double x) {
check_nan_d1(x);
double u;
int e;
e=dgetexp(x);
if(e>=5+0x3ff) { // |x|>=32?
if(!disneg(x)) return 1; // 1 << exp 2x; avoid generating infinities later
else return -1; // 1 >> exp 2x
}
u=exp(dldexp(x,1));
return (u-1)/(u+1);
}
double WRAPPER_FUNC(asinh)(double x) {
check_nan_d1(x);
int e;
e=dgetexp(x);
if(e>=32+0x3ff) { // |x|>=2^32?
if(!disneg(x)) return log( x )+LOG2; // 1/x^2 << 1
else return dneg(log(dneg(x))+LOG2); // 1/x^2 << 1
}
if(x>0) return log(sqrt(x*x+1)+x);
else return dneg(log(sqrt(x*x+1)-x));
}
double WRAPPER_FUNC(acosh)(double x) {
check_nan_d1(x);
int e;
if(disneg(x)) x=dneg(x);
e=dgetexp(x);
if(e>=32+0x3ff) return log(x)+LOG2; // |x|>=2^32?
return log(sqrt((x-1)*(x+1))+x);
}
double WRAPPER_FUNC(atanh)(double x) {
check_nan_d1(x);
return dldexp(log((1+x)/(1-x)),-1);
}
double WRAPPER_FUNC(exp2)(double x) {
check_nan_d1(x);
int e;
// extra check for disminf as this catches -Nan, and x<=-4096 doesn't.
if (disminf(x) || x<=-4096) return 0; // easily underflows
else if (x>=4096) return PINF; // easily overflows
e=(int)round(x);
x-=e;
return dldexp(exp(x*LOG2),e);
}
double WRAPPER_FUNC(log2)(double x) { check_nan_d1(x); return log(x)*LOG2E; }
double WRAPPER_FUNC(exp10)(double x) { check_nan_d1(x); return pow(10,x); }
double WRAPPER_FUNC(log10)(double x) { check_nan_d1(x); return log(x)*LOG10E; }
// todo these are marked as lofi
double WRAPPER_FUNC(expm1(double x) { check_nan_d1(x); return exp)(x)-1; }
double WRAPPER_FUNC(log1p(double x) { check_nan_d1(x); return log)(1+x); }
double WRAPPER_FUNC(fma)(double x,double y,double z) { check_nan_d1(x); return x*y+z; }
// general power, x>0, finite
static double dpow_1(double x,double y) {
int a,b,c;
double t,rt,u,v,v0,v1,w,ry;
a=dgetexp(x)-0x3ff;
u=log2(dldexp(x,-a)); // now log_2 x = a+u
if(u>0.5) u-=1,a++; // |u|<=~0.5
if(a==0) return exp2(u*y);
// here |log_2 x| >~0.5
if(y>= 4096) { // then easily over/underflows
if(a<0) return 0;
return PINF;
}
if(y<=-4096) { // then easily over/underflows
if(a<0) return PINF;
return 0;
}
ry=round(y);
v=y-ry;
v0=dldexp(round(ldexp(v,26)),-26);
v1=v-v0;
b=(int)ry; // guaranteed to fit in an int; y=b+v0+v1
// now the result is exp2( (a+u) * (b+v0+v1) )
c=a*b; // integer
t=a*v0;
rt=round(t);
c+=(int)rt;
w=t-rt;
t=a*v1;
w+=t;
t=u*b;
rt=round(t);
c+=(int)rt;
w+=t-rt;
w+=u*v;
return dldexp(exp2(w),c);
}
static double dpow_int2(double x,int y) {
double u;
if(y==1) return x;
u=dpow_int2(x,y/2);
u*=u;
if(y&1) u*=x;
return u;
}
// for the case where x not zero or infinity, y small and not zero
static inline double dpowint_1(double x,int y) {
if(y<0) x=1/x,y=-y;
return dpow_int2(x,y);
}
// for the case where x not zero or infinity
static double dpowint_0(double x,int y) {
int e;
if(disneg(x)) {
if(disoddint(y)) return dneg(dpowint_0(dneg(x),y));
else return dpowint_0(dneg(x),y);
}
if(dispo2(x)) {
e=dgetexp(x)-0x3ff;
if(y>=2048) y= 2047; // avoid overflow
if(y<-2048) y=-2048;
y*=e;
return dldexp(1,y);
}
if(y==0) return 1;
if(y>=-32&&y<=32) return dpowint_1(x,y);
return dpow_1(x,y);
}
double WRAPPER_FUNC(powint)(double x,int y) {
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
if(x==1.0||y==0) return 1;
_Pragma("GCC diagnostic pop")
check_nan_d1(x);
if(diszero(x)) {
if(y>0) {
if(y&1) return x;
else return 0;
}
if((y&1)) return dcopysign(PINF,x);
return PINF;
}
if(dispinf(x)) {
if(y<0) return 0;
else return PINF;
}
if(disminf(x)) {
if(y>0) {
if((y&1)) return MINF;
else return PINF;
}
if((y&1)) return MZERO;
else return PZERO;
}
return dpowint_0(x,y);
}
// for the case where y is guaranteed a finite integer, x not zero or infinity
static double dpow_0(double x,double y) {
int e,p;
if(disneg(x)) {
if(disoddint(y)) return dneg(dpow_0(dneg(x),y));
else return dpow_0(dneg(x),y);
}
p=(int)y;
if(dispo2(x)) {
e=dgetexp(x)-0x3ff;
if(p>=2048) p= 2047; // avoid overflow
if(p<-2048) p=-2048;
p*=e;
return dldexp(1,p);
}
if(p==0) return 1;
if(p>=-32&&p<=32) return dpowint_1(x,p);
return dpow_1(x,y);
}
double WRAPPER_FUNC(pow)(double x,double y) {
_Pragma("GCC diagnostic push")
_Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
if(x==1.0||diszero(y)) return 1;
check_nan_d2(x, y);
if(x==-1.0&&disinf(y)) return 1;
_Pragma("GCC diagnostic pop")
if(diszero(x)) {
if(!disneg(y)) {
if(disoddint(y)) return x;
else return 0;
}
if(disoddint(y)) return dcopysign(PINF,x);
return PINF;
}
if(dispinf(x)) {
if(disneg(y)) return 0;
else return PINF;
}
if(disminf(x)) {
if(!disneg(y)) {
if(disoddint(y)) return MINF;
else return PINF;
}
if(disoddint(y)) return MZERO;
else return PZERO;
}
if(dispinf(y)) {
if(dgetexp(x)<0x3ff) return PZERO;
else return PINF;
}
if(disminf(y)) {
if(dgetexp(x)<0x3ff) return PINF;
else return PZERO;
}
if(disint(y)) return dpow_0(x,y);
if(disneg(x)) return PINF;
return dpow_1(x,y);
}
double WRAPPER_FUNC(hypot)(double x,double y) {
check_nan_d2(x, y);
int ex,ey;
ex=dgetexp(x); ey=dgetexp(y);
if(ex>=0x3ff+400||ey>=0x3ff+400) { // overflow, or nearly so
x=dldexp(x,-600),y=dldexp(y,-600);
return dldexp(sqrt(x*x+y*y), 600);
}
else if(ex<=0x3ff-400&&ey<=0x3ff-400) { // underflow, or nearly so
x=dldexp(x, 600),y=dldexp(y, 600);
return dldexp(sqrt(x*x+y*y),-600);
}
return sqrt(x*x+y*y);
}
double WRAPPER_FUNC(cbrt)(double x) {
check_nan_d1(x);
int e;
if(disneg(x)) return dneg(cbrt(dneg(x)));
if(diszero(x)) return dcopysign(PZERO,x);
e=dgetexp(x)-0x3ff;
e=(e*0x5555+0x8000)>>16; // ~e/3, rounded
x=dldexp(x,-e*3);
x=exp(log(x)*ONETHIRD);
return dldexp(x,e);
}
// reduces mx*2^e modulo my, returning bottom bits of quotient at *pquo
// 2^52<=|mx|,my<2^53, e>=0; 0<=result<my
static i64 drem_0(i64 mx,i64 my,int e,int*pquo) {
int quo=0,q,r=0,s;
if(e>0) {
r=0xffffffffU/(ui32)(my>>36); // reciprocal estimate Q16
}
while(e>0) {
s=e; if(s>12) s=12; // gain up to 12 bits on each iteration
q=(mx>>38)*r; // Q30
q=((q>>(29-s))+1)>>1; // Q(s), rounded
mx=(mx<<s)-my*q;
quo=(quo<<s)+q;
e-=s;
}
if(mx>=my) mx-=my,quo++; // when e==0 mx can be nearly as big as 2my
if(mx>=my) mx-=my,quo++;
if(mx<0) mx+=my,quo--;
if(mx<0) mx+=my,quo--;
if(pquo) *pquo=quo;
return mx;
}
double WRAPPER_FUNC(fmod)(double x,double y) {
check_nan_d2(x, y);
ui64 ix=*(ui64*)&x,iy=*(ui64*)&y;
int sx,ex,ey;
i64 mx,my;
DUNPACKS(ix,sx,ex,mx);
DUNPACK(iy,ey,my);
if(ex==0x7ff) return dnan_or(PINF);
if(ey==0) return PINF;
if(ex==0) {
if(!disneg(x)) return PZERO;
return MZERO;
}
if(ex<ey) return x; // |x|<|y|, including case x=±0
mx=drem_0(mx,my,ex-ey,0);
if(sx) mx=-mx;
return fix642double(mx,0x3ff-ey+52);
}
double WRAPPER_FUNC(remquo)(double x,double y,int*quo) {
check_nan_d2(x, y);
ui64 ix=*(ui64*)&x,iy=*(ui64*)&y;
int sx,sy,ex,ey,q;
i64 mx,my;
DUNPACKS(ix,sx,ex,mx);
DUNPACKS(iy,sy,ey,my);
if(quo) *quo=0;
if(ex==0x7ff) return PINF;
if(ey==0) return PINF;
if(ex==0) return PZERO;
if(ey==0x7ff) return x;
if(ex<ey-1) return x; // |x|<|y|/2
if(ex==ey-1) {
if(mx<=my) return x; // |x|<=|y|/2, even quotient
// here |y|/2<|x|<|y|
if(!sx) { // x>|y|/2
mx-=my+my;
ey--;
q=1;
} else { // x<-|y|/2
mx=my+my-mx;
ey--;
q=-1;
}
}
else {
if(sx) mx=-mx;
mx=drem_0(mx,my,ex-ey,&q);
if(mx+mx>my || (mx+mx==my&&(q&1)) ) { // |x|>|y|/2, or equality and an odd quotient?
mx-=my;
q++;
}
}
if(sy) q=-q;
if(quo) *quo=q;
return fix642double(mx,0x3ff-ey+52);
}
double WRAPPER_FUNC(drem)(double x,double y) { check_nan_d2(x, y); return remquo(x,y,0); }
double WRAPPER_FUNC(remainder)(double x,double y) { check_nan_d2(x, y); return remquo(x,y,0); }
_Pragma("GCC diagnostic pop") // strict-aliasing

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
.syntax unified
.cpu cortex-m0plus
.thumb
wrapper_func __aeabi_dadd
wrapper_func __aeabi_ddiv
wrapper_func __aeabi_dmul
wrapper_func __aeabi_drsub
wrapper_func __aeabi_dsub
wrapper_func __aeabi_cdcmpeq
wrapper_func __aeabi_cdrcmple
wrapper_func __aeabi_cdcmple
wrapper_func __aeabi_dcmpeq
wrapper_func __aeabi_dcmplt
wrapper_func __aeabi_dcmple
wrapper_func __aeabi_dcmpge
wrapper_func __aeabi_dcmpgt
wrapper_func __aeabi_dcmpun
wrapper_func __aeabi_i2d
wrapper_func __aeabi_l2d
wrapper_func __aeabi_ui2d
wrapper_func __aeabi_ul2d
wrapper_func __aeabi_d2iz
wrapper_func __aeabi_d2lz
wrapper_func __aeabi_d2uiz
wrapper_func __aeabi_d2ulz
wrapper_func __aeabi_d2f
wrapper_func sqrt
wrapper_func cos
wrapper_func sin
wrapper_func tan
wrapper_func atan2
wrapper_func exp
wrapper_func log
wrapper_func ldexp
wrapper_func copysign
wrapper_func trunc
wrapper_func floor
wrapper_func ceil
wrapper_func round
wrapper_func sincos
wrapper_func asin
wrapper_func acos
wrapper_func atan
wrapper_func sinh
wrapper_func cosh
wrapper_func tanh
wrapper_func asinh
wrapper_func acosh
wrapper_func atanh
wrapper_func exp2
wrapper_func log2
wrapper_func exp10
wrapper_func log10
wrapper_func pow
wrapper_func powint
wrapper_func hypot
wrapper_func cbrt
wrapper_func fmod
wrapper_func drem
wrapper_func remainder
wrapper_func remquo
wrapper_func expm1
wrapper_func log1p
wrapper_func fma
push {lr} // keep stack trace sane
ldr r0, =str
bl panic
str:
.asciz "double support is disabled"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#ifndef _PICO_DOUBLE_H
#define _PICO_DOUBLE_H
#include <math.h>
#include "pico/types.h"
#include "pico/bootrom/sf_table.h"
#ifdef __cplusplus
extern "C" {
#endif
/** \file double.h
* \defgroup pico_double pico_double
*
* Optimized double-precision floating point functions
*
* (Replacement) optimized implementations are provided of the following compiler built-ins
* and math library functions:
*
* - __aeabi_dadd, __aeabi_ddiv, __aeabi_dmul, __aeabi_drsub, __aeabi_dsub, __aeabi_cdcmpeq, __aeabi_cdrcmple, __aeabi_cdcmple, __aeabi_dcmpeq, __aeabi_dcmplt, __aeabi_dcmple, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmpun, __aeabi_i2d, __aeabi_l2d, __aeabi_ui2d, __aeabi_ul2d, __aeabi_d2iz, __aeabi_d2lz, __aeabi_d2uiz, __aeabi_d2ulz, __aeabi_d2f
* - sqrt, cos, sin, tan, atan2, exp, log, ldexp, copysign, trunc, floor, ceil, round, asin, acos, atan, sinh, cosh, tanh, asinh, acosh, atanh, exp2, log2, exp10, log10, pow,, hypot, cbrt, fmod, drem, remainder, remquo, expm1, log1p, fma
* - powint, sincos (GNU extensions)
*
* The following additional optimized functions are also provided:
*
* - fix2double, ufix2double, fix642double, ufix642double, double2fix, double2ufix, double2fix64, double2ufix64, double2int, double2int64, double2int_z, double2int64_z
*/
double fix2double(int32_t m, int e);
double ufix2double(uint32_t m, int e);
double fix642double(int64_t m, int e);
double ufix642double(uint64_t m, int e);
// These methods round towards -Infinity.
int32_t double2fix(double f, int e);
uint32_t double2ufix(double f, int e);
int64_t double2fix64(double f, int e);
uint64_t double2ufix64(double f, int e);
int32_t double2int(double f);
int64_t double2int64(double f);
// These methods round towards 0.
int32_t double2int_z(double f);
int64_t double2int64_z(double f);
double exp10(double x);
void sincos(double x, double *sinx, double *cosx);
double powint(double x, int y);
#ifdef __cplusplus
}
#endif
#endif