diff --git a/src/rp2_common/pico_divider/divider.S b/src/rp2_common/pico_divider/divider.S index 12eae38..8112681 100644 --- a/src/rp2_common/pico_divider/divider.S +++ b/src/rp2_common/pico_divider/divider.S @@ -11,16 +11,21 @@ .cpu cortex-m0plus .thumb +// PICO_CONFIG: PICO_DIVIDER_DISABLE_INTERRUPTS, Disable interrupts around division such that divider state need not be saved/restored in exception handlers, default=0, group=pico_divider + #include "pico/asm_helper.S" +// PICO_CONFIG: PICO_DIVIDER_CALL_IDIV0, Whether 32 bit division by zero should call __aeabi_idiv0, default=1, group=pico_divider #ifndef PICO_DIVIDER_CALL_IDIV0 #define PICO_DIVIDER_CALL_IDIV0 1 #endif +// PICO_CONFIG: PICO_DIVIDER_CALL_IDIV0, Whether 64 bit division by zero should call __aeabi_ldiv0, default=1, group=pico_divider #ifndef PICO_DIVIDER_CALL_LDIV0 #define PICO_DIVIDER_CALL_LDIV0 1 #endif +// PICO_CONFIG: PICO_DIVIDER_IN_RAM, Whether divider functions should be placed in RAM, default=0, group=pico_divider .macro div_section name #if PICO_DIVIDER_IN_RAM .section RAM_SECTION_NAME(\name), "ax" @@ -56,6 +61,8 @@ need to change SHIFT above #error register layout has changed - we rely on this order to make sure we save/restore in the right order #endif +#if !PICO_DIVIDER_DISABLE_INTERRUPTS + # SIO_BASE ptr in r2 .macro save_div_state_and_lr ldr r3, [r2, #SIO_DIV_CSR_OFFSET] @@ -137,6 +144,7 @@ need to change SHIFT above pop {r4, r5, r6, r7, pc} .endm +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ // since idiv and idivmod only differ by a cycle, we'll make them the same! div_section WRAPPER_FUNC_NAME(__aeabi_idiv) @@ -145,12 +153,22 @@ wrapper_func __aeabi_idiv wrapper_func __aeabi_idivmod regular_func div_s32s32 regular_func divmod_s32s32 +#if !PICO_DIVIDER_DISABLE_INTERRUPTS ldr r2, =(SIO_BASE) # to support IRQ usage we must save/restore ldr r3, [r2, #SIO_DIV_CSR_OFFSET] lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY bcs divmod_s32s32_savestate regular_func divmod_s32s32_unsafe +#else +# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs +# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there +# are the hardware_divider functions that can be used instead anyway +regular_func divmod_s32s32_unsafe + ldr r2, =(SIO_BASE) + mrs r3, PRIMASK + cpsid i +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ str r0, [r2, #SIO_DIV_SDIVIDEND_OFFSET] str r1, [r2, #SIO_DIV_SDIVISOR_OFFSET] cmp r1, #0 @@ -159,8 +177,14 @@ regular_func divmod_s32s32_unsafe // return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last) ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET] ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET] +#if PICO_DIVIDER_DISABLE_INTERRUPTS + msr PRIMASK, r3 +#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */ bx lr 1: +#if PICO_DIVIDER_DISABLE_INTERRUPTS + msr PRIMASK, r3 +#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */ push {r2, lr} movs r1, #0x80 lsls r1, #24 @@ -176,11 +200,13 @@ regular_func divmod_s32s32_unsafe movs r1, #0 // remainder 0 // need to restore saved r2 as it hold SIO ptr pop {r2, pc} +#if !PICO_DIVIDER_DISABLE_INTERRUPTS .align 2 regular_func divmod_s32s32_savestate save_div_state_and_lr bl divmod_s32s32_unsafe restore_div_state_and_return +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ // since uidiv and uidivmod only differ by a cycle, we'll make them the same! div_section WRAPPER_FUNC_NAME(__aeabi_uidiv) @@ -188,12 +214,22 @@ regular_func div_u32u32 regular_func divmod_u32u32 wrapper_func __aeabi_uidiv wrapper_func __aeabi_uidivmod +#if !PICO_DIVIDER_DISABLE_INTERRUPTS ldr r2, =(SIO_BASE) # to support IRQ usage we must save/restore ldr r3, [r2, #SIO_DIV_CSR_OFFSET] lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY bcs divmod_u32u32_savestate regular_func divmod_u32u32_unsafe +#else +# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs +# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there +# are the hardware_divider functions that can be used instead anyway +regular_func divmod_u32u32_unsafe + ldr r2, =(SIO_BASE) + mrs r3, PRIMASK + cpsid i +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ str r0, [r2, #SIO_DIV_UDIVIDEND_OFFSET] str r1, [r2, #SIO_DIV_UDIVISOR_OFFSET] cmp r1, #0 @@ -202,8 +238,14 @@ regular_func divmod_u32u32_unsafe // return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last) ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET] ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET] +#if PICO_DIVIDER_DISABLE_INTERRUPTS + msr PRIMASK, r3 +#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */ bx lr 1: +#if PICO_DIVIDER_DISABLE_INTERRUPTS + msr PRIMASK, r3 +#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */ push {r2, lr} cmp r0, #0 beq 1f @@ -216,11 +258,13 @@ regular_func divmod_u32u32_unsafe movs r1, #0 // remainder 0 // need to restore saved r2 as it hold SIO ptr pop {r2, pc} +#if !PICO_DIVIDER_DISABLE_INTERRUPTS .align 2 regular_func divmod_u32u32_savestate save_div_state_and_lr bl divmod_u32u32_unsafe restore_div_state_and_return +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ div_section WRAPPER_FUNC_NAME(__aeabi_ldiv) @@ -228,6 +272,7 @@ div_section WRAPPER_FUNC_NAME(__aeabi_ldiv) wrapper_func __aeabi_ldivmod regular_func div_s64s64 regular_func divmod_s64s64 +#if !PICO_DIVIDER_DISABLE_INTERRUPTS mov ip, r2 ldr r2, =(SIO_BASE) # to support IRQ usage we must save/restore @@ -241,11 +286,20 @@ divmod_s64s64_savestate: save_div_state_and_lr_64 bl divmod_s64s64_unsafe restore_div_state_and_return_64 +#else + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl divmod_s64s64_unsafe + msr PRIMASK, r4 + pop {r4, pc} +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ .align 2 wrapper_func __aeabi_uldivmod regular_func div_u64u64 regular_func divmod_u64u64 +#if !PICO_DIVIDER_DISABLE_INTERRUPTS mov ip, r2 ldr r2, =(SIO_BASE) # to support IRQ usage we must save/restore @@ -259,6 +313,15 @@ regular_func divmod_u64u64_savestate save_div_state_and_lr_64 bl divmod_u64u64_unsafe restore_div_state_and_return_64 +#else + push {r4, lr} + mrs r4, PRIMASK + cpsid i + bl divmod_u64u64_unsafe + msr PRIMASK, r4 + pop {r4, pc} +#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */ + .macro dneg lo,hi mvns \hi,\hi rsbs \lo,#0 diff --git a/test/pico_divider_test/CMakeLists.txt b/test/pico_divider_test/CMakeLists.txt index 4e922a4..c1ee628 100644 --- a/test/pico_divider_test/CMakeLists.txt +++ b/test/pico_divider_test/CMakeLists.txt @@ -12,6 +12,27 @@ if (PICO_ON_DEVICE) pico_add_extra_outputs(pico_divider_test) target_compile_definitions(pico_divider_test PRIVATE + PICO_DIVIDER_DISABLE_INTERRUPTS=1 # TURBO ) + + # this is a separate test as hardware_explicit above causes it not to be tested at all! + add_library(pico_divider_nesting_test_core INTERFACE) + target_sources(pico_divider_nesting_test_core INTERFACE + pico_divider_nesting_test.c + ) + target_link_libraries(pico_divider_nesting_test_core INTERFACE pico_stdlib hardware_dma) + + add_executable(pico_divider_nesting_test_with_dirty_check) + target_link_libraries(pico_divider_nesting_test_with_dirty_check pico_divider_nesting_test_core) + pico_set_divider_implementation(pico_divider_nesting_test_with_dirty_check hardware) + pico_add_extra_outputs(pico_divider_nesting_test_with_dirty_check) + + add_executable(pico_divider_nesting_test_with_disable_irq) + target_link_libraries(pico_divider_nesting_test_with_disable_irq pico_divider_nesting_test_core) + target_compile_definitions(pico_divider_nesting_test_with_disable_irq PRIVATE + PICO_DIVIDER_DISABLE_INTERRUPTS=1) + pico_set_divider_implementation(pico_divider_nesting_test_with_disable_irq hardware) + pico_add_extra_outputs(pico_divider_nesting_test_with_disable_irq) + endif() \ No newline at end of file diff --git a/test/pico_divider_test/pico_divider_nesting_test.c b/test/pico_divider_test/pico_divider_nesting_test.c new file mode 100644 index 0000000..4224181 --- /dev/null +++ b/test/pico_divider_test/pico_divider_nesting_test.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include "pico/stdlib.h" +#include "hardware/dma.h" +#include "hardware/irq.h" + +volatile bool failed; +volatile uint32_t count[3]; +volatile bool done; + +bool timer_callback(repeating_timer_t *t) { + count[0]++; + static int z; + for (int i=0; i<100;i++) { + z += 23; + int a = z / 7; + int b = z % 7; + if (z != a * 7 + b) { + failed = true; + } + } + return !done; +} + +void do_dma_start(uint ch) { + static uint32_t word[2]; + assert(ch < 2); + dma_channel_config c = dma_channel_get_default_config(ch); + // todo remove this; landing in a separate PR +#ifndef DREQ_DMA_TIMER0 +#define DREQ_DMA_TIMER0 0x3b +#endif + channel_config_set_dreq(&c, DREQ_DMA_TIMER0); + dma_channel_configure(ch, &c, &word[ch], &word[ch], 513 + ch * 23, true); +} + +void test_irq_handler0() { + count[1]++; + dma_hw->ints0 |= 1u; + static uint z; + for (int i=0; i<80;i++) { + z += 31; + uint a = z / 11; + uint b = z % 11; + if (z != a * 11 + b) { + failed = true; + } + } + if (done) dma_channel_abort(0); + else do_dma_start(0); +} + +void test_irq_handler1() { + static uint z; + dma_hw->ints1 |= 2u; + count[2]++; + for (int i=0; i<130;i++) { + z += 47; + uint a = z / -13; + uint b = z % -13; + if (z != a * -13 + b) { + failed = true; + } + static uint64_t z64; + z64 -= 47; + uint64_t a64 = z64 / -13; + uint64_t b64 = z64 % -13; + if (z64 != a64 * -13 + b64) { + failed = true; + } + } + if (done) dma_channel_abort(1); + else do_dma_start(1); +} + +void test_nesting() { + uint z = 0; + + // We have 3 different IRQ handlers, one for timer, two for DMA completion (on DMA_IRQ0/1) + // thus we expect re-entrancy even between IRQs + // + // They all busily make use of the dividers, to expose any issues with nested use + + repeating_timer_t timer; + add_repeating_timer_us(529, timer_callback, NULL, &timer); + irq_set_exclusive_handler(DMA_IRQ_0, test_irq_handler0); + irq_set_exclusive_handler(DMA_IRQ_1, test_irq_handler1); + + dma_set_irq0_channel_mask_enabled(1u, true); + dma_set_irq1_channel_mask_enabled(2u, true); + dma_hw->timer[0] = (1 << 16) | 32; // run at 1/32 system clock + + irq_set_enabled(DMA_IRQ_0, 1); + irq_set_enabled(DMA_IRQ_1, 1); + do_dma_start(0); + do_dma_start(1); + absolute_time_t end = delayed_by_ms(get_absolute_time(), 2000); + int count_local=0; + while (!time_reached(end)) { + for(uint i=0;i<100;i++) { + z += 31; + uint a = z / 11; + uint b = z % 11; + if (z != a * 11 + b) { + failed = true; + } + } + count_local++; + } + done = true; + cancel_repeating_timer(&timer); + printf("%d: %d %d %d\n", count_local, (int)count[0], (int)count[1], (int)count[2]); + + // make sure all the IRQs ran + if (!(count_local && count[0] && count[1] && count[2])) { + printf("DID NOT RUN\n"); + exit(1); + } + if (failed) { + printf("FAILED\n"); + exit(1); + } +} + +int main() { +#ifndef uart_default +#warning test/pico_divider requires a default uart +#else + stdio_init_all(); +#endif + test_nesting(); + printf("PASSED\n"); + return 0; +} +