add PICO_DIVIDER_DISABLE_INTERRUPTS flag which makes PICO_DIVIDER disable interrupts around division rather than using co-operative guards to protect nested use (i.e. within IRQ/exception). Use of this flag can simplify porting of RTOSes but with a different performance profile (#372)
This commit is contained in:
parent
c979ca591f
commit
6796faf0d5
@ -11,16 +11,21 @@
|
||||
.cpu cortex-m0plus
|
||||
.thumb
|
||||
|
||||
// PICO_CONFIG: PICO_DIVIDER_DISABLE_INTERRUPTS, Disable interrupts around division such that divider state need not be saved/restored in exception handlers, default=0, group=pico_divider
|
||||
|
||||
#include "pico/asm_helper.S"
|
||||
|
||||
// PICO_CONFIG: PICO_DIVIDER_CALL_IDIV0, Whether 32 bit division by zero should call __aeabi_idiv0, default=1, group=pico_divider
|
||||
#ifndef PICO_DIVIDER_CALL_IDIV0
|
||||
#define PICO_DIVIDER_CALL_IDIV0 1
|
||||
#endif
|
||||
|
||||
// PICO_CONFIG: PICO_DIVIDER_CALL_IDIV0, Whether 64 bit division by zero should call __aeabi_ldiv0, default=1, group=pico_divider
|
||||
#ifndef PICO_DIVIDER_CALL_LDIV0
|
||||
#define PICO_DIVIDER_CALL_LDIV0 1
|
||||
#endif
|
||||
|
||||
// PICO_CONFIG: PICO_DIVIDER_IN_RAM, Whether divider functions should be placed in RAM, default=0, group=pico_divider
|
||||
.macro div_section name
|
||||
#if PICO_DIVIDER_IN_RAM
|
||||
.section RAM_SECTION_NAME(\name), "ax"
|
||||
@ -56,6 +61,8 @@ need to change SHIFT above
|
||||
#error register layout has changed - we rely on this order to make sure we save/restore in the right order
|
||||
#endif
|
||||
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
|
||||
# SIO_BASE ptr in r2
|
||||
.macro save_div_state_and_lr
|
||||
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
@ -137,6 +144,7 @@ need to change SHIFT above
|
||||
pop {r4, r5, r6, r7, pc}
|
||||
.endm
|
||||
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
|
||||
// since idiv and idivmod only differ by a cycle, we'll make them the same!
|
||||
div_section WRAPPER_FUNC_NAME(__aeabi_idiv)
|
||||
@ -145,12 +153,22 @@ wrapper_func __aeabi_idiv
|
||||
wrapper_func __aeabi_idivmod
|
||||
regular_func div_s32s32
|
||||
regular_func divmod_s32s32
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
ldr r2, =(SIO_BASE)
|
||||
# to support IRQ usage we must save/restore
|
||||
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
|
||||
bcs divmod_s32s32_savestate
|
||||
regular_func divmod_s32s32_unsafe
|
||||
#else
|
||||
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
|
||||
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
|
||||
# are the hardware_divider functions that can be used instead anyway
|
||||
regular_func divmod_s32s32_unsafe
|
||||
ldr r2, =(SIO_BASE)
|
||||
mrs r3, PRIMASK
|
||||
cpsid i
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
str r0, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
|
||||
str r1, [r2, #SIO_DIV_SDIVISOR_OFFSET]
|
||||
cmp r1, #0
|
||||
@ -159,8 +177,14 @@ regular_func divmod_s32s32_unsafe
|
||||
// return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last)
|
||||
ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET]
|
||||
ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
#if PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
msr PRIMASK, r3
|
||||
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
bx lr
|
||||
1:
|
||||
#if PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
msr PRIMASK, r3
|
||||
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
push {r2, lr}
|
||||
movs r1, #0x80
|
||||
lsls r1, #24
|
||||
@ -176,11 +200,13 @@ regular_func divmod_s32s32_unsafe
|
||||
movs r1, #0 // remainder 0
|
||||
// need to restore saved r2 as it hold SIO ptr
|
||||
pop {r2, pc}
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
.align 2
|
||||
regular_func divmod_s32s32_savestate
|
||||
save_div_state_and_lr
|
||||
bl divmod_s32s32_unsafe
|
||||
restore_div_state_and_return
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
|
||||
// since uidiv and uidivmod only differ by a cycle, we'll make them the same!
|
||||
div_section WRAPPER_FUNC_NAME(__aeabi_uidiv)
|
||||
@ -188,12 +214,22 @@ regular_func div_u32u32
|
||||
regular_func divmod_u32u32
|
||||
wrapper_func __aeabi_uidiv
|
||||
wrapper_func __aeabi_uidivmod
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
ldr r2, =(SIO_BASE)
|
||||
# to support IRQ usage we must save/restore
|
||||
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
|
||||
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
|
||||
bcs divmod_u32u32_savestate
|
||||
regular_func divmod_u32u32_unsafe
|
||||
#else
|
||||
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
|
||||
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
|
||||
# are the hardware_divider functions that can be used instead anyway
|
||||
regular_func divmod_u32u32_unsafe
|
||||
ldr r2, =(SIO_BASE)
|
||||
mrs r3, PRIMASK
|
||||
cpsid i
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
str r0, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
|
||||
str r1, [r2, #SIO_DIV_UDIVISOR_OFFSET]
|
||||
cmp r1, #0
|
||||
@ -202,8 +238,14 @@ regular_func divmod_u32u32_unsafe
|
||||
// return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last)
|
||||
ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET]
|
||||
ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET]
|
||||
#if PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
msr PRIMASK, r3
|
||||
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
bx lr
|
||||
1:
|
||||
#if PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
msr PRIMASK, r3
|
||||
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
push {r2, lr}
|
||||
cmp r0, #0
|
||||
beq 1f
|
||||
@ -216,11 +258,13 @@ regular_func divmod_u32u32_unsafe
|
||||
movs r1, #0 // remainder 0
|
||||
// need to restore saved r2 as it hold SIO ptr
|
||||
pop {r2, pc}
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
.align 2
|
||||
regular_func divmod_u32u32_savestate
|
||||
save_div_state_and_lr
|
||||
bl divmod_u32u32_unsafe
|
||||
restore_div_state_and_return
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
|
||||
div_section WRAPPER_FUNC_NAME(__aeabi_ldiv)
|
||||
|
||||
@ -228,6 +272,7 @@ div_section WRAPPER_FUNC_NAME(__aeabi_ldiv)
|
||||
wrapper_func __aeabi_ldivmod
|
||||
regular_func div_s64s64
|
||||
regular_func divmod_s64s64
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
mov ip, r2
|
||||
ldr r2, =(SIO_BASE)
|
||||
# to support IRQ usage we must save/restore
|
||||
@ -241,11 +286,20 @@ divmod_s64s64_savestate:
|
||||
save_div_state_and_lr_64
|
||||
bl divmod_s64s64_unsafe
|
||||
restore_div_state_and_return_64
|
||||
#else
|
||||
push {r4, lr}
|
||||
mrs r4, PRIMASK
|
||||
cpsid i
|
||||
bl divmod_s64s64_unsafe
|
||||
msr PRIMASK, r4
|
||||
pop {r4, pc}
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
|
||||
.align 2
|
||||
wrapper_func __aeabi_uldivmod
|
||||
regular_func div_u64u64
|
||||
regular_func divmod_u64u64
|
||||
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
|
||||
mov ip, r2
|
||||
ldr r2, =(SIO_BASE)
|
||||
# to support IRQ usage we must save/restore
|
||||
@ -259,6 +313,15 @@ regular_func divmod_u64u64_savestate
|
||||
save_div_state_and_lr_64
|
||||
bl divmod_u64u64_unsafe
|
||||
restore_div_state_and_return_64
|
||||
#else
|
||||
push {r4, lr}
|
||||
mrs r4, PRIMASK
|
||||
cpsid i
|
||||
bl divmod_u64u64_unsafe
|
||||
msr PRIMASK, r4
|
||||
pop {r4, pc}
|
||||
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
|
||||
|
||||
.macro dneg lo,hi
|
||||
mvns \hi,\hi
|
||||
rsbs \lo,#0
|
||||
|
@ -12,6 +12,27 @@ if (PICO_ON_DEVICE)
|
||||
pico_add_extra_outputs(pico_divider_test)
|
||||
|
||||
target_compile_definitions(pico_divider_test PRIVATE
|
||||
PICO_DIVIDER_DISABLE_INTERRUPTS=1
|
||||
# TURBO
|
||||
)
|
||||
|
||||
# this is a separate test as hardware_explicit above causes it not to be tested at all!
|
||||
add_library(pico_divider_nesting_test_core INTERFACE)
|
||||
target_sources(pico_divider_nesting_test_core INTERFACE
|
||||
pico_divider_nesting_test.c
|
||||
)
|
||||
target_link_libraries(pico_divider_nesting_test_core INTERFACE pico_stdlib hardware_dma)
|
||||
|
||||
add_executable(pico_divider_nesting_test_with_dirty_check)
|
||||
target_link_libraries(pico_divider_nesting_test_with_dirty_check pico_divider_nesting_test_core)
|
||||
pico_set_divider_implementation(pico_divider_nesting_test_with_dirty_check hardware)
|
||||
pico_add_extra_outputs(pico_divider_nesting_test_with_dirty_check)
|
||||
|
||||
add_executable(pico_divider_nesting_test_with_disable_irq)
|
||||
target_link_libraries(pico_divider_nesting_test_with_disable_irq pico_divider_nesting_test_core)
|
||||
target_compile_definitions(pico_divider_nesting_test_with_disable_irq PRIVATE
|
||||
PICO_DIVIDER_DISABLE_INTERRUPTS=1)
|
||||
pico_set_divider_implementation(pico_divider_nesting_test_with_disable_irq hardware)
|
||||
pico_add_extra_outputs(pico_divider_nesting_test_with_disable_irq)
|
||||
|
||||
endif()
|
142
test/pico_divider_test/pico_divider_nesting_test.c
Normal file
142
test/pico_divider_test/pico_divider_nesting_test.c
Normal file
@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include "pico/stdlib.h"
|
||||
#include "hardware/dma.h"
|
||||
#include "hardware/irq.h"
|
||||
|
||||
volatile bool failed;
|
||||
volatile uint32_t count[3];
|
||||
volatile bool done;
|
||||
|
||||
bool timer_callback(repeating_timer_t *t) {
|
||||
count[0]++;
|
||||
static int z;
|
||||
for (int i=0; i<100;i++) {
|
||||
z += 23;
|
||||
int a = z / 7;
|
||||
int b = z % 7;
|
||||
if (z != a * 7 + b) {
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
return !done;
|
||||
}
|
||||
|
||||
void do_dma_start(uint ch) {
|
||||
static uint32_t word[2];
|
||||
assert(ch < 2);
|
||||
dma_channel_config c = dma_channel_get_default_config(ch);
|
||||
// todo remove this; landing in a separate PR
|
||||
#ifndef DREQ_DMA_TIMER0
|
||||
#define DREQ_DMA_TIMER0 0x3b
|
||||
#endif
|
||||
channel_config_set_dreq(&c, DREQ_DMA_TIMER0);
|
||||
dma_channel_configure(ch, &c, &word[ch], &word[ch], 513 + ch * 23, true);
|
||||
}
|
||||
|
||||
void test_irq_handler0() {
|
||||
count[1]++;
|
||||
dma_hw->ints0 |= 1u;
|
||||
static uint z;
|
||||
for (int i=0; i<80;i++) {
|
||||
z += 31;
|
||||
uint a = z / 11;
|
||||
uint b = z % 11;
|
||||
if (z != a * 11 + b) {
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
if (done) dma_channel_abort(0);
|
||||
else do_dma_start(0);
|
||||
}
|
||||
|
||||
void test_irq_handler1() {
|
||||
static uint z;
|
||||
dma_hw->ints1 |= 2u;
|
||||
count[2]++;
|
||||
for (int i=0; i<130;i++) {
|
||||
z += 47;
|
||||
uint a = z / -13;
|
||||
uint b = z % -13;
|
||||
if (z != a * -13 + b) {
|
||||
failed = true;
|
||||
}
|
||||
static uint64_t z64;
|
||||
z64 -= 47;
|
||||
uint64_t a64 = z64 / -13;
|
||||
uint64_t b64 = z64 % -13;
|
||||
if (z64 != a64 * -13 + b64) {
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
if (done) dma_channel_abort(1);
|
||||
else do_dma_start(1);
|
||||
}
|
||||
|
||||
void test_nesting() {
|
||||
uint z = 0;
|
||||
|
||||
// We have 3 different IRQ handlers, one for timer, two for DMA completion (on DMA_IRQ0/1)
|
||||
// thus we expect re-entrancy even between IRQs
|
||||
//
|
||||
// They all busily make use of the dividers, to expose any issues with nested use
|
||||
|
||||
repeating_timer_t timer;
|
||||
add_repeating_timer_us(529, timer_callback, NULL, &timer);
|
||||
irq_set_exclusive_handler(DMA_IRQ_0, test_irq_handler0);
|
||||
irq_set_exclusive_handler(DMA_IRQ_1, test_irq_handler1);
|
||||
|
||||
dma_set_irq0_channel_mask_enabled(1u, true);
|
||||
dma_set_irq1_channel_mask_enabled(2u, true);
|
||||
dma_hw->timer[0] = (1 << 16) | 32; // run at 1/32 system clock
|
||||
|
||||
irq_set_enabled(DMA_IRQ_0, 1);
|
||||
irq_set_enabled(DMA_IRQ_1, 1);
|
||||
do_dma_start(0);
|
||||
do_dma_start(1);
|
||||
absolute_time_t end = delayed_by_ms(get_absolute_time(), 2000);
|
||||
int count_local=0;
|
||||
while (!time_reached(end)) {
|
||||
for(uint i=0;i<100;i++) {
|
||||
z += 31;
|
||||
uint a = z / 11;
|
||||
uint b = z % 11;
|
||||
if (z != a * 11 + b) {
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
count_local++;
|
||||
}
|
||||
done = true;
|
||||
cancel_repeating_timer(&timer);
|
||||
printf("%d: %d %d %d\n", count_local, (int)count[0], (int)count[1], (int)count[2]);
|
||||
|
||||
// make sure all the IRQs ran
|
||||
if (!(count_local && count[0] && count[1] && count[2])) {
|
||||
printf("DID NOT RUN\n");
|
||||
exit(1);
|
||||
}
|
||||
if (failed) {
|
||||
printf("FAILED\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
#ifndef uart_default
|
||||
#warning test/pico_divider requires a default uart
|
||||
#else
|
||||
stdio_init_all();
|
||||
#endif
|
||||
test_nesting();
|
||||
printf("PASSED\n");
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user