add PICO_DIVIDER_DISABLE_INTERRUPTS flag which makes PICO_DIVIDER disable interrupts around division rather than using co-operative guards to protect nested use (i.e. within IRQ/exception). Use of this flag can simplify porting of RTOSes but with a different performance profile (#372)

This commit is contained in:
Graham Sanderson 2021-05-04 07:48:07 -05:00 committed by GitHub
parent c979ca591f
commit 6796faf0d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 226 additions and 0 deletions

View File

@ -11,16 +11,21 @@
.cpu cortex-m0plus .cpu cortex-m0plus
.thumb .thumb
// PICO_CONFIG: PICO_DIVIDER_DISABLE_INTERRUPTS, Disable interrupts around division such that divider state need not be saved/restored in exception handlers, default=0, group=pico_divider
#include "pico/asm_helper.S" #include "pico/asm_helper.S"
// PICO_CONFIG: PICO_DIVIDER_CALL_IDIV0, Whether 32 bit division by zero should call __aeabi_idiv0, default=1, group=pico_divider
#ifndef PICO_DIVIDER_CALL_IDIV0 #ifndef PICO_DIVIDER_CALL_IDIV0
#define PICO_DIVIDER_CALL_IDIV0 1 #define PICO_DIVIDER_CALL_IDIV0 1
#endif #endif
// PICO_CONFIG: PICO_DIVIDER_CALL_IDIV0, Whether 64 bit division by zero should call __aeabi_ldiv0, default=1, group=pico_divider
#ifndef PICO_DIVIDER_CALL_LDIV0 #ifndef PICO_DIVIDER_CALL_LDIV0
#define PICO_DIVIDER_CALL_LDIV0 1 #define PICO_DIVIDER_CALL_LDIV0 1
#endif #endif
// PICO_CONFIG: PICO_DIVIDER_IN_RAM, Whether divider functions should be placed in RAM, default=0, group=pico_divider
.macro div_section name .macro div_section name
#if PICO_DIVIDER_IN_RAM #if PICO_DIVIDER_IN_RAM
.section RAM_SECTION_NAME(\name), "ax" .section RAM_SECTION_NAME(\name), "ax"
@ -56,6 +61,8 @@ need to change SHIFT above
#error register layout has changed - we rely on this order to make sure we save/restore in the right order #error register layout has changed - we rely on this order to make sure we save/restore in the right order
#endif #endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
# SIO_BASE ptr in r2 # SIO_BASE ptr in r2
.macro save_div_state_and_lr .macro save_div_state_and_lr
ldr r3, [r2, #SIO_DIV_CSR_OFFSET] ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
@ -137,6 +144,7 @@ need to change SHIFT above
pop {r4, r5, r6, r7, pc} pop {r4, r5, r6, r7, pc}
.endm .endm
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
// since idiv and idivmod only differ by a cycle, we'll make them the same! // since idiv and idivmod only differ by a cycle, we'll make them the same!
div_section WRAPPER_FUNC_NAME(__aeabi_idiv) div_section WRAPPER_FUNC_NAME(__aeabi_idiv)
@ -145,12 +153,22 @@ wrapper_func __aeabi_idiv
wrapper_func __aeabi_idivmod wrapper_func __aeabi_idivmod
regular_func div_s32s32 regular_func div_s32s32
regular_func divmod_s32s32 regular_func divmod_s32s32
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore # to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET] ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_s32s32_savestate bcs divmod_s32s32_savestate
regular_func divmod_s32s32_unsafe regular_func divmod_s32s32_unsafe
#else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway
regular_func divmod_s32s32_unsafe
ldr r2, =(SIO_BASE)
mrs r3, PRIMASK
cpsid i
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
str r0, [r2, #SIO_DIV_SDIVIDEND_OFFSET] str r0, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
str r1, [r2, #SIO_DIV_SDIVISOR_OFFSET] str r1, [r2, #SIO_DIV_SDIVISOR_OFFSET]
cmp r1, #0 cmp r1, #0
@ -159,8 +177,14 @@ regular_func divmod_s32s32_unsafe
// return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last) // return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last)
ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET] ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET] ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET]
#if PICO_DIVIDER_DISABLE_INTERRUPTS
msr PRIMASK, r3
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
bx lr bx lr
1: 1:
#if PICO_DIVIDER_DISABLE_INTERRUPTS
msr PRIMASK, r3
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
push {r2, lr} push {r2, lr}
movs r1, #0x80 movs r1, #0x80
lsls r1, #24 lsls r1, #24
@ -176,11 +200,13 @@ regular_func divmod_s32s32_unsafe
movs r1, #0 // remainder 0 movs r1, #0 // remainder 0
// need to restore saved r2 as it hold SIO ptr // need to restore saved r2 as it hold SIO ptr
pop {r2, pc} pop {r2, pc}
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
.align 2 .align 2
regular_func divmod_s32s32_savestate regular_func divmod_s32s32_savestate
save_div_state_and_lr save_div_state_and_lr
bl divmod_s32s32_unsafe bl divmod_s32s32_unsafe
restore_div_state_and_return restore_div_state_and_return
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
// since uidiv and uidivmod only differ by a cycle, we'll make them the same! // since uidiv and uidivmod only differ by a cycle, we'll make them the same!
div_section WRAPPER_FUNC_NAME(__aeabi_uidiv) div_section WRAPPER_FUNC_NAME(__aeabi_uidiv)
@ -188,12 +214,22 @@ regular_func div_u32u32
regular_func divmod_u32u32 regular_func divmod_u32u32
wrapper_func __aeabi_uidiv wrapper_func __aeabi_uidiv
wrapper_func __aeabi_uidivmod wrapper_func __aeabi_uidivmod
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore # to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET] ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_u32u32_savestate bcs divmod_u32u32_savestate
regular_func divmod_u32u32_unsafe regular_func divmod_u32u32_unsafe
#else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway
regular_func divmod_u32u32_unsafe
ldr r2, =(SIO_BASE)
mrs r3, PRIMASK
cpsid i
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
str r0, [r2, #SIO_DIV_UDIVIDEND_OFFSET] str r0, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
str r1, [r2, #SIO_DIV_UDIVISOR_OFFSET] str r1, [r2, #SIO_DIV_UDIVISOR_OFFSET]
cmp r1, #0 cmp r1, #0
@ -202,8 +238,14 @@ regular_func divmod_u32u32_unsafe
// return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last) // return 64 bit value so we can efficiently return both (note read order is important since QUOTIENT must be read last)
ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET] ldr r1, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET] ldr r0, [r2, #SIO_DIV_QUOTIENT_OFFSET]
#if PICO_DIVIDER_DISABLE_INTERRUPTS
msr PRIMASK, r3
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
bx lr bx lr
1: 1:
#if PICO_DIVIDER_DISABLE_INTERRUPTS
msr PRIMASK, r3
#endif /* PICO_DIVIDER_DISABLE_INTERRUPTS */
push {r2, lr} push {r2, lr}
cmp r0, #0 cmp r0, #0
beq 1f beq 1f
@ -216,11 +258,13 @@ regular_func divmod_u32u32_unsafe
movs r1, #0 // remainder 0 movs r1, #0 // remainder 0
// need to restore saved r2 as it hold SIO ptr // need to restore saved r2 as it hold SIO ptr
pop {r2, pc} pop {r2, pc}
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
.align 2 .align 2
regular_func divmod_u32u32_savestate regular_func divmod_u32u32_savestate
save_div_state_and_lr save_div_state_and_lr
bl divmod_u32u32_unsafe bl divmod_u32u32_unsafe
restore_div_state_and_return restore_div_state_and_return
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
div_section WRAPPER_FUNC_NAME(__aeabi_ldiv) div_section WRAPPER_FUNC_NAME(__aeabi_ldiv)
@ -228,6 +272,7 @@ div_section WRAPPER_FUNC_NAME(__aeabi_ldiv)
wrapper_func __aeabi_ldivmod wrapper_func __aeabi_ldivmod
regular_func div_s64s64 regular_func div_s64s64
regular_func divmod_s64s64 regular_func divmod_s64s64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
mov ip, r2 mov ip, r2
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore # to support IRQ usage we must save/restore
@ -241,11 +286,20 @@ divmod_s64s64_savestate:
save_div_state_and_lr_64 save_div_state_and_lr_64
bl divmod_s64s64_unsafe bl divmod_s64s64_unsafe
restore_div_state_and_return_64 restore_div_state_and_return_64
#else
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl divmod_s64s64_unsafe
msr PRIMASK, r4
pop {r4, pc}
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
.align 2 .align 2
wrapper_func __aeabi_uldivmod wrapper_func __aeabi_uldivmod
regular_func div_u64u64 regular_func div_u64u64
regular_func divmod_u64u64 regular_func divmod_u64u64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
mov ip, r2 mov ip, r2
ldr r2, =(SIO_BASE) ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore # to support IRQ usage we must save/restore
@ -259,6 +313,15 @@ regular_func divmod_u64u64_savestate
save_div_state_and_lr_64 save_div_state_and_lr_64
bl divmod_u64u64_unsafe bl divmod_u64u64_unsafe
restore_div_state_and_return_64 restore_div_state_and_return_64
#else
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl divmod_u64u64_unsafe
msr PRIMASK, r4
pop {r4, pc}
#endif /* !PICO_DIVIDER_DISABLE_INTERRUPTS */
.macro dneg lo,hi .macro dneg lo,hi
mvns \hi,\hi mvns \hi,\hi
rsbs \lo,#0 rsbs \lo,#0

View File

@ -12,6 +12,27 @@ if (PICO_ON_DEVICE)
pico_add_extra_outputs(pico_divider_test) pico_add_extra_outputs(pico_divider_test)
target_compile_definitions(pico_divider_test PRIVATE target_compile_definitions(pico_divider_test PRIVATE
PICO_DIVIDER_DISABLE_INTERRUPTS=1
# TURBO # TURBO
) )
# this is a separate test as hardware_explicit above causes it not to be tested at all!
add_library(pico_divider_nesting_test_core INTERFACE)
target_sources(pico_divider_nesting_test_core INTERFACE
pico_divider_nesting_test.c
)
target_link_libraries(pico_divider_nesting_test_core INTERFACE pico_stdlib hardware_dma)
add_executable(pico_divider_nesting_test_with_dirty_check)
target_link_libraries(pico_divider_nesting_test_with_dirty_check pico_divider_nesting_test_core)
pico_set_divider_implementation(pico_divider_nesting_test_with_dirty_check hardware)
pico_add_extra_outputs(pico_divider_nesting_test_with_dirty_check)
add_executable(pico_divider_nesting_test_with_disable_irq)
target_link_libraries(pico_divider_nesting_test_with_disable_irq pico_divider_nesting_test_core)
target_compile_definitions(pico_divider_nesting_test_with_disable_irq PRIVATE
PICO_DIVIDER_DISABLE_INTERRUPTS=1)
pico_set_divider_implementation(pico_divider_nesting_test_with_disable_irq hardware)
pico_add_extra_outputs(pico_divider_nesting_test_with_disable_irq)
endif() endif()

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "pico/stdlib.h"
#include "hardware/dma.h"
#include "hardware/irq.h"
volatile bool failed;
volatile uint32_t count[3];
volatile bool done;
bool timer_callback(repeating_timer_t *t) {
count[0]++;
static int z;
for (int i=0; i<100;i++) {
z += 23;
int a = z / 7;
int b = z % 7;
if (z != a * 7 + b) {
failed = true;
}
}
return !done;
}
void do_dma_start(uint ch) {
static uint32_t word[2];
assert(ch < 2);
dma_channel_config c = dma_channel_get_default_config(ch);
// todo remove this; landing in a separate PR
#ifndef DREQ_DMA_TIMER0
#define DREQ_DMA_TIMER0 0x3b
#endif
channel_config_set_dreq(&c, DREQ_DMA_TIMER0);
dma_channel_configure(ch, &c, &word[ch], &word[ch], 513 + ch * 23, true);
}
void test_irq_handler0() {
count[1]++;
dma_hw->ints0 |= 1u;
static uint z;
for (int i=0; i<80;i++) {
z += 31;
uint a = z / 11;
uint b = z % 11;
if (z != a * 11 + b) {
failed = true;
}
}
if (done) dma_channel_abort(0);
else do_dma_start(0);
}
void test_irq_handler1() {
static uint z;
dma_hw->ints1 |= 2u;
count[2]++;
for (int i=0; i<130;i++) {
z += 47;
uint a = z / -13;
uint b = z % -13;
if (z != a * -13 + b) {
failed = true;
}
static uint64_t z64;
z64 -= 47;
uint64_t a64 = z64 / -13;
uint64_t b64 = z64 % -13;
if (z64 != a64 * -13 + b64) {
failed = true;
}
}
if (done) dma_channel_abort(1);
else do_dma_start(1);
}
void test_nesting() {
uint z = 0;
// We have 3 different IRQ handlers, one for timer, two for DMA completion (on DMA_IRQ0/1)
// thus we expect re-entrancy even between IRQs
//
// They all busily make use of the dividers, to expose any issues with nested use
repeating_timer_t timer;
add_repeating_timer_us(529, timer_callback, NULL, &timer);
irq_set_exclusive_handler(DMA_IRQ_0, test_irq_handler0);
irq_set_exclusive_handler(DMA_IRQ_1, test_irq_handler1);
dma_set_irq0_channel_mask_enabled(1u, true);
dma_set_irq1_channel_mask_enabled(2u, true);
dma_hw->timer[0] = (1 << 16) | 32; // run at 1/32 system clock
irq_set_enabled(DMA_IRQ_0, 1);
irq_set_enabled(DMA_IRQ_1, 1);
do_dma_start(0);
do_dma_start(1);
absolute_time_t end = delayed_by_ms(get_absolute_time(), 2000);
int count_local=0;
while (!time_reached(end)) {
for(uint i=0;i<100;i++) {
z += 31;
uint a = z / 11;
uint b = z % 11;
if (z != a * 11 + b) {
failed = true;
}
}
count_local++;
}
done = true;
cancel_repeating_timer(&timer);
printf("%d: %d %d %d\n", count_local, (int)count[0], (int)count[1], (int)count[2]);
// make sure all the IRQs ran
if (!(count_local && count[0] && count[1] && count[2])) {
printf("DID NOT RUN\n");
exit(1);
}
if (failed) {
printf("FAILED\n");
exit(1);
}
}
int main() {
#ifndef uart_default
#warning test/pico_divider requires a default uart
#else
stdio_init_all();
#endif
test_nesting();
printf("PASSED\n");
return 0;
}