From cc169ddd68a3ab9921bd4b8ccbe2994175c658dc Mon Sep 17 00:00:00 2001 From: Graham Sanderson Date: Mon, 6 Feb 2023 16:27:39 -0600 Subject: [PATCH] stdio hardening + new mutex API (#1224) * * Harden stdio_usb and stdio in general against deadlocks which could otherwise result from doing printfs from within IRQs * Add a test for the above * Add mutex_try_enter_block_until API. * Make best_effort_wfe_or_timeout not use alarms if called from within IRQ --- src/common/pico_sync/include/pico/mutex.h | 14 +++++ src/common/pico_sync/mutex.c | 11 ++++ src/common/pico_time/time.c | 19 ++++--- .../pico_stdio/include/pico/stdio.h | 5 ++ src/rp2_common/pico_stdio/stdio.c | 13 +---- src/rp2_common/pico_stdio_usb/stdio_usb.c | 36 +++++++----- test/CMakeLists.txt | 1 + test/pico_stdio_test/CMakeLists.txt | 16 ++++++ test/pico_stdio_test/pico_stdio_test.c | 57 +++++++++++++++++++ 9 files changed, 138 insertions(+), 34 deletions(-) create mode 100644 test/pico_stdio_test/CMakeLists.txt create mode 100644 test/pico_stdio_test/pico_stdio_test.c diff --git a/src/common/pico_sync/include/pico/mutex.h b/src/common/pico_sync/include/pico/mutex.h index 088bb4c..bcb3e99 100644 --- a/src/common/pico_sync/include/pico/mutex.h +++ b/src/common/pico_sync/include/pico/mutex.h @@ -114,6 +114,20 @@ void recursive_mutex_enter_blocking(recursive_mutex_t *mtx); */ bool mutex_try_enter(mutex_t *mtx, uint32_t *owner_out); +/*! \brief Attempt to take ownership of a mutex until the specified time + * \ingroup mutex + * + * If the mutex wasn't owned, this method will immediately claim the mutex for the caller and return true. + * If the mutex is owned by the caller, this method will immediately return false, + * If the mutex is owned by someone else, this method will try to claim it until the specified time, returning + * true if it succeeds, or false on timeout + * + * \param mtx Pointer to mutex structure + * \param until The time after which to return if the caller cannot be granted ownership of the mutex + * \return true if mutex now owned, false otherwise + */ +bool mutex_try_enter_block_until(mutex_t *mtx, absolute_time_t until); + /*! \brief Attempt to take ownership of a recursive mutex * \ingroup mutex * diff --git a/src/common/pico_sync/mutex.c b/src/common/pico_sync/mutex.c index 3ea81c7..828be68 100644 --- a/src/common/pico_sync/mutex.c +++ b/src/common/pico_sync/mutex.c @@ -80,6 +80,17 @@ bool __time_critical_func(mutex_try_enter)(mutex_t *mtx, uint32_t *owner_out) { return entered; } +bool __time_critical_func(mutex_try_enter_block_until)(mutex_t *mtx, absolute_time_t until) { + // not using lock_owner_id_t to avoid backwards incompatibility change to mutex_try_enter API + static_assert(sizeof(lock_owner_id_t) <= 4, ""); + uint32_t owner; + if (!mutex_try_enter(mtx, &owner)) { + if ((lock_owner_id_t)owner == lock_get_caller_owner_id()) return false; // deadlock, so we can never own it + return mutex_enter_block_until(mtx, until); + } + return true; +} + bool __time_critical_func(recursive_mutex_try_enter)(recursive_mutex_t *mtx, uint32_t *owner_out) { bool entered; lock_owner_id_t caller = lock_get_caller_owner_id(); diff --git a/src/common/pico_time/time.c b/src/common/pico_time/time.c index b478fc7..a4c0b27 100644 --- a/src/common/pico_time/time.c +++ b/src/common/pico_time/time.c @@ -431,16 +431,21 @@ void sleep_ms(uint32_t ms) { bool best_effort_wfe_or_timeout(absolute_time_t timeout_timestamp) { #if !PICO_TIME_DEFAULT_ALARM_POOL_DISABLED - alarm_id_t id; - id = add_alarm_at(timeout_timestamp, sleep_until_callback, NULL, false); - if (id <= 0) { + if (__get_current_exception()) { tight_loop_contents(); return time_reached(timeout_timestamp); } else { - __wfe(); - // we need to clean up if it wasn't us that caused the wfe; if it was this will be a noop. - cancel_alarm(id); - return time_reached(timeout_timestamp); + alarm_id_t id; + id = add_alarm_at(timeout_timestamp, sleep_until_callback, NULL, false); + if (id <= 0) { + tight_loop_contents(); + return time_reached(timeout_timestamp); + } else { + __wfe(); + // we need to clean up if it wasn't us that caused the wfe; if it was this will be a noop. + cancel_alarm(id); + return time_reached(timeout_timestamp); + } } #else tight_loop_contents(); diff --git a/src/rp2_common/pico_stdio/include/pico/stdio.h b/src/rp2_common/pico_stdio/include/pico/stdio.h index 205473b..430be36 100644 --- a/src/rp2_common/pico_stdio/include/pico/stdio.h +++ b/src/rp2_common/pico_stdio/include/pico/stdio.h @@ -36,6 +36,11 @@ #define PICO_STDIO_STACK_BUFFER_SIZE 128 #endif +// PICO_CONFIG: PICO_STDIO_DEADLOCK_TIMEOUT_MS, Time after which to assume stdio_usb is deadlocked by use in IRQ and give up, type=int, default=1000, group=pico_stdio +#ifndef PICO_STDIO_DEADLOCK_TIMEOUT_MS +#define PICO_STDIO_DEADLOCK_TIMEOUT_MS 1000 +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/src/rp2_common/pico_stdio/stdio.c b/src/rp2_common/pico_stdio/stdio.c index 22aea46..3656d2f 100644 --- a/src/rp2_common/pico_stdio/stdio.c +++ b/src/rp2_common/pico_stdio/stdio.c @@ -43,18 +43,7 @@ static stdio_driver_t *filter; auto_init_mutex(print_mutex); bool stdout_serialize_begin(void) { - lock_owner_id_t caller = lock_get_caller_owner_id(); - // not using lock_owner_id_t to avoid backwards incompatibility change to mutex_try_enter API - static_assert(sizeof(lock_owner_id_t) <= 4, ""); - uint32_t owner; - if (!mutex_try_enter(&print_mutex, &owner)) { - if (owner == (uint32_t)caller) { - return false; - } - // we are not a nested call, so lets wait - mutex_enter_blocking(&print_mutex); - } - return true; + return mutex_try_enter_block_until(&print_mutex, make_timeout_time_ms(PICO_STDIO_DEADLOCK_TIMEOUT_MS)); } void stdout_serialize_end(void) { diff --git a/src/rp2_common/pico_stdio_usb/stdio_usb.c b/src/rp2_common/pico_stdio_usb/stdio_usb.c index fda2fda..32f1f33 100644 --- a/src/rp2_common/pico_stdio_usb/stdio_usb.c +++ b/src/rp2_common/pico_stdio_usb/stdio_usb.c @@ -93,10 +93,8 @@ static void usb_irq(void) { static void stdio_usb_out_chars(const char *buf, int length) { static uint64_t last_avail_time; - uint32_t owner; - if (!mutex_try_enter(&stdio_usb_mutex, &owner)) { - if (owner == get_core_num()) return; // would deadlock otherwise - mutex_enter_blocking(&stdio_usb_mutex); + if (!mutex_try_enter_block_until(&stdio_usb_mutex, make_timeout_time_ms(PICO_STDIO_DEADLOCK_TIMEOUT_MS))) { + return; } if (stdio_usb_connected()) { for (int i = 0; i < length;) { @@ -126,20 +124,28 @@ static void stdio_usb_out_chars(const char *buf, int length) { } int stdio_usb_in_chars(char *buf, int length) { - uint32_t owner; - if (!mutex_try_enter(&stdio_usb_mutex, &owner)) { - if (owner == get_core_num()) return PICO_ERROR_NO_DATA; // would deadlock otherwise - mutex_enter_blocking(&stdio_usb_mutex); - } + // note we perform this check outside the lock, to try and prevent possible deadlock conditions + // with printf in IRQs (which we will escape through timeouts elsewhere, but that would be less graceful). + // + // these are just checks of state, so we can call them while not holding the lock. + // they may be wrong, but only if we are in the middle of a tud_task call, in which case at worst + // we will mistakenly think we have data available when we do not (we will check again), or + // tud_task will complete running and we will check the right values the next time. + // int rc = PICO_ERROR_NO_DATA; if (stdio_usb_connected() && tud_cdc_available()) { - int count = (int) tud_cdc_read(buf, (uint32_t) length); - rc = count ? count : PICO_ERROR_NO_DATA; - } else { - // because our mutex use may starve out the background task, run tud_task here (we own the mutex) - tud_task(); + if (!mutex_try_enter_block_until(&stdio_usb_mutex, make_timeout_time_ms(PICO_STDIO_DEADLOCK_TIMEOUT_MS))) { + return PICO_ERROR_NO_DATA; // would deadlock otherwise + } + if (stdio_usb_connected() && tud_cdc_available()) { + int count = (int) tud_cdc_read(buf, (uint32_t) length); + rc = count ? count : PICO_ERROR_NO_DATA; + } else { + // because our mutex use may starve out the background task, run tud_task here (we own the mutex) + tud_task(); + } + mutex_exit(&stdio_usb_mutex); } - mutex_exit(&stdio_usb_mutex); return rc; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a61388a..207cb72 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(pico_test) add_subdirectory(pico_stdlib_test) +add_subdirectory(pico_stdio_test) add_subdirectory(pico_time_test) add_subdirectory(pico_divider_test) if (PICO_ON_DEVICE) diff --git a/test/pico_stdio_test/CMakeLists.txt b/test/pico_stdio_test/CMakeLists.txt new file mode 100644 index 0000000..e7759dd --- /dev/null +++ b/test/pico_stdio_test/CMakeLists.txt @@ -0,0 +1,16 @@ +if (NOT PICO_TIME_NO_ALARM_SUPPORT) + add_executable(pico_stdio_test_uart pico_stdio_test.c) + target_link_libraries(pico_stdio_test_uart PRIVATE pico_stdlib pico_test pico_multicore) + pico_add_extra_outputs(pico_stdio_test_uart) + pico_enable_stdio_uart(pico_stdio_test_uart 1) + pico_enable_stdio_usb(pico_stdio_test_uart 0) + + add_executable(pico_stdio_test_usb pico_stdio_test.c) + target_link_libraries(pico_stdio_test_usb PRIVATE pico_stdlib pico_test pico_multicore) + target_compile_definitions(pico_stdio_test_usb PRIVATE + PICO_STDIO_USB_CONNECT_WAIT_TIMEOUT_MS=-1) # wait for USB connect + + pico_add_extra_outputs(pico_stdio_test_usb) + pico_enable_stdio_uart(pico_stdio_test_usb 0) + pico_enable_stdio_usb(pico_stdio_test_usb 1) +endif() \ No newline at end of file diff --git a/test/pico_stdio_test/pico_stdio_test.c b/test/pico_stdio_test/pico_stdio_test.c new file mode 100644 index 0000000..a798533 --- /dev/null +++ b/test/pico_stdio_test/pico_stdio_test.c @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2023 Raspberry Pi (Trading) Ltd. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include "pico/stdlib.h" +#include "pico/multicore.h" +#include "pico/test.h" + +PICOTEST_MODULE_NAME("pico_stdio_test", "pico_stdio test harness"); + +static volatile bool deadlock_test_done; +static void deadlock_test_core1(void) { + busy_wait_ms(250); + for(int i=0;i<1000;i++) { + if (deadlock_test_done) break; + printf("Hello from core 1 - %d\n", i); + busy_wait_ms(23); + } +} + +static volatile bool deadlock_test_irq_called; + +static int64_t deadlock_test_alarm(alarm_id_t id, void *param) { + static int foo; + printf("Here is a printf from the IRQ %d\n", ++foo); + deadlock_test_irq_called = true; + return 100; +} + +int main() { + stdio_init_all(); + + for(int i=0;i<10;i++) { + printf("Hello %d\n", i); + } + printf("pico_stdio_test begins\n"); + PICOTEST_START(); + + // Check default config has valid data in it + PICOTEST_START_SECTION("STDIO deadlock test"); + multicore_launch_core1(deadlock_test_core1); + alarm_id_t alarm_id = add_alarm_in_ms(500, deadlock_test_alarm, NULL, false); + PICOTEST_CHECK(getchar_timeout_us(2000000) < 0, "someone pressed a key!"); + + deadlock_test_done = true; + cancel_alarm(alarm_id); + + sleep_ms(100); + PICOTEST_CHECK(deadlock_test_irq_called, "deadlock_test_irq was not called"); + PICOTEST_END_SECTION(); + + PICOTEST_END_TEST(); +} +