Rework lock_core / timers (#378)

- Add recursive_mutex
- Make all locking primitives and sleep use common overridable wait/notify support to allow RTOS
  implementations to replace WFE/SEV with something more appropriate
- Add busy_wait_ms
Graham Sanderson authored on 2021-05-05 11:46:25 -05:00 (committed by GitHub)
parent ec0dc7a88b · commit 6d87da4c59
15 changed files with 434 additions and 152 deletions

View File

@@ -10,15 +10,18 @@
static_assert(sizeof(critical_section_t) == 8, "");
#endif
void critical_section_init(critical_section_t *critsec) {
critical_section_init_with_lock_num(critsec, (uint)spin_lock_claim_unused(true));
void critical_section_init(critical_section_t *crit_sec) {
critical_section_init_with_lock_num(crit_sec, (uint)spin_lock_claim_unused(true));
}
void critical_section_init_with_lock_num(critical_section_t *critsec, uint lock_num) {
lock_init(&critsec->core, lock_num);
void critical_section_init_with_lock_num(critical_section_t *crit_sec, uint lock_num) {
crit_sec->spin_lock = spin_lock_instance(lock_num);
__mem_fence_release();
}
void critical_section_deinit(critical_section_t *critsec) {
spin_lock_unclaim(spin_lock_get_num(critsec->core.spin_lock));
void critical_section_deinit(critical_section_t *crit_sec) {
spin_lock_unclaim(spin_lock_get_num(crit_sec->spin_lock));
#ifndef NDEBUG
crit_sec->spin_lock = (spin_lock_t *)-1;
#endif
}

View File

@@ -22,11 +22,12 @@ extern "C" {
* from the other core, and from (higher priority) interrupts on the same core. It does the former
* using a spin lock and the latter by disabling interrupts on the calling core.
*
* Because interrupts are disabled by this function, uses of the critical_section should be as short as possible.
* Because interrupts are disabled when a critical_section is owned, uses of the critical_section
* should be as short as possible.
*/
typedef struct __packed_aligned critical_section {
lock_core_t core;
spin_lock_t *spin_lock;
uint32_t save;
} critical_section_t;
@@ -38,16 +39,16 @@ typedef struct __packed_aligned critical_section {
* critical sections, however if you do so you *must* use \ref critical_section_init_with_lock_num
* to ensure that the spin locks used are different.
*
* \param critsec Pointer to critical_section structure
* \param crit_sec Pointer to critical_section structure
*/
void critical_section_init(critical_section_t *critsec);
void critical_section_init(critical_section_t *crit_sec);
/*! \brief Initialise a critical_section structure assigning a specific spin lock number
* \ingroup critical_section
* \param critsec Pointer to critical_section structure
* \param crit_sec Pointer to critical_section structure
* \param lock_num the specific spin lock number to use
*/
void critical_section_init_with_lock_num(critical_section_t *critsec, uint lock_num);
void critical_section_init_with_lock_num(critical_section_t *crit_sec, uint lock_num);
/*! \brief Enter a critical_section
* \ingroup critical_section
@@ -55,20 +56,32 @@ void critical_section_init_with_lock_num(critical_section_t *critsec, uint lock_
* If the spin lock associated with this critical section is in use, then this
* method will block until it is released.
*
* \param critsec Pointer to critical_section structure
* \param crit_sec Pointer to critical_section structure
*/
static inline void critical_section_enter_blocking(critical_section_t *critsec) {
critsec->save = spin_lock_blocking(critsec->core.spin_lock);
static inline void critical_section_enter_blocking(critical_section_t *crit_sec) {
crit_sec->save = spin_lock_blocking(crit_sec->spin_lock);
}
/*! \brief Release a critical_section
* \ingroup critical_section
*
* \param critsec Pointer to critical_section structure
* \param crit_sec Pointer to critical_section structure
*/
static inline void critical_section_exit(critical_section_t *critsec) {
spin_unlock(critsec->core.spin_lock, critsec->save);
static inline void critical_section_exit(critical_section_t *crit_sec) {
spin_unlock(crit_sec->spin_lock, crit_sec->save);
}
/*! \brief De-Initialise a critical_section created by the critical_section_init method
* \ingroup critical_section
*
 * This method is only used to free the associated spin lock allocated via
 * the critical_section_init method (it should not be used on a critical_section initialised
 * via critical_section_init_with_lock_num). After this call, the critical section is invalid.
*
* \param crit_sec Pointer to critical_section structure
*/
void critical_section_deinit(critical_section_t *crit_sec);
#ifdef __cplusplus
}
#endif
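
As a reading aid, here is a minimal usage sketch of the API declared above (not part of this commit; the names `counter_cs`, `counter_setup` and `counter_bump` are hypothetical), guarding state that is shared with the other core and with IRQ handlers:

```c
#include <stdint.h>
#include "pico/critical_section.h"

static critical_section_t counter_cs;      // hypothetical example state
static volatile uint32_t counter;

void counter_setup(void) {
    critical_section_init(&counter_cs);    // claims a free spin lock
}

void counter_bump(void) {
    critical_section_enter_blocking(&counter_cs);  // IRQs off on this core, spin lock held
    counter++;                                      // keep this region as short as possible
    critical_section_exit(&counter_cs);             // restores interrupt state, releases lock
}
```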

View File

@@ -8,26 +8,183 @@
#define _PICO_LOCK_CORE_H
#include "pico.h"
#include "pico/time.h"
#include "hardware/sync.h"
/** \file lock_core.h
* \defgroup lock_core lock_core
* \ingroup pico_sync
* \brief base synchronization/lock primitive support
*
* Most of the pico_sync locking primitives contain a lock_core_t structure member. This currently just holds a spin
* lock which is used only to protect the contents of the rest of the structure as part of implementing the synchronization
* primitive. As such, the spin_lock member of lock_core is never left held on return from any of the primitive's functions.
*
* \ref critical_section is an exceptional case in that it does not have a lock_core_t and simply wraps a spin lock, providing
* methods to lock and unlock said spin lock.
*
* lock_core based structures work by locking the spin lock, checking state, and then deciding whether they additionally need to block
* or notify when the spin lock is released. In the blocking case, they will wake up again in the future, and try the process again.
*
* By default the SDK just uses the processors' events via SEV and WFE for notification and blocking, as these are sufficient for
* cross-core use and for notification from interrupt handlers. However, macros are defined in this file that abstract the wait
* and notify mechanisms to allow the SDK locking functions to be used effectively within an RTOS or other environment.
*
* When implementing an RTOS, it is desirable for the SDK synchronization primitives that wait to block the calling task (and immediately yield),
* and for those that notify to wake a blocked task which isn't on a processor. At least the wait macro implementation needs to be atomic with the protecting
* spin_lock unlock from the caller's point of view; i.e. the task should unlock the spin lock as it starts its wait. Such an implementation is
* up to the RTOS integration, however the macros are defined such that these operations are always combined into a single call
* (so they can be performed atomically) even though the default implementation does not need this, as a WFE which starts
* following the corresponding SEV is not missed.
*/
// PICO_CONFIG: PARAM_ASSERTIONS_ENABLED_LOCK_CORE, Enable/disable assertions in the lock core, type=bool, default=0, group=pico_sync
#ifndef PARAM_ASSERTIONS_ENABLED_LOCK_CORE
#define PARAM_ASSERTIONS_ENABLED_LOCK_CORE 0
#endif
/** \file lock_core.h
* \ingroup pico_sync
* \ingroup lock_core
*
* Base implementation for locking primitives protected by a spin lock
* Base implementation for locking primitives protected by a spin lock. The spin lock is only used to protect
* access to the remaining lock state (in primitives using lock_core); it is never left locked outside
* of the function implementations
*/
typedef struct lock_core {
struct lock_core {
// spin lock protecting this lock's state
spin_lock_t *spin_lock;
// note any lock members in containing structures need not be volatile;
// they are protected by memory/compiler barriers when gaining and releasing spin locks
} lock_core_t;
};
typedef struct lock_core lock_core_t;
/*! \brief Initialise a lock structure
* \ingroup lock_core
*
* Initialise a lock structure, providing the spin lock number to use for protecting internal state.
*
* \param core Pointer to the lock_core to initialize
* \param lock_num Spin lock number to use for the lock. As the spin lock is only used internally to the locking primitive
* method implementations, this does not need to be globally unique, however sharing a spin lock with other users may increase contention
*/
void lock_init(lock_core_t *core, uint lock_num);
#ifndef lock_owner_id_t
/*! \brief type to use to store the 'owner' of a lock.
* \ingroup lock_core
* By default this is int8_t as it only needs to store the core number or -1, however it may be
* overridden if a larger type is required (e.g. for an RTOS task id)
*/
#define lock_owner_id_t int8_t
#endif
#ifndef LOCK_INVALID_OWNER_ID
/*! \brief marker value to use for a lock_owner_id_t which does not refer to any valid owner
* \ingroup lock_core
*/
#define LOCK_INVALID_OWNER_ID ((lock_owner_id_t)-1)
#endif
#ifndef lock_get_caller_owner_id
/*! \brief return the owner id for the caller
* \ingroup lock_core
* By default this returns the calling core number, but may be overridden (e.g. to return an RTOS task id)
*/
#define lock_get_caller_owner_id() ((lock_owner_id_t)get_core_num())
#endif
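
As a reading aid (not part of this commit): an RTOS port would typically override the three owner-id hooks above before this header is first included, e.g. from its configuration header. The scheduler API below is purely hypothetical:

```c
// Hypothetical RTOS configuration header, included before pico/lock_core.h.
#include <stdint.h>

typedef uint32_t my_rtos_task_id_t;                   // hypothetical task-id type
extern my_rtos_task_id_t my_rtos_current_task_id(void);

#define lock_owner_id_t            my_rtos_task_id_t
#define LOCK_INVALID_OWNER_ID      ((lock_owner_id_t)-1u)
#define lock_get_caller_owner_id() my_rtos_current_task_id()
```
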
#ifndef lock_internal_spin_unlock_with_wait
/*! \brief Atomically unlock the lock's spin lock, and wait for a notification.
* \ingroup lock_core
*
* _Atomic_ here refers to the fact that it should not be possible for a concurrent lock_internal_spin_unlock_with_notify
* to insert itself between the spin unlock and this wait in a way that the wait does not see the notification (i.e. causing
* a missed notification). In other words this method should always wake up in response to a lock_internal_spin_unlock_with_notify
* for the same lock, which completes after this call starts.
*
* In an ideal implementation, this method would return exactly after the corresponding lock_internal_spin_unlock_with_notify
* has subsequently been called on the same lock instance, however this method is free to return at _any_ point before that;
* this macro is _always_ used in a loop which locks the spin lock, checks the internal locking primitive state and then
* waits again if the calling thread should not proceed.
*
* By default this macro simply unlocks the spin lock, and then performs a WFE, but may be overridden
* (e.g. to actually block the RTOS task).
*
* \param lock the lock_core for the primitive which needs to block
* \param save the uint32_t value that should be passed to spin_unlock when the spin lock is unlocked. (i.e. the `PRIMASK`
* state when the spin lock was acquired)
*/
#define lock_internal_spin_unlock_with_wait(lock, save) spin_unlock((lock)->spin_lock, save), __wfe()
#endif
#ifndef lock_internal_spin_unlock_with_notify
/*! \brief Atomically unlock the lock's spin lock, and send a notification
* \ingroup lock_core
*
* _Atomic_ here refers to the fact that it should not be possible for this notification to happen during a
* lock_internal_spin_unlock_with_wait in a way that that wait does not see the notification (i.e. causing
* a missed notification). In other words this method should always wake up any lock_internal_spin_unlock_with_wait
* which started before this call completes.
*
* In an ideal implementation, this method would wake up only the corresponding lock_internal_spin_unlock_with_wait
* that has been called on the same lock instance, however it is free to wake up any of them, as they will check
* their condition and then re-wait if necessary.
*
* By default this macro simply unlocks the spin lock, and then performs a SEV, but may be overridden
* (e.g. to actually un-block RTOS task(s)).
*
* \param lock the lock_core for the primitive which is sending the notification
* \param save the uint32_t value that should be passed to spin_unlock when the spin lock is unlocked. (i.e. the PRIMASK
* state when the spin lock was acquired)
*/
#define lock_internal_spin_unlock_with_notify(lock, save) spin_unlock((lock)->spin_lock, save), __sev()
#endif
#ifndef lock_internal_spin_unlock_with_best_effort_wait_or_timeout
/*! \brief Atomically unlock the lock's spin lock, and wait for a notification or a timeout
* \ingroup lock_core
*
* _Atomic_ here refers to the fact that it should not be possible for a concurrent lock_internal_spin_unlock_with_notify
* to insert itself between the spin unlock and this wait in a way that the wait does not see the notification (i.e. causing
* a missed notification). In other words this method should always wake up in response to a lock_internal_spin_unlock_with_notify
* for the same lock, which completes after this call starts.
*
* In an ideal implementation, this method would return exactly after the corresponding lock_internal_spin_unlock_with_notify
* has subsequently been called on the same lock instance or the timeout has been reached, however this method is free to return
* at _any_ point before that; this macro is _always_ used in a loop which locks the spin lock, checks the internal locking
* primitive state and then waits again if the calling thread should not proceed.
*
* By default this simply unlocks the spin lock, and then calls \ref best_effort_wfe_or_timeout
* but may be overridden (e.g. to actually block the RTOS task with a timeout).
*
* \param lock the lock_core for the primitive which needs to block
* \param save the uint32_t value that should be passed to spin_unlock when the spin lock is unlocked. (i.e. the PRIMASK
* state when the spin lock was acquired)
* \param until the \ref absolute_time_t value
* \return true if the timeout has been reached
*/
#define lock_internal_spin_unlock_with_best_effort_wait_or_timeout(lock, save, until) ({ \
spin_unlock((lock)->spin_lock, save); \
best_effort_wfe_or_timeout(until); \
})
#endif
#ifndef sync_internal_yield_until_before
/*! \brief yield to other processing until some time before the requested time
* \ingroup lock_core
*
* This method is provided for cases where the caller has no useful work to do
* until the specified time.
*
* By default this method does nothing, however it can be overridden (for example by an
* RTOS which is able to block the current task until the scheduler tick before
* the given time)
*
* \param until the \ref absolute_time_t value
*/
#define sync_internal_yield_until_before(until) ((void)0)
#endif
#endif
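
To make the lock/check/wait/notify protocol described at the top of this header concrete, here is a hedged sketch (not part of this commit) of a one-shot event primitive built on lock_core_t; the `simple_event_*` names are hypothetical:

```c
#include <stdbool.h>
#include <stdint.h>
#include "pico/lock_core.h"

typedef struct {
    lock_core_t core;       // spin lock protecting the state below
    bool signalled;
} simple_event_t;

static void simple_event_init(simple_event_t *evt, uint lock_num) {
    lock_init(&evt->core, lock_num);
    evt->signalled = false;
}

static void simple_event_set(simple_event_t *evt) {
    uint32_t save = spin_lock_blocking(evt->core.spin_lock);
    evt->signalled = true;
    // release the spin lock and wake any waiter in one combined step
    lock_internal_spin_unlock_with_notify(&evt->core, save);
}

static void simple_event_wait(simple_event_t *evt) {
    do {
        uint32_t save = spin_lock_blocking(evt->core.spin_lock);
        if (evt->signalled) {
            spin_unlock(evt->core.spin_lock, save);
            return;
        }
        // release the spin lock and wait; wake-ups may be spurious, so loop and re-check
        lock_internal_spin_unlock_with_wait(&evt->core, save);
    } while (true);
}
```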

View File

@@ -28,12 +28,17 @@ extern "C" {
*
* See \ref critical_section.h for protecting access between multiple cores AND IRQ handlers
*/
typedef struct __packed_aligned mutex {
lock_core_t core;
int8_t owner; //! core number or -1 for unowned
lock_owner_id_t owner; //! owner id or LOCK_INVALID_OWNER_ID for unowned
uint8_t recursion_state; //! 0 means non recursive (owned or unowned)
//! 1 is a maxed out recursive lock
//! 2-254 is an owned lock
//! 255 is an un-owned lock
} mutex_t;
#define MAX_RECURSION_STATE ((uint8_t)255)
/*! \brief Initialise a mutex structure
* \ingroup mutex
*
@@ -41,6 +46,15 @@ typedef struct __packed_aligned mutex {
*/
void mutex_init(mutex_t *mtx);
/*! \brief Initialise a recursive mutex structure
* \ingroup mutex
*
* A recursive mutex may be entered in a nested fashion by the same owner
*
* \param mtx Pointer to mutex structure
*/
void recursive_mutex_init(mutex_t *mtx);
/*! \brief Take ownership of a mutex
* \ingroup mutex
*
@@ -129,6 +143,29 @@ static inline bool mutex_is_initialzed(mutex_t *mtx) {
*/
#define auto_init_mutex(name) static __attribute__((section(".mutex_array"))) mutex_t name
/*! \brief Helper macro for static definition of recursive mutexes
* \ingroup mutex
*
* A recursive mutex defined as follows:
*
* ```c
* auto_init_recursive_mutex(my_mutex);
* ```
*
* Is equivalent to doing
*
* ```c
* static mutex_t my_mutex;
*
* void my_init_function() {
* recursive_mutex_init(&my_mutex);
* }
* ```
*
* But the initialization of the mutex is performed automatically during runtime initialization
*/
#define auto_init_recursive_mutex(name) static __attribute__((section(".mutex_array"))) mutex_t name = { .recursion_state = MAX_RECURSION_STATE }
#ifdef __cplusplus
}
#endif
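
A short usage sketch of the new recursive mutex (not part of this commit; the `log_*` helper names are hypothetical). The same owner may nest mutex_enter_blocking calls, and ownership is only released once the outermost mutex_exit runs:

```c
#include "pico/mutex.h"

auto_init_recursive_mutex(log_mutex);        // initialised during runtime init

static void log_line(const char *s) {
    mutex_enter_blocking(&log_mutex);        // may already be held by this caller
    (void) s;                                // placeholder for the actual output
    mutex_exit(&log_mutex);
}

static void log_pair(const char *a, const char *b) {
    mutex_enter_blocking(&log_mutex);        // outer hold
    log_line(a);                             // nested hold: same owner, so no deadlock
    log_line(b);
    mutex_exit(&log_mutex);                  // ownership released here
}
```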

View File

@@ -7,37 +7,53 @@
#include "pico/mutex.h"
#include "pico/time.h"
#if !PICO_NO_HARDWARE
static_assert(sizeof(mutex_t) == 8, "");
#endif
static void mutex_init_internal(mutex_t *mtx, uint8_t recursion_state) {
lock_init(&mtx->core, next_striped_spin_lock_num());
mtx->owner = LOCK_INVALID_OWNER_ID;
mtx->recursion_state = recursion_state;
__mem_fence_release();
}
void mutex_init(mutex_t *mtx) {
lock_init(&mtx->core, next_striped_spin_lock_num());
mtx->owner = -1;
__mem_fence_release();
mutex_init_internal(mtx, 0);
}
void recursive_mutex_init(mutex_t *mtx) {
mutex_init_internal(mtx, MAX_RECURSION_STATE);
}
void __time_critical_func(mutex_enter_blocking)(mutex_t *mtx) {
assert(mtx->core.spin_lock);
bool block = true;
do {
uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
if (mtx->owner < 0) {
mtx->owner = (int8_t)get_core_num();
block = false;
lock_owner_id_t caller = lock_get_caller_owner_id();
if (mtx->owner == LOCK_INVALID_OWNER_ID) {
mtx->owner = caller;
if (mtx->recursion_state) {
assert(mtx->recursion_state == MAX_RECURSION_STATE);
mtx->recursion_state--;
}
} else if (mtx->owner == caller && mtx->recursion_state > 1) {
mtx->recursion_state--;
} else {
lock_internal_spin_unlock_with_wait(&mtx->core, save);
// spin lock already unlocked, so loop again
continue;
}
spin_unlock(mtx->core.spin_lock, save);
if (block) {
__wfe();
}
} while (block);
break;
} while (true);
}
bool __time_critical_func(mutex_try_enter)(mutex_t *mtx, uint32_t *owner_out) {
bool entered;
uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
if (mtx->owner < 0) {
mtx->owner = (int8_t)get_core_num();
lock_owner_id_t caller = lock_get_caller_owner_id();
if (mtx->owner == LOCK_INVALID_OWNER_ID) {
mtx->owner = lock_get_caller_owner_id();
entered = true;
} else if (mtx->owner == caller && mtx->recursion_state > 1) {
mtx->recursion_state--;
entered = true;
} else {
if (owner_out) *owner_out = (uint32_t) mtx->owner;
@@ -53,27 +69,41 @@ bool __time_critical_func(mutex_enter_timeout_ms)(mutex_t *mtx, uint32_t timeout
bool __time_critical_func(mutex_enter_block_until)(mutex_t *mtx, absolute_time_t until) {
assert(mtx->core.spin_lock);
bool block = true;
do {
uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
if (mtx->owner < 0) {
mtx->owner = (int8_t)get_core_num();
block = false;
}
spin_unlock(mtx->core.spin_lock, save);
if (block) {
if (best_effort_wfe_or_timeout(until)) {
lock_owner_id_t caller = lock_get_caller_owner_id();
if (mtx->owner == LOCK_INVALID_OWNER_ID) {
mtx->owner = caller;
} else if (mtx->owner == caller && mtx->recursion_state > 1) {
mtx->recursion_state--;
} else {
if (lock_internal_spin_unlock_with_best_effort_wait_or_timeout(&mtx->core, save, until)) {
// timed out
return false;
} else {
// not timed out; spin lock already unlocked, so loop again
continue;
}
}
} while (block);
return true;
spin_unlock(mtx->core.spin_lock, save);
return true;
} while (true);
}
void __time_critical_func(mutex_exit)(mutex_t *mtx) {
uint32_t save = spin_lock_blocking(mtx->core.spin_lock);
assert(mtx->owner >= 0);
mtx->owner = -1;
__sev();
spin_unlock(mtx->core.spin_lock, save);
}
assert(mtx->owner != LOCK_INVALID_OWNER_ID);
if (!mtx->recursion_state) {
mtx->owner = LOCK_INVALID_OWNER_ID;
lock_internal_spin_unlock_with_notify(&mtx->core, save);
} else {
mtx->recursion_state++;
assert(mtx->recursion_state);
if (mtx->recursion_state == MAX_RECURSION_STATE) {
mtx->owner = LOCK_INVALID_OWNER_ID;
lock_internal_spin_unlock_with_notify(&mtx->core, save);
} else {
spin_unlock(mtx->core.spin_lock, save);
}
}
}
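
For clarity (editor's note, not part of the commit), a worked trace of the recursion_state transitions implemented by the functions above, for a recursive mutex entered twice by the same owner:

```c
#include "pico/mutex.h"

static mutex_t m;

static void recursion_state_trace(void) {
    recursive_mutex_init(&m);   // owner = LOCK_INVALID_OWNER_ID, recursion_state = 255
    mutex_enter_blocking(&m);   // owner = caller,                recursion_state = 254
    mutex_enter_blocking(&m);   // same owner, state > 1:         recursion_state = 253
    mutex_exit(&m);             // still owned,                   recursion_state = 254
    mutex_exit(&m);             // state hits 255 (MAX): owner = LOCK_INVALID_OWNER_ID, waiters notified
}
```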

View File

@@ -20,64 +20,57 @@ int __time_critical_func(sem_available)(semaphore_t *sem) {
}
void __time_critical_func(sem_acquire_blocking)(semaphore_t *sem) {
bool block = true;
do {
uint32_t save = spin_lock_blocking(sem->core.spin_lock);
if (sem->permits > 0) {
sem->permits--;
__sev();
block = false;
lock_internal_spin_unlock_with_notify(&sem->core, save);
break;
}
spin_unlock(sem->core.spin_lock, save);
if (block) {
__wfe();
}
} while (block);
lock_internal_spin_unlock_with_wait(&sem->core, save);
} while (true);
}
bool __time_critical_func(sem_acquire_timeout_ms)(semaphore_t *sem, uint32_t timeout_ms) {
bool block = true;
absolute_time_t target = nil_time;
do {
uint32_t save = spin_lock_blocking(sem->core.spin_lock);
if (sem->permits > 0) {
sem->permits--;
__sev();
block = false;
lock_internal_spin_unlock_with_notify(&sem->core, save);
return true;
}
spin_unlock(sem->core.spin_lock, save);
if (block) {
if (is_nil_time(target)) {
target = make_timeout_time_ms(timeout_ms);
}
if (best_effort_wfe_or_timeout(target)) {
return false;
}
if (is_nil_time(target)) {
target = make_timeout_time_ms(timeout_ms);
}
} while (block);
return true;
if (lock_internal_spin_unlock_with_best_effort_wait_or_timeout(&sem->core, save, target)) {
return false;
}
} while (true);
}
// todo this should really have a blocking variant for when permits are maxed out
bool __time_critical_func(sem_release)(semaphore_t *sem) {
bool rc;
uint32_t save = spin_lock_blocking(sem->core.spin_lock);
int32_t count = sem->permits;
if (count < sem->max_permits) {
sem->permits = (int16_t)(count + 1);
__sev();
rc = true;
lock_internal_spin_unlock_with_notify(&sem->core, save);
return true;
} else {
rc = false;
spin_unlock(sem->core.spin_lock, save);
return false;
}
spin_unlock(sem->core.spin_lock, save);
return rc;
}
void __time_critical_func(sem_reset)(semaphore_t *sem, int16_t permits) {
assert(permits >= 0 && permits <= sem->max_permits);
uint32_t save = spin_lock_blocking(sem->core.spin_lock);
if (permits > sem->permits) __sev();
sem->permits = permits;
spin_unlock(sem->core.spin_lock, save);
if (permits > sem->permits) {
sem->permits = permits;
lock_internal_spin_unlock_with_notify(&sem->core, save);
} else {
sem->permits = permits;
spin_unlock(sem->core.spin_lock, save);
}
}
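
Finally, a hedged usage sketch of the semaphore as reworked above (not part of this commit; the `work_done` state and the helper names are hypothetical, and sem_init is assumed to take the initial and maximum permit counts as declared in pico/sem.h):

```c
#include <stdbool.h>
#include <stdint.h>
#include "pico/sem.h"

static semaphore_t work_done;               // hypothetical example state

void worker_setup(void) {
    sem_init(&work_done, 0, 1);             // start with 0 permits, at most 1
}

void on_work_finished(void) {               // e.g. called from an IRQ handler or the other core
    sem_release(&work_done);                // grants a permit and notifies any waiter
}

bool wait_for_work(uint32_t timeout_ms) {
    // false if no permit became available within timeout_ms
    return sem_acquire_timeout_ms(&work_done, timeout_ms);
}
```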