author     Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
committer  Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
commit     eebaa7f86212d7ebab3c87aae1f9d68cade1b49e (patch)
tree       38b556f915ea612375202be6ec434f9f75c98b6a /ww_mutex.c
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Diffstat (limited to 'ww_mutex.c')
-rw-r--r--    ww_mutex.c    583
1 file changed, 583 insertions, 0 deletions
diff --git a/ww_mutex.c b/ww_mutex.c
new file mode 100644
index 0000000..6858949
--- /dev/null
+++ b/ww_mutex.c
@@ -0,0 +1,583 @@

/*
 * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
 *
 * Original mutex implementation started by Ingo Molnar:
 *
 * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Wound/wait implementation:
 * Copyright (C) 2013 Canonical Ltd.
 *
 * Wound/wait drop-in, actual wound-wait semantics and batching:
 * Copyright (C) 2016-2018 VMWare Inc.
 */

#include "ww_mutex.h"
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>

#ifndef WW_BUILTIN

#undef EXPORT_SYMBOL_GPL
#define EXPORT_SYMBOL_GPL(_a)
#define MUTEX_FLAGS 0x07

#ifndef CONFIG_DEBUG_MUTEXES
#define debug_ww_mutex_add_waiter(_a, _b, _c)
#define debug_ww_mutex_wake_waiter(_a, _b)
#define debug_ww_mutex_lock_common(_a, _b)
#define mutex_remove_waiter(__lock, __waiter, __task) \
	__list_del((__waiter)->list.prev, (__waiter)->list.next)
#else
static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
				      struct mutex_waiter *waiter,
				      struct task_struct *task)
{
	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	task->blocked_on = waiter;
}

static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}

static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
	waiter->magic = waiter;
	INIT_LIST_HEAD(&waiter->list);
}

static void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
				struct task_struct *task)
{
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
	DEBUG_LOCKS_WARN_ON(waiter->task != task);
	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
	task->blocked_on = NULL;

	list_del_init(&waiter->list);
	waiter->task = NULL;
}
#endif


/**
 * ww_acquire_class_lock - Lock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Take the global class spinlock unless we're batching, in which case the
 * global class spinlock was taken at batch begin.
 */
static void ww_acquire_class_lock(struct ww_mutex *ww,
				  struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_lock(&ww->my_class->lock);
}


/**
 * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Release the global class spinlock unless we're batching, in which case the
 * global class spinlock is released at batch end.
 */
static void ww_acquire_class_unlock(struct ww_mutex *ww,
				    struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_unlock(&ww->my_class->lock);
}

static bool __mutex_waiter_is_first(struct mutex *lock,
				    struct mutex_waiter *waiter)
{
	return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
		waiter;
}


/**
 * __ww_mutex_trylock - Trylock a ww_mutex
 *
 * @ww: The mutex to trylock.
 * @ww_ctx: The acquire_ctx to register as locker, or NULL.
 * @waiter: The waiter if a waiter is trying to lock, or NULL.
 * Return: true if the lock succeeded, false otherwise.
 */
static bool __ww_mutex_trylock(struct ww_mutex *ww,
			       struct ww_acquire_ctx *ww_ctx,
			       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);

	if (atomic_long_read(&lock->owner))
		return false;

	/*
	 * No lock stealing for now. If there are waiters, only the first
	 * waiter is allowed to lock.
	 */
	if (!list_empty(&lock->wait_list) &&
	    !__mutex_waiter_is_first(lock, waiter))
		return false;

	atomic_long_set(&lock->owner, (unsigned long) current);
	ww->ctx = ww_ctx;
	if (ww_ctx)
		ww_ctx->acquired++;

	return true;
}

static bool
__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
{
	return a->stamp - b->stamp <= LONG_MAX &&
	       (a->stamp != b->stamp || a > b);
}

static struct task_struct *__owner_task(unsigned long owner)
{
	return (struct task_struct *)(owner & ~MUTEX_FLAGS);
}


/**
 * ww_mutex_waiter_backoff - Whether to back off if younger
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or lock owner if we're
 * younger than the lock owner or other waiter.
 */
static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
				    struct ww_acquire_ctx *other)
{
	return (us->my_class->is_wait_die || us->wounded) && us->acquired > 0 &&
		!other->done_acquire;
}


/**
 * ww_mutex_backoff - Whether to back off
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or lock owner.
 */
static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
			     struct ww_acquire_ctx *other)
{
	return other && ww_mutex_waiter_backoff(us, other) &&
		us != other && __ww_ctx_stamp_after(us, other);
}


/**
 * ww_mutex_lock_backoff - Check backoff for the lock owner and all other waiters.
 *
 * @us: Our acquire_ctx.
 * @ww: The ww_mutex considered.
 * Return: Whether to back off for any other waiter or the lock owner.
 */
static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
				  struct ww_mutex *ww)
{
	struct mutex_waiter *cur;

	if (!us)
		return false;

	/*
	 * Wounded contexts are lazily preempted: they back off at their next
	 * lock wait, so that there is a lock to wait on after the backoff.
	 */
	if (us->wounded)
		return true;

	/* Backoff for the lock owner? */
	if (ww_mutex_backoff(us, ww->ctx))
		return true;

	/* Backoff for other waiters? */
	list_for_each_entry(cur, &ww->base.wait_list, list) {
		if (ww_mutex_backoff(us, cur->ww_ctx))
			return true;
	}

	return false;
}

static int
__ww_mutex_add_waiter(struct mutex_waiter *waiter,
		      struct ww_mutex *ww,
		      struct ww_acquire_ctx *ww_ctx)
{
	struct mutex *lock = &ww->base;
	struct mutex_waiter *cur;
	struct list_head *pos;
	bool is_wait_die = ww->my_class->is_wait_die;

	waiter->task = current;
	waiter->ww_ctx = ww_ctx;

	if (!ww_ctx) {
		list_add_tail(&waiter->list, &lock->wait_list);
		return 0;
	}

	/*
	 * Add the waiter before the first waiter with a higher stamp.
	 * Waiters without a context are skipped to avoid starving
	 * them.
	 */
	pos = &lock->wait_list;
	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
		if (!cur->ww_ctx)
			continue;

		/* Do we need to back off early for the other waiter? */
		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
			if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
				return -EDEADLK;

			break;
		}

		pos = &cur->list;

		/*
		 * The other waiter needs to back off for us.
		 * Wake up the waiter so that it gets a chance to back
		 * off.
		 */
		if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
			debug_ww_mutex_wake_waiter(ww, cur);
			wake_up_process(cur->task);
		}
	}

	list_add_tail(&waiter->list, pos);

	/* Need to wound the lock owner? */
	if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
	    ww_ctx->acquired > 0) {
		ww->ctx->wounded = true;

		/*
		 * Wake up the lock owner in case it's sleeping on
		 * another ww_mutex.
		 */
		wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
	}

	return 0;
}


/**
 * ww_mutex_wake_first_waiter - Wake the first waiter, if any.
 *
 * @ww: The ww_mutex on which to wake the first waiter.
 */
static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
{
	struct mutex_waiter *cur;
	struct mutex *lock = &ww->base;

	if (!list_empty(&lock->wait_list)) {
		cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
				       list);
		debug_ww_mutex_wake_waiter(ww, cur);
		wake_up_process(cur->task);
	}
}

static int __ww_mutex_lock(struct ww_mutex *ww, long state,
			   unsigned int subclass,
			   struct lockdep_map *nest_lock,
			   unsigned long ip,
			   struct ww_acquire_ctx *ww_ctx)
{
	struct mutex_waiter waiter;
	int ret = 0;

	lockdep_assert_held(&ww->my_class->lock);

	if (ww_ctx) {
		/*
		 * If we've backed off when wounded, there are no more
		 * acquired locks, and we can clear the wounded flag.
		 */
		if (ww_ctx->acquired == 0)
			ww_ctx->wounded = false;

		if (ww->ctx == ww_ctx) {
			ret = -EALREADY;
			goto err_early_backoff;
		}
	}

	if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
		goto skip_wait;

	debug_ww_mutex_lock_common(ww, &waiter);
	debug_ww_mutex_add_waiter(ww, &waiter, current);

	lock_contended(&ww->base.dep_map, ip);

	ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
	if (ret)
		goto err_early_backoff;

	set_current_state(state);
	for (;;) {
		if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
			break;

		if (unlikely(signal_pending_state(state, current))) {
			ret = -EINTR;
			goto err;
		}

		if (ww_mutex_lock_backoff(ww_ctx, ww)) {
			ret = -EDEADLK;
			goto err;
		}

		spin_unlock(&ww->my_class->lock);
		schedule();
		spin_lock(&ww->my_class->lock);
		set_current_state(state);
	}
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);
skip_wait:
	lock_acquired(&ww->base.dep_map, ip);
	return 0;

err:
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);
	ww_mutex_wake_first_waiter(ww);
err_early_backoff:
	lock_release(&ww->base.dep_map, !!ww_ctx, ip);
	return ret;
}

static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);
	lock_release(&lock->dep_map, !!ww->ctx, ip);
	if (ww->ctx)
		ww->ctx->acquired--;
	ww->ctx = NULL;
	atomic_long_set(&lock->owner, 0);
	ww_mutex_wake_first_waiter(ww);
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
				       struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
	unsigned tmp;

	if (ww_ctx->deadlock_inject_countdown-- == 0) {
		tmp = ww_ctx->deadlock_inject_interval;
		if (tmp > UINT_MAX/4)
			tmp = UINT_MAX;
		else
			tmp = tmp*2 + tmp + tmp/2;

		ww_ctx->deadlock_inject_interval = tmp;
		ww_ctx->deadlock_inject_countdown = tmp;

		ww_mutex_unlock(ww);

		return -EDEADLK;
	}
#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */

	return 0;
}

/**
 * ww_class_lock_annotate - lockdep annotation at locking time.
 *
 * @ww: The ww_mutex
 * @ww_ctx: The acquire ctx or NULL
 * @ip: The caller's instruction pointer
 */
static void ww_class_lock_annotate(struct ww_mutex *ww,
				   struct ww_acquire_ctx *ww_ctx,
				   unsigned long ip)
{
	if (!ww_ctx || !ww_ctx->batched) {
		/* Annotate the wait lock before the spinlock. */
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     ww_ctx ? &ww_ctx->dep_map : NULL, ip);
	} else {
		/*
		 * OK, we'd like to annotate ww trylocks under the class
		 * spinlock, but since each trylock becomes a lockdep level,
		 * we'd quickly run out of levels. And we can't annotate
		 * nested trylocks, since apparently lockdep can't cope
		 * with that. So cheat lockdep and fake a class spinlock
		 * release and annotate a waiting ww lock...
		 */
		lock_release(&ww->my_class->lock.dep_map, 0, ip);
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     &ww_ctx->dep_map, ip);
		lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1, NULL, ip);
	}
}

int
ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE,
			      0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);


int
ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE,
			      0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#else /* CONFIG_DEBUG_LOCK_ALLOC */

int
ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);


int
ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#endif /* CONFIG_DEBUG_LOCK_ALLOC */

/**
 * ww_mutex_unlock_batched - Unlock replacement to be used for batched unlock
 *
 * @ww: The mutex to unlock
 */
void
ww_mutex_unlock_batched(struct ww_mutex *ww)
{
	__ww_mutex_unlock(ww, _RET_IP_);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);

void
ww_mutex_unlock(struct ww_mutex *ww)
{
	struct ww_acquire_ctx *ww_ctx = ww->ctx;

	ww_acquire_class_lock(ww, ww_ctx);
	__ww_mutex_unlock(ww, _RET_IP_);
	ww_acquire_class_unlock(ww, ww_ctx);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock);

#ifdef WW_BATCHING
void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(ww_ctx->batched);
#endif
	spin_lock(&ww_ctx->my_class->lock);
	ww_ctx->batched = true;
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);

void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(!ww_ctx->batched);
#endif
	ww_ctx->batched = false;
	spin_unlock(&ww_ctx->my_class->lock);
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
#endif

int ww_mutex_trylock(struct ww_mutex *ww)
{
	bool locked;

	spin_lock(&ww->my_class->lock);
	locked = __ww_mutex_trylock(ww, NULL, NULL);
	spin_unlock(&ww->my_class->lock);
	if (locked)
		lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);

	return locked;
}
EXPORT_SYMBOL_GPL(ww_mutex_trylock);

#endif
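
To illustrate how the batched interface added by this patch is meant to be driven, here is a minimal, hypothetical usage sketch. It is not part of the patch. It assumes WW_BATCHING is enabled and that the accompanying ww_mutex.h provides DEFINE_WW_CLASS(), ww_acquire_init() and ww_acquire_fini() helpers mirroring the in-kernel ww_mutex API; those are not shown in this file, and the obj type and obj_lock_pair() function are invented for the example. The error handling follows the backoff rules implemented above: on -EDEADLK the caller drops every lock it still holds before retrying with the same acquire context.

/* Hypothetical example, not part of this patch. Assumes a != b. */
static DEFINE_WW_CLASS(obj_ww_class);		/* assumed from ww_mutex.h */

struct obj {
	struct ww_mutex lock;
	/* ... payload ... */
};

/* Lock two objects in one batch, i.e. under a single class spinlock hold. */
static int obj_lock_pair(struct obj *a, struct obj *b,
			 struct ww_acquire_ctx *ctx)
{
	int ret;

	ww_acquire_init(ctx, &obj_ww_class);	/* assumed from ww_mutex.h */

retry:
	ww_acquire_batch_begin(ctx);		/* takes the class spinlock once */

	ret = ww_mutex_lock(&a->lock, ctx);
	if (!ret) {
		ret = ww_mutex_lock(&b->lock, ctx);
		if (ret)
			/* Drop what we hold; we are still inside the batch. */
			ww_mutex_unlock_batched(&a->lock);
	}

	ww_acquire_batch_end(ctx);		/* drops the class spinlock */

	if (ret == -EDEADLK) {
		/*
		 * Wait-die or wound-wait backoff: with no locks held, the
		 * wounded flag is cleared on the next lock attempt and the
		 * context no longer backs off, so the retry simply blocks
		 * on the contended lock until the winner releases it.
		 */
		goto retry;
	}
	if (ret)
		ww_acquire_fini(ctx);		/* assumed from ww_mutex.h */

	/* On success the caller unlocks both objects and calls
	 * ww_acquire_fini() when it is done with the context. */
	return ret;
}

Unlocking can either reopen a batch and use ww_mutex_unlock_batched(), or simply call ww_mutex_unlock() on each lock outside any batch, since that path takes and releases the class spinlock per mutex.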