author     Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
committer  Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
commit     eebaa7f86212d7ebab3c87aae1f9d68cade1b49e (patch)
tree       38b556f915ea612375202be6ec434f9f75c98b6a /ww_mutex.c
Initial commit. (HEAD, master)
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Diffstat (limited to 'ww_mutex.c')
-rw-r--r--    ww_mutex.c    583
1 file changed, 583 insertions(+), 0 deletions(-)
diff --git a/ww_mutex.c b/ww_mutex.c
new file mode 100644
index 0000000..6858949
--- /dev/null
+++ b/ww_mutex.c
@@ -0,0 +1,583 @@
+/*
+ * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
+ *
+ * Original mutex implementation started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Wound/wait implementation:
+ * Copyright (C) 2013 Canonical Ltd.
+ *
+ * Wound/wait drop-in, actual wound-wait semantics and batching:
+ * Copyright (C) 2016-2018 VMware Inc.
+ */
+
+#include "ww_mutex.h"
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+
+#ifndef WW_BUILTIN
+
+#undef EXPORT_SYMBOL_GPL
+#define EXPORT_SYMBOL_GPL(_a)
+#define MUTEX_FLAGS 0x07
+
+#ifndef CONFIG_DEBUG_MUTEXES
+#define debug_ww_mutex_add_waiter(_a, _b, _c)
+#define debug_ww_mutex_wake_waiter(_a, _b)
+#define debug_ww_mutex_lock_common(_a, _b)
+#define mutex_remove_waiter(__lock, __waiter, __task) \
+ __list_del((__waiter)->list.prev, (__waiter)->list.next)
+#else
+static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
+ struct mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
+ task->blocked_on = waiter;
+}
+
+static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
+ struct mutex_waiter *waiter)
+{
+ struct mutex *lock = &ww->base;
+
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
+ DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+ DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+}
+
+static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
+ struct mutex_waiter *waiter)
+{
+ memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
+ waiter->magic = waiter;
+ INIT_LIST_HEAD(&waiter->list);
+}
+
+static void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+ DEBUG_LOCKS_WARN_ON(waiter->task != task);
+ DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+ task->blocked_on = NULL;
+
+ list_del_init(&waiter->list);
+ waiter->task = NULL;
+}
+#endif
+
+
+/**
+ * ww_acquire_class_lock - Lock the global class spinlock unless batching
+ *
+ * @ww: The ww mutex.
+ * @ww_ctx: The acquire context.
+ *
+ * Take the global class spinlock unless we're batching, in which case the
+ * spinlock was already taken at batch begin.
+ */
+static void ww_acquire_class_lock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx || !ww_ctx->batched)
+ spin_lock(&ww->my_class->lock);
+}
+
+
+/**
+ * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
+ *
+ * @ww: The ww mutex.
+ * @ww_ctx: The acquire context.
+ *
+ * Release the global class spinlock unless we're batching, in which case
+ * it is released at batch end.
+ */
+static void ww_acquire_class_unlock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx || !ww_ctx->batched)
+ spin_unlock(&ww->my_class->lock);
+}
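+
+/*
+ * Implementation note: unlike the in-kernel ww_mutex, which serializes
+ * waiters with a per-mutex wait_lock, this drop-in serializes all mutexes
+ * of a class with the single ww->my_class->lock spinlock. That is what
+ * makes the batching interface (ww_acquire_batch_begin() /
+ * ww_acquire_batch_end() below) possible: a batch simply keeps the class
+ * spinlock held across a sequence of lock and unlock calls.
+ */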
+
+static bool __mutex_waiter_is_first(struct mutex *lock,
+ struct mutex_waiter *waiter)
+{
+ return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
+ waiter;
+}
+
+
+/**
+ * __ww_mutex_trylock - Trylock a ww_mutex
+ *
+ * @ww: The mutex to trylock
+ * @ww_ctx: The acquire_ctx to register as locker, or NULL.
+ * @waiter: The waiter if a waiter is trying to lock, or NULL.
+ * Return: true if the lock succeeded, false otherwise.
+ */
+static bool __ww_mutex_trylock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx,
+ struct mutex_waiter *waiter)
+{
+ struct mutex *lock = &ww->base;
+
+ lockdep_assert_held(&ww->my_class->lock);
+
+ if (atomic_long_read(&lock->owner))
+ return false;
+
+ /*
+ * No lock stealing for now. If there are waiters, only the first
+ * waiter is allowed to lock.
+ */
+ if (!list_empty(&lock->wait_list) &&
+ !__mutex_waiter_is_first(lock, waiter))
+ return false;
+
+ atomic_long_set(&lock->owner, (unsigned long) current);
+ ww->ctx = ww_ctx;
+ if (ww_ctx)
+ ww_ctx->acquired++;
+
+ return true;
+}
+
+static bool
+__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
+{
+ return a->stamp - b->stamp <= LONG_MAX &&
+ (a->stamp != b->stamp || a > b);
+}
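+
+/*
+ * Stamp ordering note: stamps are unsigned and may wrap, so "a is younger
+ * than b" is decided in modular arithmetic: for distinct stamps,
+ * a->stamp - b->stamp <= LONG_MAX holds for at most one of the two
+ * orderings. As an illustration with 8-bit stamps, a = 2 and b = 250
+ * gives a - b = 8, so a is considered after (younger than) b even though
+ * it is numerically smaller. Equal stamps are disambiguated by comparing
+ * the context pointers.
+ */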
+
+static struct task_struct *__owner_task(unsigned long owner)
+{
+ return (struct task_struct *)(owner & ~MUTEX_FLAGS);
+}
+
+
+/**
+ * ww_mutex_waiter_backoff - Whether to back off if younger
+ *
+ * @us: Our acquire_ctx.
+ * @other: The other waiter's or lock owner's acquire_ctx.
+ * Return: Whether we should back off for the other waiter or lock owner
+ * when we are the younger context.
+ */
+static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
+ struct ww_acquire_ctx *other)
+{
+ return (us->my_class->is_wait_die || us->wounded) && us->acquired > 0 &&
+ !other->done_acquire;
+}
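+
+/*
+ * In other words: in a wait-die class a younger context always backs off
+ * (dies) rather than wait behind an older one, whereas with wound-wait it
+ * only backs off once it has actually been wounded by an older context.
+ * In either case there is nothing to back off from if we hold no locks
+ * yet (acquired == 0), and backing off for a context that has finished
+ * its acquire phase (done_acquire) is unnecessary, since that context
+ * will not wait for any further locks and thus cannot be part of a
+ * deadlock cycle.
+ */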
+
+
+/**
+ * ww_mutex_backoff - Whether to back off
+ *
+ * @us: Our acquire_ctx.
+ * @other: The other waiter's or lock owner's acquire_ctx, or NULL.
+ * Return: Whether we should back off for the other waiter or lock owner.
+ */
+static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
+ struct ww_acquire_ctx *other)
+{
+ return other && ww_mutex_waiter_backoff(us, other) &&
+ us != other && __ww_ctx_stamp_after(us, other);
+}
+
+
+/**
+ * ww_mutex_lock_backoff - Check backoff for lock owner and all other waiters.
+ *
+ * @us: Our acquire_ctx.
+ * @ww: The ww_mutex considered.
+ * Return: Whether to back off for any other waiters or lock owner.
+ */
+static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
+ struct ww_mutex *ww)
+{
+ struct mutex_waiter *cur;
+
+ if (!us)
+ return false;
+
+ /*
+	 * Wounded contexts lazy-preempt: they back off at their first lock
+	 * wait after being wounded, so that we have a lock to wait on after
+	 * backoff.
+ */
+ if (us->wounded)
+ return true;
+
+ /* Backoff for lock owner? */
+ if (ww_mutex_backoff(us, ww->ctx))
+ return true;
+
+ /* Backoff for other waiters? */
+ list_for_each_entry(cur, &ww->base.wait_list, list) {
+ if (ww_mutex_backoff(us, cur->ww_ctx))
+ return true;
+ }
+
+ return false;
+}
+
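+/**
+ * __ww_mutex_add_waiter - Add a waiter to the wait list of a ww_mutex
+ *
+ * @waiter: The waiter to add.
+ * @ww: The ww_mutex to wait for.
+ * @ww_ctx: The acquire context of the waiter, or NULL.
+ *
+ * Insert the waiter in stamp order (oldest first), wake up any younger
+ * waiters that now need to back off for us and, for wound-wait classes,
+ * wound a younger lock owner. Must be called with the class spinlock held.
+ *
+ * Return: Zero on success, -EDEADLK if we should back off early for an
+ * older waiter.
+ */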
+static int
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+ struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex *lock = &ww->base;
+ struct mutex_waiter *cur;
+ struct list_head *pos;
+ bool is_wait_die = ww->my_class->is_wait_die;
+
+ waiter->task = current;
+ waiter->ww_ctx = ww_ctx;
+
+ if (!ww_ctx) {
+ list_add_tail(&waiter->list, &lock->wait_list);
+ return 0;
+ }
+
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them.
+ */
+ pos = &lock->wait_list;
+ list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+		/* Do we need to back off early for this older waiter? */
+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+ if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
+ return -EDEADLK;
+
+ break;
+ }
+
+ pos = &cur->list;
+
+ /*
+		 * The other waiter needs to back off for us. Wake it up so
+		 * that it gets a chance to back off.
+ */
+ if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
+ debug_ww_mutex_wake_waiter(ww, cur);
+ wake_up_process(cur->task);
+ }
+ }
+
+ list_add_tail(&waiter->list, pos);
+
+ /* Need to wound the lock owner? */
+ if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
+	    ww_ctx->acquired > 0) {
+ ww->ctx->wounded = true;
+
+ /*
+ * Wake up the lock owner in case it's sleeping on
+		 * another ww_mutex.
+ */
+ wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
+ }
+
+ return 0;
+}
+
+
+/**
+ * ww_mutex_wake_first_waiter - Wake the first waiter, if any.
+ *
+ * @ww: The ww_mutex on which to wake first waiter.
+ */
+static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
+{
+ struct mutex_waiter *cur;
+ struct mutex *lock = &ww->base;
+
+ if (!list_empty(&lock->wait_list)) {
+ cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
+ list);
+ debug_ww_mutex_wake_waiter(ww, cur);
+ wake_up_process(cur->task);
+ }
+}
+
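+/**
+ * __ww_mutex_lock - Common ww_mutex lock slowpath
+ *
+ * @ww: The ww_mutex to lock.
+ * @state: Sleeping state, TASK_UNINTERRUPTIBLE or TASK_INTERRUPTIBLE.
+ * @subclass: Lockdep subclass (not used by this drop-in).
+ * @nest_lock: Lockdep nest lock (not used by this drop-in).
+ * @ip: The caller's instruction pointer for lockdep.
+ * @ww_ctx: The acquire context, or NULL.
+ *
+ * Must be called with the class spinlock held; the spinlock is dropped
+ * and retaken around sleeping.
+ *
+ * Return: Zero on success, -EALREADY if @ww_ctx already holds this mutex,
+ * -EDEADLK if the caller needs to back off, or -EINTR if interrupted by a
+ * signal while sleeping interruptibly.
+ */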
+static int __ww_mutex_lock(struct ww_mutex *ww, long state,
+ unsigned int subclass,
+ struct lockdep_map *nest_lock,
+ unsigned long ip,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter waiter;
+ int ret = 0;
+
+ lockdep_assert_held(&ww->my_class->lock);
+
+ if (ww_ctx) {
+ /*
+ * If we've backed off when wounded, there are no more
+ * acquired locks, and we can clear the wounded flag.
+ */
+ if (ww_ctx->acquired == 0)
+ ww_ctx->wounded = false;
+
+ if (ww->ctx == ww_ctx) {
+ ret = -EALREADY;
+ goto err_early_backoff;
+ }
+ }
+
+ if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
+ goto skip_wait;
+
+ debug_ww_mutex_lock_common(ww, &waiter);
+ debug_ww_mutex_add_waiter(ww, &waiter, current);
+
+ lock_contended(&ww->base.dep_map, ip);
+
+ ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
+ if (ret)
+ goto err_early_backoff;
+
+ set_current_state(state);
+ for (;;) {
+ if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
+ break;
+
+ if (unlikely(signal_pending_state(state, current))) {
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (ww_mutex_lock_backoff(ww_ctx, ww)) {
+ ret = -EDEADLK;
+ goto err;
+ }
+
+ spin_unlock(&ww->my_class->lock);
+ schedule();
+ spin_lock(&ww->my_class->lock);
+ set_current_state(state);
+ }
+ set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(&ww->base, &waiter, current);
+skip_wait:
+ lock_acquired(&ww->base.dep_map, ip);
+ return 0;
+
+err:
+ set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(&ww->base, &waiter, current);
+ ww_mutex_wake_first_waiter(ww);
+err_early_backoff:
+ lock_release(&ww->base.dep_map, !!ww_ctx, ip);
+ return ret;
+}
+
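+/**
+ * __ww_mutex_unlock - Common ww_mutex unlock path
+ *
+ * @ww: The ww_mutex to unlock.
+ * @ip: The caller's instruction pointer for lockdep.
+ *
+ * Release the lockdep annotation, clear the owner and acquire context and
+ * wake the first waiter, if any. Must be called with the class spinlock
+ * held.
+ */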
+static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
+{
+ struct mutex *lock = &ww->base;
+
+ lockdep_assert_held(&ww->my_class->lock);
+ lock_release(&lock->dep_map, !!ww->ctx, ip);
+ if (ww->ctx)
+ ww->ctx->acquired--;
+ ww->ctx = NULL;
+ atomic_long_set(&lock->owner, 0);
+ ww_mutex_wake_first_waiter(ww);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ unsigned tmp;
+
+ if (ww_ctx->deadlock_inject_countdown-- == 0) {
+ tmp = ww_ctx->deadlock_inject_interval;
+ if (tmp > UINT_MAX/4)
+ tmp = UINT_MAX;
+ else
+ tmp = tmp*2 + tmp + tmp/2;
+
+ ww_ctx->deadlock_inject_interval = tmp;
+ ww_ctx->deadlock_inject_countdown = tmp;
+
+ ww_mutex_unlock(ww);
+
+ return -EDEADLK;
+ }
+#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */
+
+	return 0;
+}
+
+/**
+ * ww_class_lock_annotate - Lockdep annotation at locking time.
+ *
+ * @ww: The ww_mutex
+ * @ww_ctx: The acquire ctx or NULL
+ * @ip: The caller's instruction pointer (typically _RET_IP_)
+ */
+static void ww_class_lock_annotate(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned long ip)
+{
+ if (!ww_ctx || !ww_ctx->batched) {
+ /* Annotate wait lock before the spinlock. */
+ lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
+ ww_ctx ? &ww_ctx->dep_map : NULL, ip);
+ } else {
+ /*
+		 * OK, we'd like to annotate ww trylocks under the class
+		 * spinlock, but since each trylock becomes a lockdep level,
+		 * we'd quickly run out of levels. And we can't annotate
+		 * nested trylocks, since apparently lockdep can't cope
+		 * with that. So cheat lockdep and fake a class spinlock
+		 * release and annotate a waiting ww lock...
+ */
+ lock_release(&ww->my_class->lock.dep_map, 0, ip);
+ lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
+ &ww_ctx->dep_map, ip);
+ lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1, NULL, ip);
+ }
+}
+
+int
+ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE,
+ 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
+ ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+ if (!ret && ww_ctx && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+
+int
+ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE,
+ 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
+ ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+ if (!ret && ww_ctx && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+#else /* CONFIG_DEBUG_LOCK_ALLOC */
+
+int
+ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+
+int
+ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * ww_mutex_unlock_batched - Replacement for ww_mutex_unlock() when batching
+ *
+ * @ww: The mutex to unlock.
+ *
+ * The class spinlock must already be held, as it is inside a
+ * ww_acquire_batch_begin() / ww_acquire_batch_end() section.
+ */
+void
+ww_mutex_unlock_batched(struct ww_mutex *ww)
+{
+ __ww_mutex_unlock(ww, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);
+
+void
+ww_mutex_unlock(struct ww_mutex *ww)
+{
+ struct ww_acquire_ctx *ww_ctx = ww->ctx;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ __ww_mutex_unlock(ww, _RET_IP_);
+ ww_acquire_class_unlock(ww, ww_ctx);
+}
+EXPORT_SYMBOL_GPL(ww_mutex_unlock);
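+
+/*
+ * Example (illustrative sketch only, not part of the original code): the
+ * classic two-lock acquire pattern with deadlock backoff. The names a, b
+ * and my_class are placeholders, and ww_acquire_init(), ww_acquire_done()
+ * and ww_acquire_fini() are assumed to be provided by ww_mutex.h with the
+ * stock kernel semantics:
+ *
+ *	struct ww_acquire_ctx ctx;
+ *	int err;
+ *
+ *	ww_acquire_init(&ctx, &my_class);
+ * retry:
+ *	err = ww_mutex_lock(&a->lock, &ctx);
+ *	if (!err) {
+ *		err = ww_mutex_lock(&b->lock, &ctx);
+ *		if (err == -EDEADLK) {
+ *			ww_mutex_unlock(&a->lock);
+ *			goto retry;
+ *		}
+ *	}
+ *	ww_acquire_done(&ctx);
+ *
+ *	... use the objects protected by a->lock and b->lock ...
+ *
+ *	ww_mutex_unlock(&b->lock);
+ *	ww_mutex_unlock(&a->lock);
+ *	ww_acquire_fini(&ctx);
+ */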
+
+#ifdef WW_BATCHING
+void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ WARN_ON(ww_ctx->batched);
+#endif
+ spin_lock(&ww_ctx->my_class->lock);
+ ww_ctx->batched = true;
+}
+EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);
+
+void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ WARN_ON(!ww_ctx->batched);
+#endif
+ ww_ctx->batched = false;
+ spin_unlock(&ww_ctx->my_class->lock);
+}
+EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
+#endif
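+
+/*
+ * Example (illustrative sketch only): with WW_BATCHING enabled, a whole
+ * set of mutexes of one class can be locked or unlocked under a single
+ * batch_begin/batch_end pair instead of taking the class spinlock per
+ * call. The objs[] array and num_objs are placeholders, and ctx is
+ * assumed to be initialized with ww_acquire_init() as in the example
+ * above:
+ *
+ *	int i, err = 0;
+ *
+ *	ww_acquire_batch_begin(&ctx);
+ *	for (i = 0; i < num_objs; ++i) {
+ *		err = ww_mutex_lock(&objs[i]->lock, &ctx);
+ *		if (err)
+ *			break;
+ *	}
+ *	if (err)
+ *		while (i--)
+ *			ww_mutex_unlock_batched(&objs[i]->lock);
+ *	ww_acquire_batch_end(&ctx);
+ *
+ * Note that __ww_mutex_lock() drops and retakes the class spinlock around
+ * sleeping, so contended locks can still sleep inside a batch.
+ */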
+
+int ww_mutex_trylock(struct ww_mutex *ww)
+{
+ bool locked;
+
+ spin_lock(&ww->my_class->lock);
+ locked = __ww_mutex_trylock(ww, NULL, NULL);
+ spin_unlock(&ww->my_class->lock);
+ if (locked)
+ lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);
+
+ return locked;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_trylock);
+
+#endif