author     Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
committer  Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
commit     eebaa7f86212d7ebab3c87aae1f9d68cade1b49e (patch)
tree       38b556f915ea612375202be6ec434f9f75c98b6a /ww_mutex.c
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Diffstat (limited to 'ww_mutex.c')
-rw-r--r--    ww_mutex.c    583
1 file changed, 583 insertions, 0 deletions
diff --git a/ww_mutex.c b/ww_mutex.c
new file mode 100644
index 0000000..6858949
--- /dev/null
+++ b/ww_mutex.c
@@ -0,0 +1,583 @@

/*
 * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
 *
 * Original mutex implementation started by Ingo Molnar:
 *
 * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Wound/wait implementation:
 * Copyright (C) 2013 Canonical Ltd.
 *
 * Wound/wait drop-in, actual wound-wait semantics and batching:
 * Copyright (C) 2016-2018 VMWare Inc.
 */

#include "ww_mutex.h"
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>

#ifndef WW_BUILTIN

#undef EXPORT_SYMBOL_GPL
#define EXPORT_SYMBOL_GPL(_a)
#define MUTEX_FLAGS 0x07

#ifndef CONFIG_DEBUG_MUTEXES
#define debug_ww_mutex_add_waiter(_a, _b, _c)
#define debug_ww_mutex_wake_waiter(_a, _b)
#define debug_ww_mutex_lock_common(_a, _b)
#define mutex_remove_waiter(__lock, __waiter, __task) \
	__list_del((__waiter)->list.prev, (__waiter)->list.next)
#else
static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
				      struct mutex_waiter *waiter,
				      struct task_struct *task)
{
	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	task->blocked_on = waiter;
}

static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}

static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
				       struct mutex_waiter *waiter)
{
	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
	waiter->magic = waiter;
	INIT_LIST_HEAD(&waiter->list);
}

static void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
				struct task_struct *task)
{
	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
	DEBUG_LOCKS_WARN_ON(waiter->task != task);
	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
	task->blocked_on = NULL;

	list_del_init(&waiter->list);
	waiter->task = NULL;
}
#endif


/**
 * ww_acquire_class_lock - Lock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Take the global class spinlock unless we're batching, in which case the
 * global class spinlock was taken at batch begin.
 */
static void ww_acquire_class_lock(struct ww_mutex *ww,
				  struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_lock(&ww->my_class->lock);
}


/**
 * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
 *
 * @ww: The ww mutex.
 * @ww_ctx: The acquire context.
 *
 * Release the global class spinlock unless we're batching, in which case the
 * global class spinlock is released at batch end.
 */
static void ww_acquire_class_unlock(struct ww_mutex *ww,
				    struct ww_acquire_ctx *ww_ctx)
{
	if (!ww_ctx || !ww_ctx->batched)
		spin_unlock(&ww->my_class->lock);
}

static bool __mutex_waiter_is_first(struct mutex *lock,
				    struct mutex_waiter *waiter)
{
	return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
		waiter;
}


/**
 * __ww_mutex_trylock - Trylock a ww_mutex
 *
 * @ww: The mutex to trylock.
 * @ww_ctx: The acquire_ctx to register as locker, or NULL.
 * @waiter: The waiter if a waiter is trying to lock, or NULL.
 * Return: true if the lock succeeded, false otherwise.
 */
static bool __ww_mutex_trylock(struct ww_mutex *ww,
			       struct ww_acquire_ctx *ww_ctx,
			       struct mutex_waiter *waiter)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);

	if (atomic_long_read(&lock->owner))
		return false;

	/*
	 * No lock stealing for now. If there are waiters, only the first
	 * waiter is allowed to lock.
	 */
	if (!list_empty(&lock->wait_list) &&
	    !__mutex_waiter_is_first(lock, waiter))
		return false;

	atomic_long_set(&lock->owner, (unsigned long) current);
	ww->ctx = ww_ctx;
	if (ww_ctx)
		ww_ctx->acquired++;

	return true;
}

static bool
__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
{
	return a->stamp - b->stamp <= LONG_MAX &&
	       (a->stamp != b->stamp || a > b);
}

static struct task_struct *__owner_task(unsigned long owner)
{
	return (struct task_struct *)(owner & ~MUTEX_FLAGS);
}


/**
 * ww_mutex_waiter_backoff - Whether to back off if younger
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or lock owner if we're
 * younger than the lock owner or other waiter.
 */
static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
				    struct ww_acquire_ctx *other)
{
	return (us->my_class->is_wait_die || us->wounded) && us->acquired > 0 &&
		!other->done_acquire;
}


/**
 * ww_mutex_backoff - Whether to back off
 *
 * @us: Our acquire_ctx.
 * @other: The other waiter's or lock owner's acquire_ctx.
 * Return: Whether to back off for another waiter or lock owner.
 */
static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
			     struct ww_acquire_ctx *other)
{
	return other && ww_mutex_waiter_backoff(us, other) &&
		us != other && __ww_ctx_stamp_after(us, other);
}


/**
 * ww_mutex_lock_backoff - Check backoff for the lock owner and all other waiters.
 *
 * @us: Our acquire_ctx.
 * @ww: The ww_mutex considered.
 * Return: Whether to back off for any other waiter or the lock owner.
 */
static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
				  struct ww_mutex *ww)
{
	struct mutex_waiter *cur;

	if (!us)
		return false;

	/*
	 * Wounded contexts are lazily preempted: they back off at their next
	 * lock wait, so that there is a lock to wait on after the backoff.
	 */
	if (us->wounded)
		return true;

	/* Backoff for the lock owner? */
	if (ww_mutex_backoff(us, ww->ctx))
		return true;

	/* Backoff for other waiters? */
	list_for_each_entry(cur, &ww->base.wait_list, list) {
		if (ww_mutex_backoff(us, cur->ww_ctx))
			return true;
	}

	return false;
}

static int
__ww_mutex_add_waiter(struct mutex_waiter *waiter,
		      struct ww_mutex *ww,
		      struct ww_acquire_ctx *ww_ctx)
{
	struct mutex *lock = &ww->base;
	struct mutex_waiter *cur;
	struct list_head *pos;
	bool is_wait_die = ww->my_class->is_wait_die;

	waiter->task = current;
	waiter->ww_ctx = ww_ctx;

	if (!ww_ctx) {
		list_add_tail(&waiter->list, &lock->wait_list);
		return 0;
	}

	/*
	 * Add the waiter before the first waiter with a higher stamp.
	 * Waiters without a context are skipped to avoid starving
	 * them.
	 */
	pos = &lock->wait_list;
	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
		if (!cur->ww_ctx)
			continue;

		/* Do we need to back off early for the other waiter? */
		if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
			if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
				return -EDEADLK;

			break;
		}

		pos = &cur->list;

		/*
		 * The other waiter needs to back off for us.
		 * Wake up the waiter so that it gets a chance to back
		 * off.
		 */
		if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
			debug_ww_mutex_wake_waiter(ww, cur);
			wake_up_process(cur->task);
		}
	}

	list_add_tail(&waiter->list, pos);

	/* Need to wound the lock owner? */
	if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
	    ww_ctx->acquired > 0) {
		ww->ctx->wounded = true;

		/*
		 * Wake up the lock owner in case it's sleeping on
		 * another ww_mutex.
		 */
		wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
	}

	return 0;
}


/**
 * ww_mutex_wake_first_waiter - Wake the first waiter, if any.
 *
 * @ww: The ww_mutex on which to wake the first waiter.
 */
static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
{
	struct mutex_waiter *cur;
	struct mutex *lock = &ww->base;

	if (!list_empty(&lock->wait_list)) {
		cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
				       list);
		debug_ww_mutex_wake_waiter(ww, cur);
		wake_up_process(cur->task);
	}
}

static int __ww_mutex_lock(struct ww_mutex *ww, long state,
			   unsigned int subclass,
			   struct lockdep_map *nest_lock,
			   unsigned long ip,
			   struct ww_acquire_ctx *ww_ctx)
{
	struct mutex_waiter waiter;
	int ret = 0;

	lockdep_assert_held(&ww->my_class->lock);

	if (ww_ctx) {
		/*
		 * If we've backed off when wounded, there are no more
		 * acquired locks, and we can clear the wounded flag.
		 */
		if (ww_ctx->acquired == 0)
			ww_ctx->wounded = false;

		if (ww->ctx == ww_ctx) {
			ret = -EALREADY;
			goto err_early_backoff;
		}
	}

	if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
		goto skip_wait;

	debug_ww_mutex_lock_common(ww, &waiter);
	debug_ww_mutex_add_waiter(ww, &waiter, current);

	lock_contended(&ww->base.dep_map, ip);

	ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
	if (ret)
		goto err_early_backoff;

	set_current_state(state);
	for (;;) {
		if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
			break;

		if (unlikely(signal_pending_state(state, current))) {
			ret = -EINTR;
			goto err;
		}

		if (ww_mutex_lock_backoff(ww_ctx, ww)) {
			ret = -EDEADLK;
			goto err;
		}

		spin_unlock(&ww->my_class->lock);
		schedule();
		spin_lock(&ww->my_class->lock);
		set_current_state(state);
	}
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);
skip_wait:
	lock_acquired(&ww->base.dep_map, ip);
	return 0;

err:
	set_current_state(TASK_RUNNING);
	mutex_remove_waiter(&ww->base, &waiter, current);
	ww_mutex_wake_first_waiter(ww);
err_early_backoff:
	lock_release(&ww->base.dep_map, !!ww_ctx, ip);
	return ret;
}

static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
{
	struct mutex *lock = &ww->base;

	lockdep_assert_held(&ww->my_class->lock);
	lock_release(&lock->dep_map, !!ww->ctx, ip);
	if (ww->ctx)
		ww->ctx->acquired--;
	ww->ctx = NULL;
	atomic_long_set(&lock->owner, 0);
	ww_mutex_wake_first_waiter(ww);
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
				       struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
	unsigned tmp;

	if (ww_ctx->deadlock_inject_countdown-- == 0) {
		tmp = ww_ctx->deadlock_inject_interval;
		if (tmp > UINT_MAX/4)
			tmp = UINT_MAX;
		else
			tmp = tmp*2 + tmp + tmp/2;

		ww_ctx->deadlock_inject_interval = tmp;
		ww_ctx->deadlock_inject_countdown = tmp;

		ww_mutex_unlock(ww);

		return -EDEADLK;
	}
#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */

	return 0;
}

/**
 * ww_class_lock_annotate - lockdep annotation at locking time.
 *
 * @ww: The ww_mutex
 * @ww_ctx: The acquire ctx or NULL
 * @ip: The caller's instruction pointer
 */
static void ww_class_lock_annotate(struct ww_mutex *ww,
				   struct ww_acquire_ctx *ww_ctx,
				   unsigned long ip)
{
	if (!ww_ctx || !ww_ctx->batched) {
		/* Annotate the wait lock before the spinlock. */
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     ww_ctx ? &ww_ctx->dep_map : NULL, ip);
	} else {
		/*
		 * OK, we'd like to annotate ww trylocks under the class
		 * spinlock, but since each trylock becomes a lockdep level,
		 * we'd quickly run out of levels. And we can't annotate
		 * nested trylocks, since apparently lockdep can't cope
		 * with that. So cheat lockdep and fake a class spinlock
		 * release and annotate a waiting ww lock...
		 */
		lock_release(&ww->my_class->lock.dep_map, 0, ip);
		lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
			     &ww_ctx->dep_map, ip);
		lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1, NULL, ip);
	}
}

int
ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE,
			      0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);


int
ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE,
			      0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
			      ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);
	if (!ret && ww_ctx && ww_ctx->acquired > 1)
		return ww_mutex_deadlock_injection(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#else /* CONFIG_DEBUG_LOCK_ALLOC */

int
ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock);


int
ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
	int ret;

	ww_acquire_class_lock(ww, ww_ctx);
	ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
	ww_acquire_class_unlock(ww, ww_ctx);

	return ret;
}
EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);

#endif /* CONFIG_DEBUG_LOCK_ALLOC */

/**
 * ww_mutex_unlock_batched - Unlock replacement to be used for batched unlock
 *
 * @ww: The mutex to unlock
 */
void
ww_mutex_unlock_batched(struct ww_mutex *ww)
{
	__ww_mutex_unlock(ww, _RET_IP_);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);

void
ww_mutex_unlock(struct ww_mutex *ww)
{
	struct ww_acquire_ctx *ww_ctx = ww->ctx;

	ww_acquire_class_lock(ww, ww_ctx);
	__ww_mutex_unlock(ww, _RET_IP_);
	ww_acquire_class_unlock(ww, ww_ctx);
}
EXPORT_SYMBOL_GPL(ww_mutex_unlock);

#ifdef WW_BATCHING
void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(ww_ctx->batched);
#endif
	spin_lock(&ww_ctx->my_class->lock);
	ww_ctx->batched = true;
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);

void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
	WARN_ON(!ww_ctx->batched);
#endif
	ww_ctx->batched = false;
	spin_unlock(&ww_ctx->my_class->lock);
}
EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
#endif

int ww_mutex_trylock(struct ww_mutex *ww)
{
	bool locked;

	spin_lock(&ww->my_class->lock);
	locked = __ww_mutex_trylock(ww, NULL, NULL);
	spin_unlock(&ww->my_class->lock);
	if (locked)
		lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);

	return locked;
}
EXPORT_SYMBOL_GPL(ww_mutex_trylock);

#endif
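
To illustrate how the batched interface added by this patch is meant to be driven, here is a minimal, hypothetical usage sketch. It is not part of the patch. It assumes WW_BATCHING is enabled and that the accompanying ww_mutex.h provides DEFINE_WW_CLASS(), ww_acquire_init() and ww_acquire_fini() helpers mirroring the in-kernel ww_mutex API; those are not shown in this file, and the obj type and obj_lock_pair() function are invented for the example. The error handling follows the backoff rules implemented above: on -EDEADLK the caller drops every lock it still holds before retrying with the same acquire context.

/* Hypothetical example, not part of this patch. Assumes a != b. */
static DEFINE_WW_CLASS(obj_ww_class);		/* assumed from ww_mutex.h */

struct obj {
	struct ww_mutex lock;
	/* ... payload ... */
};

/* Lock two objects in one batch, i.e. under a single class spinlock hold. */
static int obj_lock_pair(struct obj *a, struct obj *b,
			 struct ww_acquire_ctx *ctx)
{
	int ret;

	ww_acquire_init(ctx, &obj_ww_class);	/* assumed from ww_mutex.h */

retry:
	ww_acquire_batch_begin(ctx);		/* takes the class spinlock once */

	ret = ww_mutex_lock(&a->lock, ctx);
	if (!ret) {
		ret = ww_mutex_lock(&b->lock, ctx);
		if (ret)
			/* Drop what we hold; we are still inside the batch. */
			ww_mutex_unlock_batched(&a->lock);
	}

	ww_acquire_batch_end(ctx);		/* drops the class spinlock */

	if (ret == -EDEADLK) {
		/*
		 * Wait-die or wound-wait backoff: with no locks held, the
		 * wounded flag is cleared on the next lock attempt and the
		 * context no longer backs off, so the retry simply blocks
		 * on the contended lock until the winner releases it.
		 */
		goto retry;
	}
	if (ret)
		ww_acquire_fini(ctx);		/* assumed from ww_mutex.h */

	/* On success the caller unlocks both objects and calls
	 * ww_acquire_fini() when it is done with the context. */
	return ret;
}

Unlocking can either reopen a batch and use ww_mutex_unlock_batched(), or simply call ww_mutex_unlock() on each lock outside any batch, since that path takes and releases the class spinlock per mutex.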