author    Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
committer Thomas Hellstrom <thellstrom@vmware.com>    2018-04-11 09:33:05 +0200
commit    eebaa7f86212d7ebab3c87aae1f9d68cade1b49e (patch)
tree      38b556f915ea612375202be6ec434f9f75c98b6a
Initial commit. (HEAD, master)
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
-rw-r--r--    Makefile           13
-rw-r--r--    README             59
-rw-r--r--    ww_mutex.c        583
-rw-r--r--    ww_mutex.h        187
-rw-r--r--    ww_mutex_test.c   229
-rw-r--r--    ww_mutex_test.h    20
6 files changed, 1091 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..16f7b2f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+wtest-y := ww_mutex_test.o ww_mutex.o
+
+obj-m += wtest.o
+
+all:
+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+ make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+install:
+ cp wtest.ko /lib/modules/$(shell uname -r)/extra
+ depmod -a
diff --git a/README b/README
new file mode 100644
index 0000000..77a732f
--- /dev/null
+++ b/README
@@ -0,0 +1,59 @@
+The ww_mutex_test kernel module
+
+Compile using
+make
+make install
+
+Start a test sequence using
+sudo modprobe wtest
+
+After test sequence use
+sudo rmmod wtest
+
+The test sequence simulates a number of GPU command submissions taking
+ww_mutex locks. The module maintains a set of global locks, and each thread
+must lock its own subset of locks, chosen at random from that global set.
+Threads will therefore sometimes try to acquire the same lock, and the
+ww mutex rollback will be initiated. Defines to tune this behaviour
+(ww_mutex_test.h):
+#define WWT_NUM_LOCKS 100000 /* Number of global locks */
+#define WWT_NUM_T_LOCKS 800 /* Number of locks per thread out of the global */
+#define WWT_NUM_THREADS 16 /* Number of locking threads to fire off */
+
+Each thread performs a number of simulated command submissions with the same
+locks. Each command submission consists of
+*) Taking the locks
+*) Busy-waiting for a while (mimicking the time used to submit GPU commands)
+*) Releasing the locks.
+The busy-wait makes it harder to judge how much time was spent on the actual
+locking, but on the other hand gives more real-world-like results for the
+number of rollbacks. A sketch of one such submission follows the defines
+below. Related defines:
+#define WWT_NUM_SUB 10000 /* Number of command submissions */
+#define WWT_CS_UDELAY 000 /* Command submission udelay */
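+
+A minimal sketch of one simulated submission, following the pattern in
+ww_mutex_test.c (the locks[] array and num_locks count are illustrative):
+
+	struct ww_acquire_ctx ctx;
+	int i, ret;
+
+	ww_acquire_init(&ctx, &wwt_class);
+retry:
+	for (i = 0; i < num_locks; ++i) {
+		ret = ww_mutex_lock(locks[i], &ctx);
+		if (ret == -EDEADLK) {
+			int j = i;
+
+			/* Roll back, then retry with the contended lock first. */
+			while (j--)
+				ww_mutex_unlock(locks[j]);
+			swap(locks[0], locks[i]);
+			goto retry;
+		}
+	}
+	ww_acquire_done(&ctx);
+	udelay(WWT_CS_UDELAY);	/* "Submit GPU commands" */
+	for (i = 0; i < num_locks; ++i)
+		ww_mutex_unlock(locks[i]);
+	ww_acquire_fini(&ctx);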
+
+The results can be viewed in "dmesg" as the start and end time of each
+thread. Each thread also prints the number of rollbacks it had to do.
+There are two ways to get zero rollbacks: One is to fire off the threads
+sequentially, in which case there is no contention. The other is to make
+sure no locks are shared between threads. With the latter option, make sure
+there are enough global locks to accommodate the requests of all threads;
+otherwise module loading may lock up.
+Related defines:
+#define WWT_NO_SHARED /* No shared mutexes - No rollbacks */
+#define WWT_SEQUENTIAL /* Fire off locking threads sequentially */
+
+The module can either use the kernel's built-in ww mutex implementation or a
+drop-in replacement. The drop-in replacement implements a choice of
+algorithms: Wait-Die and Wound-Wait. It also makes it possible to batch
+mutex locks and unlocks, significantly reducing the number of locked CPU
+cycles. Note that the drop-in replacement manipulates locking state under a
+class-global spinlock instead of using the built-in implementation's atomic
+operations. This is slightly slower when the global spinlock is uncontended,
+and significantly slower when it is contended, but it allows locks and
+unlocks to be batched in a single global-spinlock critical section; see the
+sketch after the defines below.
+
+Related defines:
+#define WW_BUILTIN /* Use kernel builtin ww mutexes */
+#define WW_WAITDIE true /* Use wait-die, not wound-wait */
+#define WW_BATCHING /* Batch locks and unlocks */
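+
+With the drop-in replacement and WW_BATCHING, the spinlock round-trips of the
+individual lock and unlock calls can be folded into one critical section per
+batch. A sketch, again following ww_mutex_test.c (rollback handling omitted):
+
+	ww_acquire_init(&ctx, &wwt_class);
+	ww_acquire_batch_begin(&ctx);	/* Takes the class spinlock once */
+	for (i = 0; i < num_locks; ++i)
+		ww_mutex_lock(locks[i], &ctx);
+	ww_acquire_batch_end(&ctx);
+	ww_acquire_done(&ctx);
+
+	/* ... access the protected data ... */
+
+	ww_acquire_batch_begin(&ctx);
+	for (i = 0; i < num_locks; ++i)
+		ww_mutex_unlock(locks[i]);
+	ww_acquire_batch_end(&ctx);
+	ww_acquire_fini(&ctx);
+
+With WW_BUILTIN, or with WW_BATCHING undefined, ww_acquire_batch_begin/end
+compile to no-ops, so the same code runs unbatched.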
diff --git a/ww_mutex.c b/ww_mutex.c
new file mode 100644
index 0000000..6858949
--- /dev/null
+++ b/ww_mutex.c
@@ -0,0 +1,583 @@
+/*
+ * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
+ *
+ * Original mutex implementation started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Wound/wait implementation:
+ * Copyright (C) 2013 Canonical Ltd.
+ *
+ * Wound/wait drop-in, actual wound-wait semantics and batching:
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#include "ww_mutex.h"
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+
+#ifndef WW_BUILTIN
+
+#undef EXPORT_SYMBOL_GPL
+#define EXPORT_SYMBOL_GPL(_a)
+#define MUTEX_FLAGS 0x07
+
+#ifndef CONFIG_DEBUG_MUTEXES
+#define debug_ww_mutex_add_waiter(_a, _b, _c)
+#define debug_ww_mutex_wake_waiter(_a, _b)
+#define debug_ww_mutex_lock_common(_a, _b)
+#define mutex_remove_waiter(__lock, __waiter, __task) \
+ __list_del((__waiter)->list.prev, (__waiter)->list.next)
+#else
+static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
+ struct mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
+ task->blocked_on = waiter;
+}
+
+static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
+ struct mutex_waiter *waiter)
+{
+ struct mutex *lock = &ww->base;
+
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
+ DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+ DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+}
+
+static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
+ struct mutex_waiter *waiter)
+{
+ memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
+ waiter->magic = waiter;
+ INIT_LIST_HEAD(&waiter->list);
+}
+
+static void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+ DEBUG_LOCKS_WARN_ON(waiter->task != task);
+ DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+ task->blocked_on = NULL;
+
+ list_del_init(&waiter->list);
+ waiter->task = NULL;
+}
+#endif
+
+
+/**
+ * ww_acquire_class_lock - Lock the global class spinlock unless batching
+ *
+ * @ww: The ww mutex.
+ * @ww_ctx: The acquire context.
+ *
+ * Take the global class spinlock unless we're batching in which case the
+ * global class spinlock was taken during batch begin.
+ */
+static void ww_acquire_class_lock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx || !ww_ctx->batched)
+ spin_lock(&ww->my_class->lock);
+}
+
+
+/**
+ * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
+ *
+ * @ww: The ww mutex.
+ * @ww_ctx: The acquire context.
+ *
+ * Free the global class spinlock unless we're batching in which case the
+ * global class spinlock is freed at batch end.
+ */
+static void ww_acquire_class_unlock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx || !ww_ctx->batched)
+ spin_unlock(&ww->my_class->lock);
+}
+
+static bool __mutex_waiter_is_first(struct mutex *lock,
+ struct mutex_waiter *waiter)
+{
+ return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
+ waiter;
+}
+
+
+/**
+ * __ww_mutex_trylock - Trylock a ww_mutex
+ *
+ * @ww: The mutex to trylock
+ * @ww_ctx: The acquire_ctx to register as locker or NULL.
+ * @waiter: The waiter if a waiter is trying to lock, or NULL.
+ * Return: true if lock succeeded. false otherwise.
+ */
+static bool __ww_mutex_trylock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx,
+ struct mutex_waiter *waiter)
+{
+ struct mutex *lock = &ww->base;
+
+ lockdep_assert_held(&ww->my_class->lock);
+
+ if (atomic_long_read(&lock->owner))
+ return false;
+
+ /*
+ * No lock stealing for now. If there are waiters, only the first
+ * waiter is allowed to lock.
+ */
+ if (!list_empty(&lock->wait_list) &&
+ !__mutex_waiter_is_first(lock, waiter))
+ return false;
+
+ atomic_long_set(&lock->owner, (unsigned long) current);
+ ww->ctx = ww_ctx;
+ if (ww_ctx)
+ ww_ctx->acquired++;
+
+ return true;
+}
+
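+/*
+ * Returns true if context @a is younger than @b, that is, @a has the later
+ * stamp. The subtraction is wraparound-safe, and equal stamps are ordered
+ * by context address.
+ */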
+static bool
+__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
+{
+ return a->stamp - b->stamp <= LONG_MAX &&
+ (a->stamp != b->stamp || a > b);
+}
+
+static struct task_struct *__owner_task(unsigned long owner)
+{
+ return (struct task_struct *)(owner & ~MUTEX_FLAGS);
+}
+
+
+/**
+ * ww_mutex_waiter_backoff - Whether to backoff if younger
+ *
+ * @us: Our acquire_ctx.
+ * @other: other waiter or lock owner acquire_ctx.
+ * Return: Whether to back off for another waiter or lock owner if we're
+ * younger than the lock owner or other waiter.
+ */
+static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
+ struct ww_acquire_ctx *other)
+{
+ return (us->my_class->is_wait_die || us->wounded) && us->acquired > 0 &&
+ !other->done_acquire;
+}
+
+
+/**
+ * ww_mutex_backoff - Whether to backoff
+ *
+ * @us: Our acquire_ctx.
+ * @other: other waiter or lock owner acquire_ctx.
+ * Return: Whether to back off for another waiter or lock owner.
+ */
+static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
+ struct ww_acquire_ctx *other)
+{
+ return other && ww_mutex_waiter_backoff(us, other) &&
+ us != other && __ww_ctx_stamp_after(us, other);
+}
+
+
+/**
+ * ww_mutex_lock_backoff - Check backoff for lock owner and all other waiters.
+ *
+ * @us: Our acquire_ctx.
+ * @ww: The ww_mutex considered.
+ * Return: Whether to back off for any other waiters or lock owner.
+ */
+static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
+ struct ww_mutex *ww)
+{
+ struct mutex_waiter *cur;
+
+ if (!us)
+ return false;
+
+ /*
+ * Wounded contexts lazy-preempt before first wounded lock wait, so that
+ * we have a lock to wait on after backoff.
+ */
+ if (us->wounded)
+ return true;
+
+ /* Backoff for lock owner? */
+ if (ww_mutex_backoff(us, ww->ctx))
+ return true;
+
+ /* Backoff for other waiters? */
+ list_for_each_entry(cur, &ww->base.wait_list, list) {
+ if (ww_mutex_backoff(us, cur->ww_ctx))
+ return true;
+ }
+
+ return false;
+}
+
+static int
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+ struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex *lock = &ww->base;
+ struct mutex_waiter *cur;
+ struct list_head *pos;
+ bool is_wait_die = ww->my_class->is_wait_die;
+
+ waiter->task = current;
+ waiter->ww_ctx = ww_ctx;
+
+ if (!ww_ctx) {
+ list_add_tail(&waiter->list, &lock->wait_list);
+ return 0;
+ }
+
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them.
+ */
+ pos = &lock->wait_list;
+ list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ /* Early backoff for other waiter */
+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+ if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
+ return -EDEADLK;
+
+ break;
+ }
+
+ pos = &cur->list;
+
+ /*
+ * Other waiter needs to backoff for us.
+ * Wake up the waiter so that it gets a chance to back
+ * off.
+ */
+ if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
+ debug_ww_mutex_wake_waiter(ww, cur);
+ wake_up_process(cur->task);
+ }
+ }
+
+ list_add_tail(&waiter->list, pos);
+
+ /* Need to wound the lock owner? */
+ if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
+ ww_ctx->acquired > 0){
+ ww->ctx->wounded = true;
+
+ /*
+ * Wake up the lock owner in case it's sleeping on
+		 * another ww_mutex.
+ */
+ wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
+ }
+
+ return 0;
+}
+
+
+/**
+ * ww_mutex_wake_first_waiter - Wake first waiter if any.
+ *
+ * @ww: The ww_mutex on which to wake first waiter.
+ */
+static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
+{
+ struct mutex_waiter *cur;
+ struct mutex *lock = &ww->base;
+
+ if (!list_empty(&lock->wait_list)) {
+ cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
+ list);
+ debug_ww_mutex_wake_waiter(ww, cur);
+ wake_up_process(cur->task);
+ }
+}
+
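+/*
+ * Acquire @ww with the class spinlock held. The caller sleeps with the
+ * spinlock dropped and retries the trylock and the backoff checks on each
+ * wakeup. Returns -EALREADY if @ww_ctx already holds this mutex, -EDEADLK
+ * if this context must back off, or -EINTR on a signal in interruptible
+ * state.
+ */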
+static int __ww_mutex_lock(struct ww_mutex *ww, long state,
+ unsigned int subclass,
+ struct lockdep_map *nest_lock,
+ unsigned long ip,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter waiter;
+ int ret = 0;
+
+ lockdep_assert_held(&ww->my_class->lock);
+
+ if (ww_ctx) {
+ /*
+ * If we've backed off when wounded, there are no more
+ * acquired locks, and we can clear the wounded flag.
+ */
+ if (ww_ctx->acquired == 0)
+ ww_ctx->wounded = false;
+
+ if (ww->ctx == ww_ctx) {
+ ret = -EALREADY;
+ goto err_early_backoff;
+ }
+ }
+
+ if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
+ goto skip_wait;
+
+ debug_ww_mutex_lock_common(ww, &waiter);
+ debug_ww_mutex_add_waiter(ww, &waiter, current);
+
+ lock_contended(&ww->base.dep_map, ip);
+
+ ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
+ if (ret)
+ goto err_early_backoff;
+
+ set_current_state(state);
+ for (;;) {
+ if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
+ break;
+
+ if (unlikely(signal_pending_state(state, current))) {
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (ww_mutex_lock_backoff(ww_ctx, ww)) {
+ ret = -EDEADLK;
+ goto err;
+ }
+
+ spin_unlock(&ww->my_class->lock);
+ schedule();
+ spin_lock(&ww->my_class->lock);
+ set_current_state(state);
+ }
+ set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(&ww->base, &waiter, current);
+skip_wait:
+ lock_acquired(&ww->base.dep_map, ip);
+ return 0;
+
+err:
+ set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(&ww->base, &waiter, current);
+ ww_mutex_wake_first_waiter(ww);
+err_early_backoff:
+ lock_release(&ww->base.dep_map, !!ww_ctx, ip);
+ return ret;
+}
+
+static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
+{
+ struct mutex *lock = &ww->base;
+
+ lockdep_assert_held(&ww->my_class->lock);
+ lock_release(&lock->dep_map, !!ww->ctx, ip);
+ if (ww->ctx)
+ ww->ctx->acquired--;
+ ww->ctx = NULL;
+ atomic_long_set(&lock->owner, 0);
+ ww_mutex_wake_first_waiter(ww);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ unsigned tmp;
+
+ if (ww_ctx->deadlock_inject_countdown-- == 0) {
+ tmp = ww_ctx->deadlock_inject_interval;
+ if (tmp > UINT_MAX/4)
+ tmp = UINT_MAX;
+ else
+ tmp = tmp*2 + tmp + tmp/2;
+
+ ww_ctx->deadlock_inject_interval = tmp;
+ ww_ctx->deadlock_inject_countdown = tmp;
+
+ ww_mutex_unlock(ww);
+
+ return -EDEADLK;
+ }
+
+#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */
+
+	return 0;
+}
+
+/**
+ * ww_class_lock_annotate - lockdep annotation at locking time.
+ *
+ * @ww: The ww_mutex
+ * @ww_ctx: The acquire ctx or NULL
+ * @ip: The caller stack
+ */
+static void ww_class_lock_annotate(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned long ip)
+{
+ if (!ww_ctx || !ww_ctx->batched) {
+ /* Annotate wait lock before the spinlock. */
+ lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
+ ww_ctx ? &ww_ctx->dep_map : NULL, ip);
+ } else {
+ /*
+ * OK, We'd like to annotate ww trylocks under the class
+ * spinlock, but since each trylock becomes a lockdep level,
+ * we'd quickly run out of levels. And we can't annotate
+ * nested trylocks, since apparently lockdep can't cope
+ * with that. So cheat lockdep and fake a class spinlock
+		 * release and annotate a waiting ww lock...
+ */
+ lock_release(&ww->my_class->lock.dep_map, 0, ip);
+ lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
+ &ww_ctx->dep_map, ip);
+ lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1, NULL, ip);
+ }
+}
+
+int
+ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE,
+ 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
+ ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+ if (!ret && ww_ctx && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+
+int
+ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE,
+ 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
+ ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+ if (!ret && ww_ctx && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+#else /* CONFIG_DEBUG_LOCK_ALLOC */
+
+int
+ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+
+int
+ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * ww_mutex_unlock_batched - Replacement to be used for batched unlock
+ *
+ * @ww: The mutex to unlock
+ */
+void
+ww_mutex_unlock_batched(struct ww_mutex *ww)
+{
+
+ __ww_mutex_unlock(ww, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);
+
+void
+ww_mutex_unlock(struct ww_mutex *ww)
+{
+ struct ww_acquire_ctx *ww_ctx = ww->ctx;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ __ww_mutex_unlock(ww, _RET_IP_);
+ ww_acquire_class_unlock(ww, ww_ctx);
+}
+EXPORT_SYMBOL_GPL(ww_mutex_unlock);
+
+#ifdef WW_BATCHING
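+/**
+ * ww_acquire_batch_begin - Start batching lock/unlock calls for @ww_ctx
+ *
+ * @ww_ctx: The acquire context.
+ *
+ * Takes the class spinlock and marks the context as batched, so that the
+ * individual lock and unlock calls made with this context skip their own
+ * spinlock round-trips until ww_acquire_batch_end() releases it.
+ */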
+void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ WARN_ON(ww_ctx->batched);
+#endif
+ spin_lock(&ww_ctx->my_class->lock);
+ ww_ctx->batched = true;
+}
+EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);
+
+void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ WARN_ON(!ww_ctx->batched);
+#endif
+ ww_ctx->batched = false;
+ spin_unlock(&ww_ctx->my_class->lock);
+}
+EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
+#endif
+
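+/**
+ * ww_mutex_trylock - Trylock a ww_mutex without an acquire context
+ *
+ * @ww: The mutex to trylock.
+ *
+ * Takes the class spinlock briefly; the trylock only succeeds if the mutex
+ * is unowned and no waiter is queued on it.
+ * Return: Nonzero if the lock succeeded, zero otherwise.
+ */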
+int ww_mutex_trylock(struct ww_mutex *ww)
+{
+ bool locked;
+
+ spin_lock(&ww->my_class->lock);
+ locked = __ww_mutex_trylock(ww, NULL, NULL);
+ spin_unlock(&ww->my_class->lock);
+ if (locked)
+ lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);
+
+ return locked;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_trylock);
+
+#endif
diff --git a/ww_mutex.h b/ww_mutex.h
new file mode 100644
index 0000000..177f107
--- /dev/null
+++ b/ww_mutex.h
@@ -0,0 +1,187 @@
+/*
+ * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
+ *
+ * Original mutex implementation started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Wound/wait implementation:
+ * Copyright (C) 2013 Canonical Ltd.
+ *
+ * Wound/wait drop-in, actual wound-wait semantics and batching:
+ * Copyright (C) 2016-2018 VMWare Inc.
+ *
+ * This file contains the main data structure and API definitions.
+ */
+
+#ifndef _WW_MUTEX_H_
+#define _WW_MUTEX_H_
+
+#include "ww_mutex_test.h"
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+
+struct ww_class {
+ spinlock_t lock;
+ atomic_long_t stamp;
+ struct lock_class_key acquire_key;
+ struct lock_class_key mutex_key;
+ const char *acquire_name;
+ const char *mutex_name;
+ bool is_wait_die;
+};
+
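+/*
+ * Per-transaction acquire context. The stamp orders contexts for the
+ * wait-die / wound-wait decisions, @acquired counts currently held locks,
+ * and the flag bits track batching, wounding and ww_acquire_done().
+ */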
+struct ww_acquire_ctx {
+ struct ww_class *my_class;
+ unsigned long stamp;
+ unsigned long acquired;
+ u32 batched : 1;
+ u32 wounded : 1;
+ u32 done_acquire : 1;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ struct lockdep_map batch_map;
+#endif
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ unsigned deadlock_inject_interval;
+ unsigned deadlock_inject_countdown;
+#endif
+};
+
+struct ww_mutex {
+ struct mutex base;
+ struct ww_class *my_class;
+ wait_queue_head_t queue;
+ struct ww_acquire_ctx *ctx;
+};
+
+#define __WW_CLASS_INITIALIZER(ww_class) { \
+ .lock = __SPIN_LOCK_UNLOCKED(ww_class.lock), \
+ .stamp = ATOMIC_LONG_INIT(0), \
+ .acquire_name = #ww_class "_acquire", \
+ .mutex_name = #ww_class "_mutex", \
+ .is_wait_die = WW_WAITDIE, \
+}
+
+#define DEFINE_WW_CLASS(classname) \
+ struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
+
+#ifdef WW_BATCHING
+void ww_acquire_batch_begin(struct ww_acquire_ctx *ctx);
+void ww_acquire_batch_end(struct ww_acquire_ctx *ctx);
+#else
+#define ww_acquire_batch_begin(_a)
+#define ww_acquire_batch_end(_a)
+#endif
+
+static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
+ struct ww_class *ww_class)
+{
+ ctx->my_class = ww_class;
+ ctx->stamp = atomic_long_inc_return(&ww_class->stamp);
+ ctx->acquired = 0L;
+ ctx->batched = false;
+ ctx->wounded = false;
+ ctx->done_acquire = 0;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_init_map(&ctx->dep_map, ww_class->acquire_name,
+ &ww_class->acquire_key, 0);
+ lockdep_init_map(&ctx->batch_map, ww_class->mutex_name,
+ &ww_class->mutex_key, 0);
+ lock_acquire(&ctx->dep_map, 0, 0, 0, 1, NULL, _RET_IP_);
+ lock_acquired(&ctx->dep_map, _RET_IP_);
+#endif
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ ctx->deadlock_inject_interval = 1L;
+ ctx->deadlock_inject_countdown = ctx->stamp & 0x0f;
+#endif
+}
+
+/**
+ * ww_acquire_done - marks the end of the acquire phase
+ * @ctx: the acquire context
+ *
+ * Marks the end of the acquire phase, any further w/w mutex lock calls using
+ * this context are forbidden.
+ *
+ * Calling this function is optional, it is just useful to document w/w mutex
+ * code and clearly designated the acquire phase from actually using the locked
+ * data structures.
+ */
+static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ lockdep_assert_held(ctx);
+
+ WARN_ON(ctx->batched);
+ DEBUG_LOCKS_WARN_ON(ctx->done_acquire);
+#endif
+ ctx->done_acquire = 1;
+}
+
+/**
+ * ww_acquire_fini - releases a w/w acquire context
+ * @ctx: the acquire context to free
+ *
+ * Releases a w/w acquire context. This must be called _after_ all acquired w/w
+ * mutexes have been released with ww_mutex_unlock.
+ */
+static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lock_release(&ctx->dep_map, 0, _THIS_IP_);
+#endif
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(ctx->acquired);
+ if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
+ /*
+ * lockdep will normally handle this,
+ * but fail without anyway
+ */
+ ctx->done_acquire = 1;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC))
+ /* ensure ww_acquire_fini will still fail if called twice */
+ ctx->acquired = ~0U;
+#endif
+}
+
+
+static inline void ww_mutex_init(struct ww_mutex *ww,
+ struct ww_class *ww_class)
+{
+ ww->my_class = ww_class;
+ ww->ctx = NULL;
+ init_waitqueue_head(&ww->queue);
+ mutex_init(&ww->base);
+}
+
+static inline bool ww_mutex_is_locked(struct ww_mutex *ww)
+{
+ return mutex_is_locked(&ww->base);
+}
+
+int ww_mutex_trylock(struct ww_mutex *lock);
+
+int ww_mutex_lock_interruptible(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx);
+
+#define ww_mutex_lock_slow_interruptible ww_mutex_lock_interruptible
+
+int ww_mutex_lock(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx);
+
+#define ww_mutex_lock_slow ww_mutex_lock
+
+void ww_mutex_unlock(struct ww_mutex *lock);
+
+static inline void ww_mutex_destroy(struct ww_mutex *lock)
+{
+ mutex_destroy(&lock->base);
+}
+
+
+#endif
diff --git a/ww_mutex_test.c b/ww_mutex_test.c
new file mode 100644
index 0000000..8ebfdc9
--- /dev/null
+++ b/ww_mutex_test.c
@@ -0,0 +1,229 @@
+/*
+ * Wound/Wait Mutex testing module.
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#include "ww_mutex_test.h"
+
+#ifdef WW_BUILTIN
+#include <linux/ww_mutex.h>
+#define ww_acquire_batch_begin(_a)
+#define ww_acquire_batch_end(_a)
+#else
+#include "ww_mutex.h"
+#endif
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/delay.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Hellström");
+MODULE_DESCRIPTION("Test module for ww_mutex performance");
+MODULE_VERSION("1.0.0");
+
+struct ww_mutex_item {
+ struct ww_mutex mutex;
+ /* Align to a typical cache line */
+ u8 pad[64 - sizeof(struct ww_mutex) % 64];
+};
+
+struct wwt_device {
+ struct ww_mutex_item *locks;
+ int *used;
+ int num_locks;
+};
+
+struct wwt_thread {
+ struct wwt_device *dev;
+ struct ww_mutex **p_locks;
+ u8 *used;
+ int num_locks;
+ int thread;
+ u32 num_rollbacks;
+ struct work_struct work;
+};
+
+
+
+
+static struct wwt_device g_dev;
+static struct wwt_thread threads[WWT_NUM_THREADS];
+static DEFINE_WW_CLASS(wwt_class);
+
+static void wwt_thread_rollback(struct wwt_thread *t, int upto)
+{
+ while(upto--)
+ ww_mutex_unlock(t->p_locks[upto]);
+ t->num_rollbacks++;
+}
+
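+/*
+ * One simulated command submission: take all of the thread's locks, busy-wait
+ * to mimic command submission, then release them again. On -EDEADLK, roll
+ * back, move the contended lock to the front of the array and restart, so
+ * that it is acquired first on the next attempt.
+ */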
+static int wwt_thread_lock_sequence(struct wwt_thread *t)
+{
+ struct ww_acquire_ctx ctx;
+ int ret = 0;
+ int i;
+
+ ww_acquire_init(&ctx, &wwt_class);
+ ww_acquire_batch_begin(&ctx);
+restart:
+ for (i = 0; i < t->num_locks; ++i) {
+ ret = ww_mutex_lock(t->p_locks[i], &ctx);
+ if (ret) {
+ WARN_ON(i == 0);
+ wwt_thread_rollback(t, i);
+ if (ret == -EDEADLK) {
+ swap(t->p_locks[0], t->p_locks[i]);
+ goto restart;
+ } else
+ goto out_err;
+ }
+ }
+ ww_acquire_batch_end(&ctx);
+ ww_acquire_done(&ctx);
+
+ udelay(WWT_CS_UDELAY);
+
+ ww_acquire_batch_begin(&ctx);
+ wwt_thread_rollback(t, t->num_locks);
+ t->num_rollbacks--;
+out_err:
+ ww_acquire_batch_end(&ctx);
+ ww_acquire_fini(&ctx);
+ return ret;
+}
+
+static void wwt_lock_work(struct work_struct *work) {
+ struct wwt_thread *t = container_of(work, struct wwt_thread, work);
+ int i, ret;
+
+ printk(KERN_INFO "thread %d: Starting work.\n", t->thread);
+ for (i = 0; i < WWT_NUM_SUB; ++i) {
+ ret = wwt_thread_lock_sequence(t);
+ if (ret)
+ break;
+ }
+
+	printk(KERN_INFO "thread %d: Finished with %u rollbacks. ret is %d\n",
+ t->thread, t->num_rollbacks, ret);
+}
+
+
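+/*
+ * Pick WWT_NUM_T_LOCKS distinct global locks at random for this thread,
+ * probing linearly past slots that are already taken. With WWT_NO_SHARED
+ * defined, the used[] map is kept across threads, so no lock is handed to
+ * more than one thread.
+ */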
+static int wwt_thread_setup(struct wwt_thread *t, int thread)
+{
+ struct wwt_device *dev = &g_dev;
+ u32 idx;
+ int i;
+
+ t->dev = dev;
+ t->p_locks = vmalloc(WWT_NUM_T_LOCKS * sizeof(*t->p_locks));
+ t->num_rollbacks = 0;
+ t->thread = thread;
+ INIT_WORK(&t->work, wwt_lock_work);
+#ifndef WWT_NO_SHARED
+ memset(dev->used, 0, dev->num_locks * sizeof(*dev->used));
+#endif
+ if (!t->p_locks)
+ return -ENOMEM;
+
+ for (i = 0; i < WWT_NUM_T_LOCKS; ++i) {
+ get_random_bytes(&idx, sizeof(idx));
+ idx %= WWT_NUM_LOCKS;
+ while(dev->used[idx]) {
+ idx++;
+ idx %= WWT_NUM_LOCKS;
+ }
+ dev->used[idx] = 1;
+ t->p_locks[i] = &dev->locks[idx].mutex;
+ }
+ t->num_locks = WWT_NUM_T_LOCKS;
+
+ return 0;
+}
+
+static void wwt_thread_destroy(struct wwt_thread *t)
+{
+ cancel_work_sync(&t->work);
+ if (t->p_locks)
+ vfree(t->p_locks);
+}
+
+static void wwt_device_fini(struct wwt_device *wdev)
+{
+ int i;
+
+ if (wdev->used)
+ vfree(wdev->used);
+
+ for (i = 0; i < ARRAY_SIZE(threads); ++i)
+ wwt_thread_destroy(&threads[i]);
+
+ for (i = 0; i < wdev->num_locks; ++i)
+ ww_mutex_destroy(&wdev->locks[i].mutex);
+
+ vfree(wdev->locks);
+ wdev->num_locks = 0;
+}
+
+
+static int wwt_device_init(struct wwt_device *wdev, int num_locks)
+{
+ int i, ret;
+
+ wdev->num_locks = 0;
+ wdev->locks = vzalloc(num_locks * sizeof(*wdev->locks));
+ if (!wdev->locks)
+ return -ENOMEM;
+
+	wdev->used = vzalloc(num_locks * sizeof(*wdev->used));
+	if (!wdev->used) {
+		ret = -ENOMEM;
+		goto out_thread_setup;
+	}
+
+ for (i = 0; i < num_locks; ++i)
+ ww_mutex_init(&wdev->locks[i].mutex, &wwt_class);
+
+ wdev->num_locks = num_locks;
+
+	printk(KERN_INFO "Allocated %d mutexes. Start alignment offset %lu, lock size %zu\n",
+	       num_locks, ((unsigned long) wdev->locks) % 64,
+	       sizeof(struct ww_mutex));
+
+ for (i = 0; i < ARRAY_SIZE(threads); ++i) {
+ ret = wwt_thread_setup(&threads[i], i);
+ if (ret)
+ goto out_thread_setup;
+ }
+
+ printk(KERN_INFO "Set up %lu threads\n", ARRAY_SIZE(threads));
+ usleep_range(10000, 20000);
+ for (i = 0; i < ARRAY_SIZE(threads); ++i) {
+#ifndef WWT_SEQUENTIAL
+ queue_work(system_unbound_wq, &threads[i].work);
+#else
+ wwt_lock_work(&threads[i].work);
+#endif
+ }
+
+ return 0;
+
+out_thread_setup:
+ wwt_device_fini(&g_dev);
+ return ret;
+}
+
+static int __init wwt_init(void) {
+
+ printk(KERN_INFO "Hello, World!\n");
+ return wwt_device_init(&g_dev, WWT_NUM_LOCKS);
+}
+
+static void __exit wwt_exit(void) {
+ wwt_device_fini(&g_dev);
+ printk(KERN_INFO "Goodbye, World!\n");
+}
+
+module_init(wwt_init);
+module_exit(wwt_exit);
diff --git a/ww_mutex_test.h b/ww_mutex_test.h
new file mode 100644
index 0000000..8911ea7
--- /dev/null
+++ b/ww_mutex_test.h
@@ -0,0 +1,20 @@
+/*
+ * Wound/Wait Mutex testing module.
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#ifndef _WW_MUTEX_TEST_H_
+#define _WW_MUTEX_TEST_H_
+
+#undef WW_BUILTIN /* Use kernel builtin ww mutexes */
+#define WW_WAITDIE true /* Use wait-die, not wound-wait */
+#define WW_BATCHING /* Batch locks and unlocks */
+#define WWT_NUM_LOCKS 100000 /* Number of global locks */
+#define WWT_NUM_T_LOCKS 800 /* Number of locks per thread out of the global */
+#define WWT_NUM_THREADS 16 /* Number of locking threads to fire off */
+#undef WWT_SEQUENTIAL /* Fire off locking threads sequentially */
+#define WWT_CS_UDELAY 000 /* Command submission udelay */
+#define WWT_NUM_SUB 10000 /* Number of command submissions */
+#undef WWT_NO_SHARED /* No shared mutexes - No rollbacks */
+
+#endif