-rw-r--r--  Makefile          13
-rw-r--r--  README            59
-rw-r--r--  ww_mutex.c       583
-rw-r--r--  ww_mutex.h       187
-rw-r--r--  ww_mutex_test.c  229
-rw-r--r--  ww_mutex_test.h   20
6 files changed, 1091 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..16f7b2f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+wtest-y := ww_mutex_test.o ww_mutex.o
+
+obj-m += wtest.o
+
+all:
+	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+install:
+ cp wtest.ko /lib/modules/$(shell uname -r)/extra
+ depmod -a
diff --git a/README b/README
new file mode 100644
index 0000000..77a732f
--- /dev/null
+++ b/README
@@ -0,0 +1,59 @@
+The ww_mutex_test kernel module
+
+Compile and install using
+make
+make install
+
+Start a test sequence using
+sudo modprobe wtest
+
+After the test sequence, unload with
+sudo rmmod wtest
+
+The test sequence simulates a number of GPU command submissions taking
+ww_mutex locks. The module maintains a number of global locks, and each thread
+is required to lock its own set of locks, randomly chosen from
+the set of global locks. Threads will therefore sometimes try to acquire
+the same lock, and the ww mutex rollback will be initiated. Defines to
+tune this behaviour (ww_mutex_test.h):
+#define WWT_NUM_LOCKS 100000 /* Number of global locks */
+#define WWT_NUM_T_LOCKS 800 /* Number of locks per thread out of the global */
+#define WWT_NUM_THREADS 16 /* Number of locking threads to fire off */
+
+Each thread performs a number of simulated command submissions with the same
+locks. Each command submission consists of
+*) Taking the locks
+*) Busy-waiting for a while (mimicking the time used to submit GPU commands)
+*) Releasing the locks.
+The busy wait makes it harder to judge how much time was spent on the actual
+locking, but on the other hand gives more realistic results for the number of
+rollbacks. Related defines (a sketch of the per-submission loop follows):
+#define WWT_NUM_SUB 10000 /* Number of command submissions */
+#define WWT_CS_UDELAY 000 /* Command submission udelay */
+
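+As an illustration, each simulated command submission is roughly the
+following loop (simplified from wwt_thread_lock_sequence() in
+ww_mutex_test.c; batching and error handling are omitted). A rollback is
+the -EDEADLK path: drop the locks taken so far, move the contended lock to
+the front and restart:
+
+	ww_acquire_init(&ctx, &wwt_class);
+restart:
+	for (i = 0; i < t->num_locks; ++i) {
+		ret = ww_mutex_lock(t->p_locks[i], &ctx);
+		if (ret == -EDEADLK) {
+			wwt_thread_rollback(t, i);	/* unlock locks 0..i-1 */
+			swap(t->p_locks[0], t->p_locks[i]);
+			goto restart;
+		}
+	}
+	ww_acquire_done(&ctx);
+	udelay(WWT_CS_UDELAY);			/* simulated GPU submission */
+	for (i = 0; i < t->num_locks; ++i)
+		ww_mutex_unlock(t->p_locks[i]);
+	ww_acquire_fini(&ctx);
+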
+The results can be viewed as start and end times for each thread in
+"dmesg". Each thread also prints the number of rollbacks it had to do.
+There are two ways to get zero rollbacks: One is to fire off the threads
+sequentially, in which case there will be no contention. The other is to
+ensure there are no common locks between threads. With the latter option,
+take care that there are enough global locks to accommodate the requests of
+all threads; otherwise module loading may lock up.
+Related defines:
+#define WWT_NO_SHARED /* No shared mutexes - No rollbacks */
+#define WWT_SEQUENTIAL /* Fire off locking threads sequentially */
+
+The module can use either the kernel's built-in ww mutex implementation or a
+drop-in replacement implementation. The drop-in replacement implements a
+choice of algorithms: Wait-Die and Wound-Wait. It can also batch mutex locks
+and unlocks, significantly reducing the number of locked CPU cycles (see the
+sketch after the defines below).
+Note that the drop-in replacement manipulates locking state under a
+class-global spinlock instead of using the built-in implementation's atomic
+operations. This is slightly slower when the global spinlock is uncontended,
+and significantly slower when it is contended, but it allows locks and unlocks
+to be batched in a single global spinlock critical section.
+
+Related defines:
+#define WW_BUILTIN /* Use kernel builtin ww mutexes */
+#define WW_WAITDIE true /* Use wait-die, not wound-wait */
+#define WW_BATCHING /* Batch locks and unlocks */
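+
+When WW_BATCHING is defined (and WW_BUILTIN is not), sequences of lock and
+unlock calls are wrapped in a single class-spinlock critical section. A rough
+sketch of the call pattern, as used by the test module:
+
+	ww_acquire_init(&ctx, &wwt_class);
+	ww_acquire_batch_begin(&ctx);	/* takes the class spinlock */
+	/* ... ww_mutex_lock() calls ... */
+	ww_acquire_batch_end(&ctx);	/* releases the class spinlock */
+	ww_acquire_done(&ctx);
+	/* ... use the protected data ... */
+	ww_acquire_batch_begin(&ctx);
+	/* ... ww_mutex_unlock() calls ... */
+	ww_acquire_batch_end(&ctx);
+	ww_acquire_fini(&ctx);
+
+With WW_BUILTIN, or without WW_BATCHING, ww_acquire_batch_begin()/end()
+compile away to no-ops, so the same calling code works in all configurations.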
diff --git a/ww_mutex.c b/ww_mutex.c
new file mode 100644
index 0000000..6858949
--- /dev/null
+++ b/ww_mutex.c
@@ -0,0 +1,583 @@
+/*
+ * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
+ *
+ * Original mutex implementation started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Wound/wait implementation:
+ * Copyright (C) 2013 Canonical Ltd.
+ *
+ * Wound/wait drop-in, actual wound-wait semantics and batching:
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#include "ww_mutex.h"
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+
+#ifndef WW_BUILTIN
+
+#undef EXPORT_SYMBOL_GPL
+#define EXPORT_SYMBOL_GPL(_a)
+#define MUTEX_FLAGS 0x07
+
+#ifndef CONFIG_DEBUG_MUTEXES
+#define debug_ww_mutex_add_waiter(_a, _b, _c)
+#define debug_ww_mutex_wake_waiter(_a, _b)
+#define debug_ww_mutex_lock_common(_a, _b)
+#define mutex_remove_waiter(__lock, __waiter, __task) \
+ __list_del((__waiter)->list.prev, (__waiter)->list.next)
+#else
+static void debug_ww_mutex_add_waiter(struct ww_mutex *ww,
+ struct mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
+ task->blocked_on = waiter;
+}
+
+static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww,
+ struct mutex_waiter *waiter)
+{
+ struct mutex *lock = &ww->base;
+
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock));
+ DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+ DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+}
+
+static void debug_ww_mutex_lock_common(struct ww_mutex *ww,
+ struct mutex_waiter *waiter)
+{
+ memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
+ waiter->magic = waiter;
+ INIT_LIST_HEAD(&waiter->list);
+}
+
+static void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+ DEBUG_LOCKS_WARN_ON(waiter->task != task);
+ DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+ task->blocked_on = NULL;
+
+ list_del_init(&waiter->list);
+ waiter->task = NULL;
+}
+#endif
+
+
+/**
+ * ww_acquire_class_lock - Lock the global class spinlock unless batching
+ *
+ * @ww: The ww mutex.
+ * @ww_ctx: The acquire context.
+ *
+ * Take the global class spinlock unless we're batching in which case the
+ * global class spinlock was taken during batch begin.
+ */
+static void ww_acquire_class_lock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx || !ww_ctx->batched)
+ spin_lock(&ww->my_class->lock);
+}
+
+
+/**
+ * ww_acquire_class_unlock - Unlock the global class spinlock unless batching
+ *
+ * @ww: The ww mutex.
+ * @ww_ctx: The acquire context.
+ *
+ * Free the global class spinlock unless we're batching in which case the
+ * global class spinlock is freed at batch end.
+ */
+static void ww_acquire_class_unlock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ if (!ww_ctx || !ww_ctx->batched)
+ spin_unlock(&ww->my_class->lock);
+}
+
+static bool __mutex_waiter_is_first(struct mutex *lock,
+ struct mutex_waiter *waiter)
+{
+ return list_first_entry(&lock->wait_list, struct mutex_waiter, list) ==
+ waiter;
+}
+
+
+/**
+ * __ww_mutex_trylock - Trylock a ww_mutex
+ *
+ * @ww: The mutex to trylock
+ * @ww_ctx: The acquire_ctx to register as locker or NULL.
+ * @waiter: The waiter if a waiter is trying to lock, or NULL.
+ * Return: true if lock succeeded. false otherwise.
+ */
+static bool __ww_mutex_trylock(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx,
+ struct mutex_waiter *waiter)
+{
+ struct mutex *lock = &ww->base;
+
+ lockdep_assert_held(&ww->my_class->lock);
+
+ if (atomic_long_read(&lock->owner))
+ return false;
+
+ /*
+ * No lock stealing for now. If there are waiters, only the first
+ * waiter is allowed to lock.
+ */
+ if (!list_empty(&lock->wait_list) &&
+ !__mutex_waiter_is_first(lock, waiter))
+ return false;
+
+ atomic_long_set(&lock->owner, (unsigned long) current);
+ ww->ctx = ww_ctx;
+ if (ww_ctx)
+ ww_ctx->acquired++;
+
+ return true;
+}
+
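+/**
+ * __ww_ctx_stamp_after - Whether context @a is younger than context @b
+ *
+ * @a: First acquire context.
+ * @b: Second acquire context.
+ * Return: true if @a was stamped after @b, taking stamp wraparound into
+ * account. Equal stamps are tie-broken on the context addresses.
+ */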
+static bool
+__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
+{
+ return a->stamp - b->stamp <= LONG_MAX &&
+ (a->stamp != b->stamp || a > b);
+}
+
+static struct task_struct *__owner_task(unsigned long owner)
+{
+ return (struct task_struct *)(owner & ~MUTEX_FLAGS);
+}
+
+
+/**
+ * ww_mutex_waiter_backoff - Whether to backoff if younger
+ *
+ * @us: Our acquire_ctx.
+ * @other: other waiter or lock owner acquire_ctx.
+ * Return: Whether to back off for another waiter or lock owner if we're
+ * younger than the lock owner or other waiter.
+ */
+static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us,
+ struct ww_acquire_ctx *other)
+{
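+	/*
+	 * Back off only if we actually hold locks and the other context may
+	 * still acquire more locks (its acquire phase isn't done). Under
+	 * wait-die the younger context always backs off in that case; under
+	 * wound-wait only once it has been wounded.
+	 */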
+ return (us->my_class->is_wait_die || us->wounded) && us->acquired > 0 &&
+ !other->done_acquire;
+}
+
+
+/**
+ * ww_mutex_backoff - Whether to backoff
+ *
+ * @us: Our acquire_ctx.
+ * @other: other waiter or lock owner acquire_ctx.
+ * Return: Whether to back off for another waiter or lock owner.
+ */
+static bool ww_mutex_backoff(struct ww_acquire_ctx *us,
+ struct ww_acquire_ctx *other)
+{
+ return other && ww_mutex_waiter_backoff(us, other) &&
+ us != other && __ww_ctx_stamp_after(us, other);
+}
+
+
+/**
+ * ww_mutex_lock_backoff - Check backoff for lock owner and all other waiters.
+ *
+ * @us: Our acquire_ctx.
+ * @ww: The ww_mutex considered.
+ * Return: Whether to back off for any other waiters or lock owner.
+ */
+static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us,
+ struct ww_mutex *ww)
+{
+ struct mutex_waiter *cur;
+
+ if (!us)
+ return false;
+
+ /*
+ * Wounded contexts lazy-preempt before first wounded lock wait, so that
+ * we have a lock to wait on after backoff.
+ */
+ if (us->wounded)
+ return true;
+
+ /* Backoff for lock owner? */
+ if (ww_mutex_backoff(us, ww->ctx))
+ return true;
+
+ /* Backoff for other waiters? */
+ list_for_each_entry(cur, &ww->base.wait_list, list) {
+ if (ww_mutex_backoff(us, cur->ww_ctx))
+ return true;
+ }
+
+ return false;
+}
+
+static int
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+ struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex *lock = &ww->base;
+ struct mutex_waiter *cur;
+ struct list_head *pos;
+ bool is_wait_die = ww->my_class->is_wait_die;
+
+ waiter->task = current;
+ waiter->ww_ctx = ww_ctx;
+
+ if (!ww_ctx) {
+ list_add_tail(&waiter->list, &lock->wait_list);
+ return 0;
+ }
+
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them.
+ */
+ pos = &lock->wait_list;
+ list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ /* Early backoff for other waiter */
+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+ if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx))
+ return -EDEADLK;
+
+ break;
+ }
+
+ pos = &cur->list;
+
+ /*
+ * Other waiter needs to backoff for us.
+ * Wake up the waiter so that it gets a chance to back
+ * off.
+ */
+ if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) {
+ debug_ww_mutex_wake_waiter(ww, cur);
+ wake_up_process(cur->task);
+ }
+ }
+
+ list_add_tail(&waiter->list, pos);
+
+ /* Need to wound the lock owner? */
+ if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) &&
+	    ww_ctx->acquired > 0) {
+ ww->ctx->wounded = true;
+
+ /*
+ * Wake up the lock owner in case it's sleeping on
+	 * another ww_mutex.
+ */
+ wake_up_process(__owner_task(atomic_long_read(&lock->owner)));
+ }
+
+ return 0;
+}
+
+
+/**
+ * ww_mutex_wake_first_waiter - Wake first waiter if any.
+ *
+ * @ww: The ww_mutex on which to wake first waiter.
+ */
+static void ww_mutex_wake_first_waiter(struct ww_mutex *ww)
+{
+ struct mutex_waiter *cur;
+ struct mutex *lock = &ww->base;
+
+ if (!list_empty(&lock->wait_list)) {
+ cur = list_first_entry(&lock->wait_list, struct mutex_waiter,
+ list);
+ debug_ww_mutex_wake_waiter(ww, cur);
+ wake_up_process(cur->task);
+ }
+}
+
+static int __ww_mutex_lock(struct ww_mutex *ww, long state,
+ unsigned int subclass,
+ struct lockdep_map *nest_lock,
+ unsigned long ip,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter waiter;
+ int ret = 0;
+
+ lockdep_assert_held(&ww->my_class->lock);
+
+ if (ww_ctx) {
+ /*
+ * If we've backed off when wounded, there are no more
+ * acquired locks, and we can clear the wounded flag.
+ */
+ if (ww_ctx->acquired == 0)
+ ww_ctx->wounded = false;
+
+ if (ww->ctx == ww_ctx) {
+ ret = -EALREADY;
+ goto err_early_backoff;
+ }
+ }
+
+ if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
+ goto skip_wait;
+
+ debug_ww_mutex_lock_common(ww, &waiter);
+ debug_ww_mutex_add_waiter(ww, &waiter, current);
+
+ lock_contended(&ww->base.dep_map, ip);
+
+ ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx);
+ if (ret)
+ goto err_early_backoff;
+
+ set_current_state(state);
+ for (;;) {
+ if (__ww_mutex_trylock(ww, ww_ctx, &waiter))
+ break;
+
+ if (unlikely(signal_pending_state(state, current))) {
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (ww_mutex_lock_backoff(ww_ctx, ww)) {
+ ret = -EDEADLK;
+ goto err;
+ }
+
+ spin_unlock(&ww->my_class->lock);
+ schedule();
+ spin_lock(&ww->my_class->lock);
+ set_current_state(state);
+ }
+ set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(&ww->base, &waiter, current);
+skip_wait:
+ lock_acquired(&ww->base.dep_map, ip);
+ return 0;
+
+err:
+ set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(&ww->base, &waiter, current);
+ ww_mutex_wake_first_waiter(ww);
+err_early_backoff:
+ lock_release(&ww->base.dep_map, !!ww_ctx, ip);
+ return ret;
+}
+
+static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip)
+{
+ struct mutex *lock = &ww->base;
+
+ lockdep_assert_held(&ww->my_class->lock);
+ lock_release(&lock->dep_map, !!ww->ctx, ip);
+ if (ww->ctx)
+ ww->ctx->acquired--;
+ ww->ctx = NULL;
+ atomic_long_set(&lock->owner, 0);
+ ww_mutex_wake_first_waiter(ww);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static int ww_mutex_deadlock_injection(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ unsigned tmp;
+
+ if (ww_ctx->deadlock_inject_countdown-- == 0) {
+ tmp = ww_ctx->deadlock_inject_interval;
+ if (tmp > UINT_MAX/4)
+ tmp = UINT_MAX;
+ else
+ tmp = tmp*2 + tmp + tmp/2;
+
+ ww_ctx->deadlock_inject_interval = tmp;
+ ww_ctx->deadlock_inject_countdown = tmp;
+
+ ww_mutex_unlock(ww);
+
+ return -EDEADLK;
+ }
+#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */
+
+	return 0;
+}
+
+/**
+ * ww_class_lock_annotate - lockdep annotation at locking time.
+ *
+ * @ww: The ww_mutex
+ * @ww_ctx: The acquire ctx or NULL
+ * @ip: The caller stack
+ */
+static void ww_class_lock_annotate(struct ww_mutex *ww,
+ struct ww_acquire_ctx *ww_ctx,
+ unsigned long ip)
+{
+ if (!ww_ctx || !ww_ctx->batched) {
+ /* Annotate wait lock before the spinlock. */
+ lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
+ ww_ctx ? &ww_ctx->dep_map : NULL, ip);
+ } else {
+ /*
+	 * OK, we'd like to annotate ww trylocks under the class
+	 * spinlock, but since each trylock becomes a lockdep level,
+	 * we'd quickly run out of levels. And we can't annotate
+	 * nested trylocks, since apparently lockdep can't cope
+	 * with that. So cheat lockdep and fake a class spinlock
+	 * release and annotate a waiting ww lock...
+ */
+ lock_release(&ww->my_class->lock.dep_map, 0, ip);
+ lock_acquire(&ww->base.dep_map, 0, 0, 0, 1,
+ &ww_ctx->dep_map, ip);
+ lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1, NULL, ip);
+ }
+}
+
+int
+ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE,
+ 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
+ ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+ if (!ret && ww_ctx && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+
+int
+ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_class_lock_annotate(ww, ww_ctx, _RET_IP_);
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE,
+ 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_,
+ ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+ if (!ret && ww_ctx && ww_ctx->acquired > 1)
+ return ww_mutex_deadlock_injection(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+#else /* CONFIG_DEBUG_LOCK_ALLOC */
+
+int
+ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
+
+
+int
+ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+ int ret;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx);
+ ww_acquire_class_unlock(ww, ww_ctx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
+
+#endif /* CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * ww_mutex_unlock_batched - Replacement to be used for batched unlock
+ *
+ * @ww: The mutex to unlock
+ */
+void
+ww_mutex_unlock_batched(struct ww_mutex *ww)
+{
+ __ww_mutex_unlock(ww, _RET_IP_);
+}
+EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched);
+
+void
+ww_mutex_unlock(struct ww_mutex *ww)
+{
+ struct ww_acquire_ctx *ww_ctx = ww->ctx;
+
+ ww_acquire_class_lock(ww, ww_ctx);
+ __ww_mutex_unlock(ww, _RET_IP_);
+ ww_acquire_class_unlock(ww, ww_ctx);
+}
+EXPORT_SYMBOL_GPL(ww_mutex_unlock);
+
+#ifdef WW_BATCHING
+void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ WARN_ON(ww_ctx->batched);
+#endif
+ spin_lock(&ww_ctx->my_class->lock);
+ ww_ctx->batched = true;
+}
+EXPORT_SYMBOL_GPL(ww_acquire_batch_begin);
+
+void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ WARN_ON(!ww_ctx->batched);
+#endif
+ ww_ctx->batched = false;
+ spin_unlock(&ww_ctx->my_class->lock);
+}
+EXPORT_SYMBOL_GPL(ww_acquire_batch_end);
+#endif
+
+int ww_mutex_trylock(struct ww_mutex *ww)
+{
+ bool locked;
+
+ spin_lock(&ww->my_class->lock);
+ locked = __ww_mutex_trylock(ww, NULL, NULL);
+ spin_unlock(&ww->my_class->lock);
+ if (locked)
+ lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_);
+
+ return locked;
+}
+EXPORT_SYMBOL_GPL(ww_mutex_trylock);
+
+#endif
diff --git a/ww_mutex.h b/ww_mutex.h
new file mode 100644
index 0000000..177f107
--- /dev/null
+++ b/ww_mutex.h
@@ -0,0 +1,187 @@
+/*
+ * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance
+ *
+ * Original mutex implementation started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Wound/wait implementation:
+ * Copyright (C) 2013 Canonical Ltd.
+ *
+ * Wound/wait drop-in, actual wound-wait semantics and batching:
+ * Copyright (C) 2016-2018 VMWare Inc.
+ *
+ * This file contains the main data structure and API definitions.
+ */
+
+#ifndef _WW_MUTEX_H_
+#define _WW_MUTEX_H_
+
+#include "ww_mutex_test.h"
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+
+struct ww_class {
+ spinlock_t lock;
+ atomic_long_t stamp;
+ struct lock_class_key acquire_key;
+ struct lock_class_key mutex_key;
+ const char *acquire_name;
+ const char *mutex_name;
+ bool is_wait_die;
+};
+
+struct ww_acquire_ctx {
+ struct ww_class *my_class;
+ unsigned long stamp;
+ unsigned long acquired;
+ u32 batched : 1;
+ u32 wounded : 1;
+ u32 done_acquire : 1;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+ struct lockdep_map batch_map;
+#endif
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ unsigned deadlock_inject_interval;
+ unsigned deadlock_inject_countdown;
+#endif
+};
+
+struct ww_mutex {
+ struct mutex base;
+ struct ww_class *my_class;
+ wait_queue_head_t queue;
+ struct ww_acquire_ctx *ctx;
+};
+
+#define __WW_CLASS_INITIALIZER(ww_class) { \
+ .lock = __SPIN_LOCK_UNLOCKED(ww_class.lock), \
+ .stamp = ATOMIC_LONG_INIT(0), \
+ .acquire_name = #ww_class "_acquire", \
+ .mutex_name = #ww_class "_mutex", \
+ .is_wait_die = WW_WAITDIE, \
+}
+
+#define DEFINE_WW_CLASS(classname) \
+ struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
+
+#ifdef WW_BATCHING
+void ww_acquire_batch_begin(struct ww_acquire_ctx *ctx);
+void ww_acquire_batch_end(struct ww_acquire_ctx *ctx);
+#else
+#define ww_acquire_batch_begin(_a)
+#define ww_acquire_batch_end(_a)
+#endif
+
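+/**
+ * ww_acquire_init - initializes a w/w acquire context
+ * @ctx: the acquire context to initialize
+ * @ww_class: the w/w class of the context
+ *
+ * Initializes a context to acquire multiple w/w mutexes of the given class.
+ * The stamp drawn from the class determines the context's age for the
+ * deadlock-avoidance algorithm.
+ */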
+static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
+ struct ww_class *ww_class)
+{
+ ctx->my_class = ww_class;
+ ctx->stamp = atomic_long_inc_return(&ww_class->stamp);
+ ctx->acquired = 0L;
+ ctx->batched = false;
+ ctx->wounded = false;
+ ctx->done_acquire = 0;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_init_map(&ctx->dep_map, ww_class->acquire_name,
+ &ww_class->acquire_key, 0);
+ lockdep_init_map(&ctx->batch_map, ww_class->mutex_name,
+ &ww_class->mutex_key, 0);
+ lock_acquire(&ctx->dep_map, 0, 0, 0, 1, NULL, _RET_IP_);
+ lock_acquired(&ctx->dep_map, _RET_IP_);
+#endif
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+ ctx->deadlock_inject_interval = 1L;
+ ctx->deadlock_inject_countdown = ctx->stamp & 0x0f;
+#endif
+}
+
+/**
+ * ww_acquire_done - marks the end of the acquire phase
+ * @ctx: the acquire context
+ *
+ * Marks the end of the acquire phase, any further w/w mutex lock calls using
+ * this context are forbidden.
+ *
+ * Calling this function is optional: it is just useful to document w/w mutex
+ * code and clearly separates the acquire phase from actually using the locked
+ * data structures.
+ */
+static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+ lockdep_assert_held(ctx);
+
+ WARN_ON(ctx->batched);
+ DEBUG_LOCKS_WARN_ON(ctx->done_acquire);
+#endif
+ ctx->done_acquire = 1;
+}
+
+/**
+ * ww_acquire_fini - releases a w/w acquire context
+ * @ctx: the acquire context to free
+ *
+ * Releases a w/w acquire context. This must be called _after_ all acquired w/w
+ * mutexes have been released with ww_mutex_unlock.
+ */
+static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lock_release(&ctx->dep_map, 0, _THIS_IP_);
+#endif
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(ctx->acquired);
+ if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
+ /*
+ * lockdep will normally handle this,
+ * but fail without anyway
+ */
+ ctx->done_acquire = 1;
+
+ if (!IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC))
+ /* ensure ww_acquire_fini will still fail if called twice */
+ ctx->acquired = ~0U;
+#endif
+}
+
+
+static inline void ww_mutex_init(struct ww_mutex *ww,
+ struct ww_class *ww_class)
+{
+ ww->my_class = ww_class;
+ ww->ctx = NULL;
+ init_waitqueue_head(&ww->queue);
+ mutex_init(&ww->base);
+}
+
+static inline bool ww_mutex_is_locked(struct ww_mutex *ww)
+{
+ return mutex_is_locked(&ww->base);
+}
+
+int ww_mutex_trylock(struct ww_mutex *lock);
+
+int ww_mutex_lock_interruptible(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx);
+
+#define ww_mutex_lock_slow_interruptible ww_mutex_lock_interruptible
+
+int ww_mutex_lock(struct ww_mutex *lock,
+ struct ww_acquire_ctx *ctx);
+
+#define ww_mutex_lock_slow ww_mutex_lock
+
+void ww_mutex_unlock(struct ww_mutex *lock);
+
+static inline void ww_mutex_destroy(struct ww_mutex *lock)
+{
+ mutex_destroy(&lock->base);
+}
+
+
+#endif
diff --git a/ww_mutex_test.c b/ww_mutex_test.c
new file mode 100644
index 0000000..8ebfdc9
--- /dev/null
+++ b/ww_mutex_test.c
@@ -0,0 +1,229 @@
+/*
+ * Wound/Wait Mutex testing module.
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#include "ww_mutex_test.h"
+
+#ifdef WW_BUILTIN
+#include <linux/ww_mutex.h>
+#define ww_acquire_batch_begin(_a)
+#define ww_acquire_batch_end(_a)
+#else
+#include "ww_mutex.h"
+#endif
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/delay.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Hellström");
+MODULE_DESCRIPTION("Test module for ww_mutex performance");
+MODULE_VERSION("1.0.0");
+
+struct ww_mutex_item {
+ struct ww_mutex mutex;
+ /* Align to a typical cache line */
+ u8 pad[64 - sizeof(struct ww_mutex) % 64];
+};
+
+struct wwt_device {
+ struct ww_mutex_item *locks;
+ int *used;
+ int num_locks;
+};
+
+struct wwt_thread {
+ struct wwt_device *dev;
+ struct ww_mutex **p_locks;
+ u8 *used;
+ int num_locks;
+ int thread;
+ u32 num_rollbacks;
+ struct work_struct work;
+};
+
+static struct wwt_device g_dev;
+static struct wwt_thread threads[WWT_NUM_THREADS];
+static DEFINE_WW_CLASS(wwt_class);
+
+static void wwt_thread_rollback(struct wwt_thread *t, int upto)
+{
+	while (upto--)
+ ww_mutex_unlock(t->p_locks[upto]);
+ t->num_rollbacks++;
+}
+
+static int wwt_thread_lock_sequence(struct wwt_thread *t)
+{
+ struct ww_acquire_ctx ctx;
+ int ret = 0;
+ int i;
+
+ ww_acquire_init(&ctx, &wwt_class);
+ ww_acquire_batch_begin(&ctx);
+restart:
+ for (i = 0; i < t->num_locks; ++i) {
+ ret = ww_mutex_lock(t->p_locks[i], &ctx);
+ if (ret) {
+ WARN_ON(i == 0);
+ wwt_thread_rollback(t, i);
+ if (ret == -EDEADLK) {
+ swap(t->p_locks[0], t->p_locks[i]);
+ goto restart;
+ } else
+ goto out_err;
+ }
+ }
+ ww_acquire_batch_end(&ctx);
+ ww_acquire_done(&ctx);
+
+ udelay(WWT_CS_UDELAY);
+
+ ww_acquire_batch_begin(&ctx);
+ wwt_thread_rollback(t, t->num_locks);
+ t->num_rollbacks--;
+out_err:
+ ww_acquire_batch_end(&ctx);
+ ww_acquire_fini(&ctx);
+ return ret;
+}
+
+static void wwt_lock_work(struct work_struct *work)
+{
+ struct wwt_thread *t = container_of(work, struct wwt_thread, work);
+ int i, ret;
+
+ printk(KERN_INFO "thread %d: Starting work.\n", t->thread);
+ for (i = 0; i < WWT_NUM_SUB; ++i) {
+ ret = wwt_thread_lock_sequence(t);
+ if (ret)
+ break;
+ }
+
+	printk(KERN_INFO "thread %d: Finished with %u rollbacks. ret is %d\n",
+ t->thread, t->num_rollbacks, ret);
+}
+
+
+static int wwt_thread_setup(struct wwt_thread *t, int thread)
+{
+ struct wwt_device *dev = &g_dev;
+ u32 idx;
+ int i;
+
+ t->dev = dev;
+ t->p_locks = vmalloc(WWT_NUM_T_LOCKS * sizeof(*t->p_locks));
+ t->num_rollbacks = 0;
+ t->thread = thread;
+ INIT_WORK(&t->work, wwt_lock_work);
+#ifndef WWT_NO_SHARED
+ memset(dev->used, 0, dev->num_locks * sizeof(*dev->used));
+#endif
+ if (!t->p_locks)
+ return -ENOMEM;
+
+ for (i = 0; i < WWT_NUM_T_LOCKS; ++i) {
+ get_random_bytes(&idx, sizeof(idx));
+ idx %= WWT_NUM_LOCKS;
+		while (dev->used[idx]) {
+ idx++;
+ idx %= WWT_NUM_LOCKS;
+ }
+ dev->used[idx] = 1;
+ t->p_locks[i] = &dev->locks[idx].mutex;
+ }
+ t->num_locks = WWT_NUM_T_LOCKS;
+
+ return 0;
+}
+
+static void wwt_thread_destroy(struct wwt_thread *t)
+{
+ cancel_work_sync(&t->work);
+ if (t->p_locks)
+ vfree(t->p_locks);
+}
+
+static void wwt_device_fini(struct wwt_device *wdev)
+{
+ int i;
+
+ if (wdev->used)
+ vfree(wdev->used);
+
+ for (i = 0; i < ARRAY_SIZE(threads); ++i)
+ wwt_thread_destroy(&threads[i]);
+
+ for (i = 0; i < wdev->num_locks; ++i)
+ ww_mutex_destroy(&wdev->locks[i].mutex);
+
+ vfree(wdev->locks);
+ wdev->num_locks = 0;
+}
+
+
+static int wwt_device_init(struct wwt_device *wdev, int num_locks)
+{
+ int i, ret;
+
+ wdev->num_locks = 0;
+ wdev->locks = vzalloc(num_locks * sizeof(*wdev->locks));
+ if (!wdev->locks)
+ return -ENOMEM;
+
+	wdev->used = vzalloc(num_locks * sizeof(*wdev->used));
+	if (!wdev->used) {
+		ret = -ENOMEM;
+		goto out_thread_setup;
+	}
+
+ for (i = 0; i < num_locks; ++i)
+ ww_mutex_init(&wdev->locks[i].mutex, &wwt_class);
+
+ wdev->num_locks = num_locks;
+
+	printk(KERN_INFO "Allocated %d mutexes. Array alignment %lu, lock size %zu\n",
+ num_locks, ((unsigned long) wdev->locks) % 64,
+ sizeof(struct ww_mutex));
+
+ for (i = 0; i < ARRAY_SIZE(threads); ++i) {
+ ret = wwt_thread_setup(&threads[i], i);
+ if (ret)
+ goto out_thread_setup;
+ }
+
+	printk(KERN_INFO "Set up %zu threads\n", ARRAY_SIZE(threads));
+ usleep_range(10000, 20000);
+ for (i = 0; i < ARRAY_SIZE(threads); ++i) {
+#ifndef WWT_SEQUENTIAL
+ queue_work(system_unbound_wq, &threads[i].work);
+#else
+ wwt_lock_work(&threads[i].work);
+#endif
+ }
+
+ return 0;
+
+out_thread_setup:
+ wwt_device_fini(&g_dev);
+ return ret;
+}
+
+static int __init wwt_init(void)
+{
+ printk(KERN_INFO "Hello, World!\n");
+ return wwt_device_init(&g_dev, WWT_NUM_LOCKS);
+}
+
+static void __exit wwt_exit(void)
+{
+ wwt_device_fini(&g_dev);
+ printk(KERN_INFO "Goodbye, World!\n");
+}
+
+module_init(wwt_init);
+module_exit(wwt_exit);
diff --git a/ww_mutex_test.h b/ww_mutex_test.h
new file mode 100644
index 0000000..8911ea7
--- /dev/null
+++ b/ww_mutex_test.h
@@ -0,0 +1,20 @@
+/*
+ * Wound/Wait Mutex testing module.
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#ifndef _WW_MUTEX_TEST_H_
+#define _WW_MUTEX_TEST_H_
+
+#undef WW_BUILTIN /* Use kernel builtin ww mutexes */
+#define WW_WAITDIE true /* Use wait-die, not wound-wait */
+#define WW_BATCHING /* Batch locks and unlocks */
+#define WWT_NUM_LOCKS 100000 /* Number of global locks */
+#define WWT_NUM_T_LOCKS 800 /* Number of locks per thread out of the global */
+#define WWT_NUM_THREADS 16 /* Number of locking threads to fire off */
+#undef WWT_SEQUENTIAL /* Fire off locking threads sequentially */
+#define WWT_CS_UDELAY 000 /* Command submission udelay */
+#define WWT_NUM_SUB 10000 /* Number of command submissions */
+#undef WWT_NO_SHARED /* No shared mutexes - No rollbacks */
+
+#endif