-rw-r--r--   Makefile          |   13
-rw-r--r--   README            |   59
-rw-r--r--   ww_mutex.c        |  583
-rw-r--r--   ww_mutex.h        |  187
-rw-r--r--   ww_mutex_test.c   |  229
-rw-r--r--   ww_mutex_test.h   |   20
6 files changed, 1091 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..16f7b2f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+wtest-y := ww_mutex_test.o ww_mutex.o
+
+obj-m += wtest.o
+
+all:
+	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
+
+clean:
+	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
+
+install:
+	cp wtest.ko /lib/modules/$(shell uname -r)/extra
+	depmod -a
diff --git a/README b/README
new file mode 100644
--- /dev/null
+++ b/README
@@ -0,0 +1,59 @@
+The ww_mutex_test kernel module
+
+Compile using
+make
+make install
+
+Start a test sequence using
+sudo modprobe wtest
+
+After the test sequence, use
+sudo rmmod wtest
+
+The test sequence simulates a number of GPU command submissions taking
+ww_mutex locks. The module maintains a number of global locks, and each thread
+is required to lock a local number of locks that are randomly selected from
+the set of global locks. Typically threads will sometimes try to acquire
+the same lock, and the ww mutex rollback will be initiated. Defines to
+tune this behaviour (ww_mutex_test.h):
+#define WWT_NUM_LOCKS 100000 /* Number of global locks */
+#define WWT_NUM_T_LOCKS 800 /* Number of locks per thread out of the global */
+#define WWT_NUM_THREADS 16 /* Number of locking threads to fire off */
+
+Each thread performs a number of simulated command submissions with the same
+locks. Each command submission consists of
+*) Taking the locks
+*) Busy-waiting for a while (mimicking the time used to submit GPU commands)
+*) Releasing the locks.
+The busy wait makes it more difficult to judge how much time was spent on the
+actual locking, but on the other hand gives more real-world-like results for
+the number of rollbacks. Related defines:
+#define WWT_NUM_SUB 10000 /* Number of command submissions */
+#define WWT_CS_UDELAY 000 /* Command submission udelay */
+
+The results can be viewed as the starting and ending times of each thread in
+"dmesg". Each thread also prints the number of rollbacks it had to do.
+There are two ways to get zero rollbacks: One is to fire off the threads
+sequentially, in which case there will be no contention. The other is to
+make sure there are no common locks between threads. With the latter option,
+be careful that there are enough global locks to accommodate the requests of
+all threads; otherwise module loading may lock up.
+Related defines:
+#define WWT_NO_SHARED /* No shared mutexes - No rollbacks */
+#define WWT_SEQUENTIAL /* Fire off locking threads sequentially */
+
+The module can either use the kernel built-in ww mutex implementation or a
+drop-in replacement implementation. The drop-in replacement implements a
+choice of algorithms: Wait-Die and Wound-Wait. It is also possible to batch
+mutex locks and unlocks, significantly reducing the number of CPU cycles
+spent on locking. Note that the drop-in replacement manipulates locking state
+under a class-global spinlock instead of the built-in implementation's atomic
+operations. This is slightly slower in cases where the global spinlock is not
+contended, and significantly slower in cases where it is contended, but it
+allows batching locks and unlocks in a single global spinlock critical
+section (see the usage sketch below).
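For reference, here is a minimal sketch of one such simulated command
submission, mirroring the rollback loop used in ww_mutex_test.c. It is
illustrative only and not part of the patch: the function name
simulate_submission() and the locks/num_locks/my_class parameters are made
up, and the ww_acquire_batch_begin()/ww_acquire_batch_end() calls are no-ops
unless the drop-in replacement is built with WW_BATCHING.

static int simulate_submission(struct ww_mutex **locks, int num_locks,
			       struct ww_class *my_class)
{
	struct ww_acquire_ctx ctx;
	int i, ret;

	ww_acquire_init(&ctx, my_class);
	ww_acquire_batch_begin(&ctx);
restart:
	for (i = 0; i < num_locks; ++i) {
		ret = ww_mutex_lock(locks[i], &ctx);
		if (ret == -EDEADLK) {
			int j = i;

			/* Backoff: release all locks taken so far, ... */
			while (j--)
				ww_mutex_unlock(locks[j]);
			/* ... then retry with the contended lock first. */
			swap(locks[0], locks[i]);
			goto restart;
		}
	}
	ww_acquire_batch_end(&ctx);
	ww_acquire_done(&ctx);

	/* Busy-wait here, standing in for actual GPU command submission. */

	ww_acquire_batch_begin(&ctx);
	for (i = 0; i < num_locks; ++i)
		ww_mutex_unlock(locks[i]);
	ww_acquire_batch_end(&ctx);
	ww_acquire_fini(&ctx);

	return 0;
}

Moving the contended lock to the front before restarting means the retry
sleeps on that lock first, with no other locks held, which is what makes the
wait-die/wound-wait backoff free of deadlocks.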
+ +Related defines: +#define WW_BUILTIN /* Use kernel builtin ww mutexes */ +#define WW_WAITDIE true /* Use wait-die, not wound-wait */ +#define WW_BATCHING /* Batch locks and unlocks */ diff --git a/ww_mutex.c b/ww_mutex.c new file mode 100644 index 0000000..6858949 --- /dev/null +++ b/ww_mutex.c @@ -0,0 +1,583 @@ +/* + * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance + * + * Original mutex implementation started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * Wound/wait implementation: + * Copyright (C) 2013 Canonical Ltd. + * + * Wound/ wait drop-in, actual wound-wait semantics and batching: + * Copyright (C) 2016-2018 VMWare Inc. + */ + +#include "ww_mutex.h" +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> + +#ifndef WW_BUILTIN + +#undef EXPORT_SYMBOL_GPL +#define EXPORT_SYMBOL_GPL(_a) +#define MUTEX_FLAGS 0x07 + +#ifndef CONFIG_DEBUG_MUTEXES +#define debug_ww_mutex_add_waiter(_a, _b, _c) +#define debug_ww_mutex_wake_waiter(_a, _b) +#define debug_ww_mutex_lock_common(_a, _b) +#define mutex_remove_waiter(__lock, __waiter, __task) \ + __list_del((__waiter)->list.prev, (__waiter)->list.next) +#else +static void debug_ww_mutex_add_waiter(struct ww_mutex *ww, + struct mutex_waiter *waiter, + struct task_struct *task) +{ + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock)); + task->blocked_on = waiter; +} + +static void debug_ww_mutex_wake_waiter(struct ww_mutex *ww, + struct mutex_waiter *waiter) +{ + struct mutex *lock = &ww->base; + + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&ww->my_class->lock)); + DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); + DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); +} + +static void debug_ww_mutex_lock_common(struct ww_mutex *ww, + struct mutex_waiter *waiter) +{ + memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); + waiter->magic = waiter; + INIT_LIST_HEAD(&waiter->list); +} + +static void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct task_struct *task) +{ + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); + DEBUG_LOCKS_WARN_ON(waiter->task != task); + DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter); + task->blocked_on = NULL; + + list_del_init(&waiter->list); + waiter->task = NULL; +} +#endif + + +/** + * ww_acquire_class_lock - Lock the global class spinlock unless batching + * + * @ww: The ww mutex. + * @ww_ctx: The acquire context. + * + * Take the global class spinlock unless we're batching in which case the + * global class spinlock was taken during batch begin. + */ +static void ww_acquire_class_lock(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ + if (!ww_ctx || !ww_ctx->batched) + spin_lock(&ww->my_class->lock); +} + + +/** + * ww_acquire_class_unlock - Unlock the global class spinlock unless batching + * + * @ww: The ww mutex. + * @ww_ctx: The acquire context. + * + * Free the global class spinlock unless we're batching in which case the + * global class spinlock is freed at batch end. 
+ */ +static void ww_acquire_class_unlock(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ + if (!ww_ctx || !ww_ctx->batched) + spin_unlock(&ww->my_class->lock); +} + +static bool __mutex_waiter_is_first(struct mutex *lock, + struct mutex_waiter *waiter) +{ + return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == + waiter; +} + + +/** + * __ww_mutex_trylock - Trylock a ww_mutex + * + * @ww: The mutex to trylock + * @ww_ctx: The acquire_ctx to register as locker or NULL. + * @waiter: The waiter if a waiter is trying to lock, or NULL. + * Return: true if lock succeeded. false otherwise. + */ +static bool __ww_mutex_trylock(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) +{ + struct mutex *lock = &ww->base; + + lockdep_assert_held(&ww->my_class->lock); + + if (atomic_long_read(&lock->owner)) + return false; + + /* + * No lock stealing for now. If there are waiters, only the first + * waiter is allowed to lock. + */ + if (!list_empty(&lock->wait_list) && + !__mutex_waiter_is_first(lock, waiter)) + return false; + + atomic_long_set(&lock->owner, (unsigned long) current); + ww->ctx = ww_ctx; + if (ww_ctx) + ww_ctx->acquired++; + + return true; +} + +static bool +__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) +{ + return a->stamp - b->stamp <= LONG_MAX && + (a->stamp != b->stamp || a > b); +} + +static struct task_struct *__owner_task(unsigned long owner) +{ + return (struct task_struct *)(owner & ~MUTEX_FLAGS); +} + + +/** + * ww_mutex_waiter_backoff - Whether to backoff if younger + * + * @us: Our acquire_ctx. + * @other: other waiter or lock owner acquire_ctx. + * Return: Whether to back off for another waiter or lock owner if we're + * younger than the lock owner or other waiter. + */ +static bool ww_mutex_waiter_backoff(struct ww_acquire_ctx *us, + struct ww_acquire_ctx *other) +{ + return (us->my_class->is_wait_die || us->wounded) && us->acquired > 0 && + !other->done_acquire; +} + + +/** + * ww_mutex_backoff - Whether to backoff + * + * @us: Our acquire_ctx. + * @other: other waiter or lock owner acquire_ctx. + * Return: Whether to back off for another waiter or lock owner. + */ +static bool ww_mutex_backoff(struct ww_acquire_ctx *us, + struct ww_acquire_ctx *other) +{ + return other && ww_mutex_waiter_backoff(us, other) && + us != other && __ww_ctx_stamp_after(us, other); +} + + +/** + * ww_mutex_lock_backoff - Check backoff for lock owner and all other waiters. + * + * @us: Our acquire_ctx. + * @ww: The ww_mutex considered. + * Return: Whether to back off for any other waiters or lock owner. + */ +static bool ww_mutex_lock_backoff(struct ww_acquire_ctx *us, + struct ww_mutex *ww) +{ + struct mutex_waiter *cur; + + if (!us) + return false; + + /* + * Wounded contexts lazy-preempt before first wounded lock wait, so that + * we have a lock to wait on after backoff. + */ + if (us->wounded) + return true; + + /* Backoff for lock owner? */ + if (ww_mutex_backoff(us, ww->ctx)) + return true; + + /* Backoff for other waiters? 
*/ + list_for_each_entry(cur, &ww->base.wait_list, list) { + if (ww_mutex_backoff(us, cur->ww_ctx)) + return true; + } + + return false; +} + +static int +__ww_mutex_add_waiter(struct mutex_waiter *waiter, + struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ + struct mutex *lock = &ww->base; + struct mutex_waiter *cur; + struct list_head *pos; + bool is_wait_die = ww->my_class->is_wait_die; + + waiter->task = current; + waiter->ww_ctx = ww_ctx; + + if (!ww_ctx) { + list_add_tail(&waiter->list, &lock->wait_list); + return 0; + } + + /* + * Add the waiter before the first waiter with a higher stamp. + * Waiters without a context are skipped to avoid starving + * them. + */ + pos = &lock->wait_list; + list_for_each_entry_reverse(cur, &lock->wait_list, list) { + if (!cur->ww_ctx) + continue; + + /* Early backoff for other waiter */ + if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { + if (ww_mutex_waiter_backoff(ww_ctx, cur->ww_ctx)) + return -EDEADLK; + + break; + } + + pos = &cur->list; + + /* + * Other waiter needs to backoff for us. + * Wake up the waiter so that it gets a chance to back + * off. + */ + if (ww_mutex_waiter_backoff(cur->ww_ctx, ww_ctx)) { + debug_ww_mutex_wake_waiter(ww, cur); + wake_up_process(cur->task); + } + } + + list_add_tail(&waiter->list, pos); + + /* Need to wound the lock owner? */ + if (!is_wait_die && ww->ctx && __ww_ctx_stamp_after(ww->ctx, ww_ctx) && + ww_ctx->acquired > 0){ + ww->ctx->wounded = true; + + /* + * Wake up the lock owner in case it's sleeping on + * another ww_mutex.. + */ + wake_up_process(__owner_task(atomic_long_read(&lock->owner))); + } + + return 0; +} + + +/** + * ww_mutex_wake_first_waiter - Wake first waiter if any. + * + * @ww: The ww_mutex on which to wake first waiter. + */ +static void ww_mutex_wake_first_waiter(struct ww_mutex *ww) +{ + struct mutex_waiter *cur; + struct mutex *lock = &ww->base; + + if (!list_empty(&lock->wait_list)) { + cur = list_first_entry(&lock->wait_list, struct mutex_waiter, + list); + debug_ww_mutex_wake_waiter(ww, cur); + wake_up_process(cur->task); + } +} + +static int __ww_mutex_lock(struct ww_mutex *ww, long state, + unsigned int subclass, + struct lockdep_map *nest_lock, + unsigned long ip, + struct ww_acquire_ctx *ww_ctx) +{ + struct mutex_waiter waiter; + int ret = 0; + + lockdep_assert_held(&ww->my_class->lock); + + if (ww_ctx) { + /* + * If we've backed off when wounded, there are no more + * acquired locks, and we can clear the wounded flag. 
+ */ + if (ww_ctx->acquired == 0) + ww_ctx->wounded = false; + + if (ww->ctx == ww_ctx) { + ret = -EALREADY; + goto err_early_backoff; + } + } + + if (__ww_mutex_trylock(ww, ww_ctx, &waiter)) + goto skip_wait; + + debug_ww_mutex_lock_common(ww, &waiter); + debug_ww_mutex_add_waiter(ww, &waiter, current); + + lock_contended(&ww->base.dep_map, ip); + + ret = __ww_mutex_add_waiter(&waiter, ww, ww_ctx); + if (ret) + goto err_early_backoff; + + set_current_state(state); + for (;;) { + if (__ww_mutex_trylock(ww, ww_ctx, &waiter)) + break; + + if (unlikely(signal_pending_state(state, current))) { + ret = -EINTR; + goto err; + } + + if (ww_mutex_lock_backoff(ww_ctx, ww)) { + ret = -EDEADLK; + goto err; + } + + spin_unlock(&ww->my_class->lock); + schedule(); + spin_lock(&ww->my_class->lock); + set_current_state(state); + } + set_current_state(TASK_RUNNING); + mutex_remove_waiter(&ww->base, &waiter, current); +skip_wait: + lock_acquired(&ww->base.dep_map, ip); + return 0; + +err: + set_current_state(TASK_RUNNING); + mutex_remove_waiter(&ww->base, &waiter, current); + ww_mutex_wake_first_waiter(ww); +err_early_backoff: + lock_release(&ww->base.dep_map, !!ww_ctx, ip); + return ret; +} + +static void __ww_mutex_unlock(struct ww_mutex *ww, unsigned long ip) +{ + struct mutex *lock = &ww->base; + + lockdep_assert_held(&ww->my_class->lock); + lock_release(&lock->dep_map, !!ww->ctx, ip); + if (ww->ctx) + ww->ctx->acquired--; + ww->ctx = NULL; + atomic_long_set(&lock->owner, 0); + ww_mutex_wake_first_waiter(ww); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static int ww_mutex_deadlock_injection(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned tmp; + + if (ww_ctx->deadlock_inject_countdown-- == 0) { + tmp = ww_ctx->deadlock_inject_interval; + if (tmp > UINT_MAX/4) + tmp = UINT_MAX; + else + tmp = tmp*2 + tmp + tmp/2; + + ww_ctx->deadlock_inject_interval = tmp; + ww_ctx->deadlock_inject_countdown = tmp; + + ww_mutex_unlock(ww); + + return -EDEADLK; + } + + return 0; +#endif /* CONFIG_DEBUG_WW_MUTEX_SLOWPATH */ +} + +/** + * ww_class_lock_annotate - lockdep annotation at locking time. + * + * @ww: The ww_mutex + * @ww_ctx: The acquire ctx or NULL + * @ip: The caller stack + */ +static void ww_class_lock_annotate(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx, + unsigned long ip) +{ + if (!ww_ctx || !ww_ctx->batched) { + /* Annotate wait lock before the spinlock. */ + lock_acquire(&ww->base.dep_map, 0, 0, 0, 1, + ww_ctx ? &ww_ctx->dep_map : NULL, ip); + } else { + /* + * OK, We'd like to annotate ww trylocks under the class + * spinlock, but since each trylock becomes a lockdep level, + * we'd quickly run out of levels. And we can't annotate + * nested trylocks, since apparently lockdep can't cope + * with that. So cheat lockdep and fake a class spinlock + * release and annotate a wating ww lock... + */ + lock_release(&ww->my_class->lock.dep_map, 0, ip); + lock_acquire(&ww->base.dep_map, 0, 0, 0, 1, + &ww_ctx->dep_map, ip); + lock_acquire(&ww->my_class->lock.dep_map, 0, 1, 0, 1, NULL, ip); + } +} + +int +ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) +{ + int ret; + + ww_class_lock_annotate(ww, ww_ctx, _RET_IP_); + ww_acquire_class_lock(ww, ww_ctx); + ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, + 0, ww_ctx ? 
&ww_ctx->dep_map : NULL, _RET_IP_, + ww_ctx); + ww_acquire_class_unlock(ww, ww_ctx); + if (!ret && ww_ctx && ww_ctx->acquired > 1) + return ww_mutex_deadlock_injection(ww, ww_ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock); + + +int +ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) +{ + int ret; + + ww_class_lock_annotate(ww, ww_ctx, _RET_IP_); + ww_acquire_class_lock(ww, ww_ctx); + ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, + 0, ww_ctx ? &ww_ctx->dep_map : NULL, _RET_IP_, + ww_ctx); + ww_acquire_class_unlock(ww, ww_ctx); + if (!ret && ww_ctx && ww_ctx->acquired > 1) + return ww_mutex_deadlock_injection(ww, ww_ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); + +#else /* CONFIG_DEBUG_LOCK_ALLOC */ + +int +ww_mutex_lock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) +{ + int ret; + + ww_acquire_class_lock(ww, ww_ctx); + ret = __ww_mutex_lock(ww, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx); + ww_acquire_class_unlock(ww, ww_ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock); + + +int +ww_mutex_lock_interruptible(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) +{ + int ret; + + ww_acquire_class_lock(ww, ww_ctx); + ret = __ww_mutex_lock(ww, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_, ww_ctx); + ww_acquire_class_unlock(ww, ww_ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); + +#endif /* CONFIG_DEBUG_LOCK_ALLOC */ + +/** + * ww_mutex_unlock_batched - Replacement to be used for batched unlock + * + * @ww: The mutex to unlock + */ +void +ww_mutex_unlock_batched(struct ww_mutex *ww) +{ + + __ww_mutex_unlock(ww, _RET_IP_); +} +EXPORT_SYMBOL_GPL(ww_mutex_unlock_batched); + +void +ww_mutex_unlock(struct ww_mutex *ww) +{ + struct ww_acquire_ctx *ww_ctx = ww->ctx; + + ww_acquire_class_lock(ww, ww_ctx); + __ww_mutex_unlock(ww, _RET_IP_); + ww_acquire_class_unlock(ww, ww_ctx); +} +EXPORT_SYMBOL_GPL(ww_mutex_unlock); + +#ifdef WW_BATCHING +void ww_acquire_batch_begin(struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + WARN_ON(ww_ctx->batched); +#endif + spin_lock(&ww_ctx->my_class->lock); + ww_ctx->batched = true; +} +EXPORT_SYMBOL_GPL(ww_acquire_batch_begin); + +void ww_acquire_batch_end(struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + WARN_ON(!ww_ctx->batched); +#endif + ww_ctx->batched = false; + spin_unlock(&ww_ctx->my_class->lock); +} +EXPORT_SYMBOL_GPL(ww_acquire_batch_end); +#endif + +int ww_mutex_trylock(struct ww_mutex *ww) +{ + bool locked; + + spin_lock(&ww->my_class->lock); + locked = __ww_mutex_trylock(ww, NULL, NULL); + spin_unlock(&ww->my_class->lock); + if (locked) + lock_acquire(&ww->base.dep_map, 0, 1, 0, 1, NULL, _RET_IP_); + + return locked; +} +EXPORT_SYMBOL_GPL(ww_mutex_trylock); + +#endif diff --git a/ww_mutex.h b/ww_mutex.h new file mode 100644 index 0000000..177f107 --- /dev/null +++ b/ww_mutex.h @@ -0,0 +1,187 @@ +/* + * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance + * + * Original mutex implementation started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * Wound/wait implementation: + * Copyright (C) 2013 Canonical Ltd. + * + * Wound/ wait drop-in, actual wound-wait semantics and batching: + * Copyright (C) 2016-2018 VMWare Inc. + * + * This file contains the main data structure and API definitions. 
+ */ + +#ifndef _WW_MUTEX_H_ +#define _WW_MUTEX_H_ + +#include "ww_mutex_test.h" +#include <linux/kernel.h> +#include <asm/atomic.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/spinlock.h> + +struct ww_class { + spinlock_t lock; + atomic_long_t stamp; + struct lock_class_key acquire_key; + struct lock_class_key mutex_key; + const char *acquire_name; + const char *mutex_name; + bool is_wait_die; +}; + +struct ww_acquire_ctx { + struct ww_class *my_class; + unsigned long stamp; + unsigned long acquired; + u32 batched : 1; + u32 wounded : 1; + u32 done_acquire : 1; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + struct lockdep_map batch_map; +#endif +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned deadlock_inject_interval; + unsigned deadlock_inject_countdown; +#endif +}; + +struct ww_mutex { + struct mutex base; + struct ww_class *my_class; + wait_queue_head_t queue; + struct ww_acquire_ctx *ctx; +}; + +#define __WW_CLASS_INITIALIZER(ww_class) { \ + .lock = __SPIN_LOCK_UNLOCKED(ww_class.lock), \ + .stamp = ATOMIC_LONG_INIT(0), \ + .acquire_name = #ww_class "_acquire", \ + .mutex_name = #ww_class "_mutex", \ + .is_wait_die = WW_WAITDIE, \ +} + +#define DEFINE_WW_CLASS(classname) \ + struct ww_class classname = __WW_CLASS_INITIALIZER(classname) + +#ifdef WW_BATCHING +void ww_acquire_batch_begin(struct ww_acquire_ctx *ctx); +void ww_acquire_batch_end(struct ww_acquire_ctx *ctx); +#else +#define ww_acquire_batch_begin(_a) +#define ww_acquire_batch_end(_a) +#endif + +static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, + struct ww_class *ww_class) +{ + ctx->my_class = ww_class; + ctx->stamp = atomic_long_inc_return(&ww_class->stamp); + ctx->acquired = 0L; + ctx->batched = false; + ctx->wounded = false; + ctx->done_acquire = 0; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, + &ww_class->acquire_key, 0); + lockdep_init_map(&ctx->batch_map, ww_class->mutex_name, + &ww_class->mutex_key, 0); + lock_acquire(&ctx->dep_map, 0, 0, 0, 1, NULL, _RET_IP_); + lock_acquired(&ctx->dep_map, _RET_IP_); +#endif +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + ctx->deadlock_inject_interval = 1L; + ctx->deadlock_inject_countdown = ctx->stamp & 0x0f; +#endif +} + +/** + * ww_acquire_done - marks the end of the acquire phase + * @ctx: the acquire context + * + * Marks the end of the acquire phase, any further w/w mutex lock calls using + * this context are forbidden. + * + * Calling this function is optional, it is just useful to document w/w mutex + * code and clearly designated the acquire phase from actually using the locked + * data structures. + */ +static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + lockdep_assert_held(ctx); + + WARN_ON(ctx->batched); + DEBUG_LOCKS_WARN_ON(ctx->done_acquire); +#endif + ctx->done_acquire = 1; +} + +/** + * ww_acquire_fini - releases a w/w acquire context + * @ctx: the acquire context to free + * + * Releases a w/w acquire context. This must be called _after_ all acquired w/w + * mutexes have been released with ww_mutex_unlock. 
+ */ +static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lock_release(&ctx->dep_map, 0, _THIS_IP_); +#endif +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(ctx->acquired); + if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) + /* + * lockdep will normally handle this, + * but fail without anyway + */ + ctx->done_acquire = 1; + + if (!IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC)) + /* ensure ww_acquire_fini will still fail if called twice */ + ctx->acquired = ~0U; +#endif +} + + +static inline void ww_mutex_init(struct ww_mutex *ww, + struct ww_class *ww_class) +{ + ww->my_class = ww_class; + ww->ctx = NULL; + init_waitqueue_head(&ww->queue); + mutex_init(&ww->base); +} + +static inline bool ww_mutex_is_locked(struct ww_mutex *ww) +{ + return mutex_is_locked(&ww->base); +} + +int ww_mutex_trylock(struct ww_mutex *lock); + +int ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); + +#define ww_mutex_lock_slow_interruptible ww_mutex_lock_interruptible + +int ww_mutex_lock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); + +#define ww_mutex_lock_slow ww_mutex_lock + +void ww_mutex_unlock(struct ww_mutex *lock); + +static inline void ww_mutex_destroy(struct ww_mutex *lock) +{ + mutex_destroy(&lock->base); +} + + +#endif diff --git a/ww_mutex_test.c b/ww_mutex_test.c new file mode 100644 index 0000000..8ebfdc9 --- /dev/null +++ b/ww_mutex_test.c @@ -0,0 +1,229 @@ +/* + * Wound/Wait Mutex testing module. + * Copyright (C) 2016-2018 VMWare Inc. + */ + +#include "ww_mutex_test.h" + +#ifdef WW_BUILTIN +#include <linux/ww_mutex.h> +#define ww_acquire_batch_begin(_a) +#define ww_acquire_batch_end(_a) +#else +#include "ww_mutex.h" +#endif + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/random.h> +#include <linux/delay.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Thomas Hellström"); +MODULE_DESCRIPTION("Test module for ww_mutex performance"); +MODULE_VERSION("1.0.0"); + +struct ww_mutex_item { + struct ww_mutex mutex; + /* Align to a typical cache line */ + u8 pad[64 - sizeof(struct ww_mutex) % 64]; +}; + +struct wwt_device { + struct ww_mutex_item *locks; + int *used; + int num_locks; +}; + +struct wwt_thread { + struct wwt_device *dev; + struct ww_mutex **p_locks; + u8 *used; + int num_locks; + int thread; + u32 num_rollbacks; + struct work_struct work; +}; + + + + +static struct wwt_device g_dev; +static struct wwt_thread threads[WWT_NUM_THREADS]; +static DEFINE_WW_CLASS(wwt_class); + +static void wwt_thread_rollback(struct wwt_thread *t, int upto) +{ + while(upto--) + ww_mutex_unlock(t->p_locks[upto]); + t->num_rollbacks++; +} + +static int wwt_thread_lock_sequence(struct wwt_thread *t) +{ + struct ww_acquire_ctx ctx; + int ret = 0; + int i; + + ww_acquire_init(&ctx, &wwt_class); + ww_acquire_batch_begin(&ctx); +restart: + for (i = 0; i < t->num_locks; ++i) { + ret = ww_mutex_lock(t->p_locks[i], &ctx); + if (ret) { + WARN_ON(i == 0); + wwt_thread_rollback(t, i); + if (ret == -EDEADLK) { + swap(t->p_locks[0], t->p_locks[i]); + goto restart; + } else + goto out_err; + } + } + ww_acquire_batch_end(&ctx); + ww_acquire_done(&ctx); + + udelay(WWT_CS_UDELAY); + + ww_acquire_batch_begin(&ctx); + wwt_thread_rollback(t, t->num_locks); + t->num_rollbacks--; +out_err: + ww_acquire_batch_end(&ctx); + ww_acquire_fini(&ctx); + return ret; +} + +static void wwt_lock_work(struct work_struct *work) { + struct wwt_thread *t = container_of(work, struct wwt_thread, work); + int i, 
ret;
+
+	printk(KERN_INFO "thread %d: Starting work.\n", t->thread);
+	for (i = 0; i < WWT_NUM_SUB; ++i) {
+		ret = wwt_thread_lock_sequence(t);
+		if (ret)
+			break;
+	}
+
+	printk(KERN_INFO "thread %d: Finished with %u rollbacks. ret is %d\n",
+	       t->thread, t->num_rollbacks, ret);
+}
+
+
+static int wwt_thread_setup(struct wwt_thread *t, int thread)
+{
+	struct wwt_device *dev = &g_dev;
+	u32 idx;
+	int i;
+
+	t->dev = dev;
+	t->p_locks = vmalloc(WWT_NUM_T_LOCKS * sizeof(*t->p_locks));
+	t->num_rollbacks = 0;
+	t->thread = thread;
+	INIT_WORK(&t->work, wwt_lock_work);
+#ifndef WWT_NO_SHARED
+	memset(dev->used, 0, dev->num_locks * sizeof(*dev->used));
+#endif
+	if (!t->p_locks)
+		return -ENOMEM;
+
+	for (i = 0; i < WWT_NUM_T_LOCKS; ++i) {
+		get_random_bytes(&idx, sizeof(idx));
+		idx %= WWT_NUM_LOCKS;
+		while (dev->used[idx]) {
+			idx++;
+			idx %= WWT_NUM_LOCKS;
+		}
+		dev->used[idx] = 1;
+		t->p_locks[i] = &dev->locks[idx].mutex;
+	}
+	t->num_locks = WWT_NUM_T_LOCKS;
+
+	return 0;
+}
+
+static void wwt_thread_destroy(struct wwt_thread *t)
+{
+	cancel_work_sync(&t->work);
+	if (t->p_locks)
+		vfree(t->p_locks);
+}
+
+static void wwt_device_fini(struct wwt_device *wdev)
+{
+	int i;
+
+	if (wdev->used)
+		vfree(wdev->used);
+
+	for (i = 0; i < ARRAY_SIZE(threads); ++i)
+		wwt_thread_destroy(&threads[i]);
+
+	for (i = 0; i < wdev->num_locks; ++i)
+		ww_mutex_destroy(&wdev->locks[i].mutex);
+
+	vfree(wdev->locks);
+	wdev->num_locks = 0;
+}
+
+
+static int wwt_device_init(struct wwt_device *wdev, int num_locks)
+{
+	int i, ret;
+
+	wdev->num_locks = 0;
+	wdev->locks = vzalloc(num_locks * sizeof(*wdev->locks));
+	if (!wdev->locks)
+		return -ENOMEM;
+
+	wdev->used = vzalloc(num_locks * sizeof(*wdev->used));
+	if (!wdev->used) {
+		ret = -ENOMEM;
+		goto out_thread_setup;
+	}
+
+	for (i = 0; i < num_locks; ++i)
+		ww_mutex_init(&wdev->locks[i].mutex, &wwt_class);
+
+	wdev->num_locks = num_locks;
+
+	printk(KERN_INFO "Allocated %d mutexes. Alignment is %lu %lu\n",
+	       num_locks, ((unsigned long) wdev->locks) % 64,
+	       sizeof(struct ww_mutex));
+
+	for (i = 0; i < ARRAY_SIZE(threads); ++i) {
+		ret = wwt_thread_setup(&threads[i], i);
+		if (ret)
+			goto out_thread_setup;
+	}
+
+	printk(KERN_INFO "Set up %lu threads\n", ARRAY_SIZE(threads));
+	usleep_range(10000, 20000);
+	for (i = 0; i < ARRAY_SIZE(threads); ++i) {
+#ifndef WWT_SEQUENTIAL
+		queue_work(system_unbound_wq, &threads[i].work);
+#else
+		wwt_lock_work(&threads[i].work);
+#endif
+	}
+
+	return 0;
+
+out_thread_setup:
+	wwt_device_fini(&g_dev);
+	return ret;
+}
+
+static int __init wwt_init(void) {
+
+	printk(KERN_INFO "Hello, World!\n");
+	return wwt_device_init(&g_dev, WWT_NUM_LOCKS);
+}
+
+static void __exit wwt_exit(void) {
+	wwt_device_fini(&g_dev);
+	printk(KERN_INFO "Goodbye, World!\n");
+}
+
+module_init(wwt_init);
+module_exit(wwt_exit);
diff --git a/ww_mutex_test.h b/ww_mutex_test.h
new file mode 100644
index 0000000..8911ea7
--- /dev/null
+++ b/ww_mutex_test.h
@@ -0,0 +1,20 @@
+/*
+ * Wound/Wait Mutex testing module.
+ * Copyright (C) 2016-2018 VMWare Inc.
+ */
+
+#ifndef _WW_MUTEX_TEST_H_
+#define _WW_MUTEX_TEST_H_
+
+#undef WW_BUILTIN /* Use kernel builtin ww mutexes */
+#define WW_WAITDIE true /* Use wait-die, not wound-wait */
+#define WW_BATCHING /* Batch locks and unlocks */
+#define WWT_NUM_LOCKS 100000 /* Number of global locks */
+#define WWT_NUM_T_LOCKS 800 /* Number of locks per thread out of the global */
+#define WWT_NUM_THREADS 16 /* Number of locking threads to fire off */
+#undef WWT_SEQUENTIAL /* Fire off locking threads sequentially */
+#define WWT_CS_UDELAY 000 /* Command submission udelay */
+#define WWT_NUM_SUB 10000 /* Number of command submissions */
+#undef WWT_NO_SHARED /* No shared mutexes - No rollbacks */
+
+#endif