summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-06 13:50:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-06 13:50:15 -0700
commit007dc78fea62610bf06829e38f1d8c69b6ea5af6 (patch)
tree683af90696ed7a237dedd48030bfd649e5822955
parent2f1835dffa949f560dfa3ed63c0bfc10944b461c (diff)
parentd671002be6bdd7f77a771e23bf3e95d1f16775e6 (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "Here are the locking changes in this cycle: - rwsem unification and simpler micro-optimizations to prepare for more intrusive (and more lucrative) scalability improvements in v5.3 (Waiman Long) - Lockdep irq state tracking flag usage cleanups (Frederic Weisbecker) - static key improvements (Jakub Kicinski, Peter Zijlstra) - misc updates, cleanups and smaller fixes" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits) locking/lockdep: Remove unnecessary unlikely() locking/static_key: Don't take sleeping locks in __static_key_slow_dec_deferred() locking/static_key: Factor out the fast path of static_key_slow_dec() locking/static_key: Add support for deferred static branches locking/lockdep: Test all incompatible scenarios at once in check_irq_usage() locking/lockdep: Avoid bogus Clang warning locking/lockdep: Generate LOCKF_ bit composites locking/lockdep: Use expanded masks on find_usage_*() functions locking/lockdep: Map remaining magic numbers to lock usage mask names locking/lockdep: Move valid_state() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING locking/rwsem: Prevent unneeded warning during locking selftest locking/rwsem: Optimize rwsem structure for uncontended lock acquisition locking/rwsem: Enable lock event counting locking/lock_events: Don't show pvqspinlock events on bare metal locking/lock_events: Make lock_events available for all archs & other locks locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro locking/rwsem: Add debug check for __down_read*() locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued() locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h ...
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/Kconfig9
-rw-r--r--arch/alpha/Kconfig7
-rw-r--r--arch/alpha/include/asm/rwsem.h211
-rw-r--r--arch/arc/Kconfig3
-rw-r--r--arch/arm/Kconfig4
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/c6x/Kconfig3
-rw-r--r--arch/csky/Kconfig3
-rw-r--r--arch/h8300/Kconfig3
-rw-r--r--arch/hexagon/Kconfig6
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/ia64/include/asm/rwsem.h172
-rw-r--r--arch/m68k/Kconfig7
-rw-r--r--arch/microblaze/Kconfig6
-rw-r--r--arch/mips/Kconfig7
-rw-r--r--arch/nds32/Kconfig3
-rw-r--r--arch/nios2/Kconfig3
-rw-r--r--arch/openrisc/Kconfig6
-rw-r--r--arch/parisc/Kconfig6
-rw-r--r--arch/powerpc/Kconfig7
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/riscv/Kconfig3
-rw-r--r--arch/s390/Kconfig6
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/sh/Kconfig6
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sparc/Kconfig8
-rw-r--r--arch/sparc/include/asm/Kbuild1
-rw-r--r--arch/unicore32/Kconfig6
-rw-r--r--arch/x86/Kconfig11
-rw-r--r--arch/x86/include/asm/rwsem.h237
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/rwsem.S156
-rw-r--r--arch/x86/um/Kconfig6
-rw-r--r--arch/x86/um/Makefile4
-rw-r--r--arch/xtensa/Kconfig3
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--include/asm-generic/rwsem.h140
-rw-r--r--include/linux/jump_label_ratelimit.h64
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/rwsem-spinlock.h47
-rw-r--r--include/linux/rwsem.h37
-rw-r--r--kernel/Kconfig.locks2
-rw-r--r--kernel/jump_label.c63
-rw-r--r--kernel/locking/Makefile5
-rw-r--r--kernel/locking/lock_events.c179
-rw-r--r--kernel/locking/lock_events.h59
-rw-r--r--kernel/locking/lock_events_list.h67
-rw-r--r--kernel/locking/lockdep.c267
-rw-r--r--kernel/locking/lockdep_internals.h34
-rw-r--r--kernel/locking/percpu-rwsem.c2
-rw-r--r--kernel/locking/qspinlock.c8
-rw-r--r--kernel/locking/qspinlock_paravirt.h19
-rw-r--r--kernel/locking/qspinlock_stat.h242
-rw-r--r--kernel/locking/rwsem-spinlock.c339
-rw-r--r--kernel/locking/rwsem-xadd.c204
-rw-r--r--kernel/locking/rwsem.c25
-rw-r--r--kernel/locking/rwsem.h174
62 files changed, 983 insertions, 1925 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 52cd9341e03c..7be412e1a380 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9100,7 +9100,6 @@ F: arch/*/include/asm/spinlock*.h
F: include/linux/rwlock*.h
F: include/linux/mutex*.h
F: include/linux/rwsem*.h
-F: arch/*/include/asm/rwsem.h
F: include/linux/seqlock.h
F: lib/locking*.[ch]
F: kernel/locking/
diff --git a/arch/Kconfig b/arch/Kconfig
index a826843470ed..3ab446bd12ef 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -907,6 +907,15 @@ config HAVE_ARCH_PREL32_RELOCATIONS
config ARCH_USE_MEMREMAP_PROT
bool
+config LOCK_EVENT_COUNTS
+ bool "Locking event counts collection"
+ depends on DEBUG_FS
+ ---help---
+ Enable light-weight counting of various locking related events
+ in the system with minimal performance impact. This reduces
+ the chance of application behavior change because of timing
+ differences. The counts are reported via debugfs.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index c7c976eb6407..f7b19b813a70 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -50,13 +50,6 @@ config MMU
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config ARCH_HAS_ILOG2_U32
bool
default n
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
deleted file mode 100644
index cf8fc8f9a2ed..000000000000
--- a/arch/alpha/include/asm/rwsem.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ALPHA_RWSEM_H
-#define _ALPHA_RWSEM_H
-
-/*
- * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001.
- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
- */
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-
-#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
-#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
-#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
-#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-static inline int ___down_read(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
-#endif
- return (oldcount < 0);
-}
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (unlikely(___down_read(sem)))
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (unlikely(___down_read(sem)))
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- long old, new, res;
-
- res = atomic_long_read(&sem->count);
- do {
- new = res + RWSEM_ACTIVE_READ_BIAS;
- if (new <= 0)
- break;
- old = res;
- res = atomic_long_cmpxchg(&sem->count, old, new);
- } while (res != old);
- return res >= 0 ? 1 : 0;
-}
-
-static inline long ___down_write(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
-#endif
- return oldcount;
-}
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- if (unlikely(___down_write(sem)))
- rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (unlikely(___down_write(sem))) {
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- if (ret == RWSEM_UNLOCKED_VALUE)
- return 1;
- return 0;
-}
-
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- " mb\n"
- "1: ldq_l %0,%1\n"
- " subq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(oldcount < 0))
- if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0)
- rwsem_wake(sem);
-}
-
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long count;
-#ifndef CONFIG_SMP
- sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count.counter;
-#else
- long temp;
- __asm__ __volatile__(
- " mb\n"
- "1: ldq_l %0,%1\n"
- " subq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " subq %0,%3,%0\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (count), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(count))
- if ((int)count == 0)
- rwsem_wake(sem);
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter -= RWSEM_WAITING_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(oldcount < 0))
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c781e45d1d99..23e063df5d2c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER
config GENERIC_CSUM
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config ARCH_DISCONTIGMEM_ENABLE
def_bool n
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9aed25a6019b..dc9855c4a3b4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT
bool
default !CPU_V7M
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a8a4eb7f6dae..8fb51b7bf1d5 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += parport.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += seccomp.h
generic-y += segment.h
generic-y += serial.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 78d9fafac983..d81adca1b04d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -236,9 +236,6 @@ config LOCKDEP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 1e17ea5c372b..60a933b07001 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += mm-arch-hooks.h
generic-y += msi.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += rwsem.h
generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 3bb75e674161..eeb0471268a0 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -28,9 +28,6 @@ config MMU
config FPU
def_bool n
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 725a115759c9..6555d1781132 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -92,9 +92,6 @@ config GENERIC_HWEIGHT
config MMU
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config STACKTRACE_SUPPORT
def_bool y
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c071da34e081..61c01db6c292 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -27,9 +27,6 @@ config H8300
config CPU_BIG_ENDIAN
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index ac441680dcc0..3e54a53208d5 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -65,12 +65,6 @@ config GENERIC_CSUM
config GENERIC_IRQ_PROBE
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool n
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index d046e8ccdf78..3ff5f297acda 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -27,7 +27,6 @@ generic-y += mm-arch-hooks.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8d7396bd1790..73a26f04644e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT
config GENERIC_LOCKBREAK
def_bool n
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config HUGETLB_PAGE_SIZE_VARIABLE
bool
depends on HUGETLB_PAGE
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
deleted file mode 100644
index 917910607e0e..000000000000
--- a/arch/ia64/include/asm/rwsem.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * R/W semaphores for ia64
- *
- * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
- * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2005 Christoph Lameter <cl@linux.com>
- *
- * Based on asm-i386/rwsem.h and other architecture implementation.
- *
- * The MSW of the count is the negated number of active writers and
- * waiting lockers, and the LSW is the total number of active locks.
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
- * the case of an uncontended lock. Readers increment by 1 and see a positive
- * value when uncontended, negative if there are writers (and maybe) readers
- * waiting (in which case it goes to sleep).
- */
-
-#ifndef _ASM_IA64_RWSEM_H
-#define _ASM_IA64_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#include <asm/intrinsics.h>
-
-#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
-#define RWSEM_ACTIVE_BIAS (1L)
-#define RWSEM_ACTIVE_MASK (0xffffffffL)
-#define RWSEM_WAITING_BIAS (-0x100000000L)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline int
-___down_read (struct rw_semaphore *sem)
-{
- long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
-
- return (result < 0);
-}
-
-static inline void
-__down_read (struct rw_semaphore *sem)
-{
- if (___down_read(sem))
- rwsem_down_read_failed(sem);
-}
-
-static inline int
-__down_read_killable (struct rw_semaphore *sem)
-{
- if (___down_read(sem))
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline long
-___down_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old + RWSEM_ACTIVE_WRITE_BIAS;
- } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
-
- return old;
-}
-
-static inline void
-__down_write (struct rw_semaphore *sem)
-{
- if (___down_write(sem))
- rwsem_down_write_failed(sem);
-}
-
-static inline int
-__down_write_killable (struct rw_semaphore *sem)
-{
- if (___down_write(sem)) {
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void
-__up_read (struct rw_semaphore *sem)
-{
- long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
-
- if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void
-__up_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old - RWSEM_ACTIVE_WRITE_BIAS;
- } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
-
- if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int
-__down_read_trylock (struct rw_semaphore *sem)
-{
- long tmp;
- while ((tmp = atomic_long_read(&sem->count)) >= 0) {
- if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int
-__down_write_trylock (struct rw_semaphore *sem)
-{
- long tmp = atomic_long_cmpxchg_acquire(&sem->count,
- RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void
-__downgrade_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old - RWSEM_WAITING_BIAS;
- } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
-
- if (old < 0)
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 4e37efbc9296..735b9679fe6f 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -33,13 +33,6 @@ config M68K
config CPU_BIG_ENDIAN
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 321e398ab6b5..adb179f519f9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -59,15 +59,9 @@ config CPU_LITTLE_ENDIAN
endchoice
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config ZONE_DMA
def_bool y
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
def_bool n
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4a5f5b0ee9a9..b9c48b27162d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1037,13 +1037,6 @@ source "arch/mips/paravirt/Kconfig"
endmenu
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index addb7f5f5264..55559ca0efe4 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK
def_bool y
depends on PREEMPT
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool y
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 3633f8144367..ea37394ff3ea 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
config FPU
def_bool n
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool n
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index c6cf8a49a0ab..7cfb20555b10 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -44,12 +44,6 @@ config CPU_BIG_ENDIAN
config MMU
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool n
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c8e621296092..f1ed8ddfe486 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -75,12 +75,6 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
default n
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8e1e2abf17eb..fab0bf4259c7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config GENERIC_LOCKBREAK
bool
default y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a0c132bedfae..36bda391e549 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,6 +8,5 @@ generic-y += irq_regs.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += vtime.h
generic-y += msi.h
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index eb56c82d8aa1..0582260fb6c2 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -69,9 +69,6 @@ config STACKTRACE_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index cf06e313e103..97b555e772d7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT
config STACKTRACE_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config ARCH_HAS_ILOG2_U32
def_bool n
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 12d77cb11fe5..d5fadefea33c 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
-generic-y += rwsem.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += word-at-a-time.h
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b1c91ea9a958..0be08d586d40 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -90,12 +90,6 @@ config ARCH_DEFCONFIG
default "arch/sh/configs/shx3_defconfig" if SUPERH32
default "arch/sh/configs/cayman_defconfig" if SUPERH64
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_BUG
def_bool y
depends on BUG && SUPERH32
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7bf2cb680d32..73fff39a0122 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += serial.h
generic-y += sizes.h
generic-y += trace_clock.h
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index db79290ed6d5..f6421c9ce5d3 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -192,14 +192,6 @@ config NR_CPUS
source "kernel/Kconfig.hz"
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y if SPARC32
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y if SPARC64
-
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a22cfd5c0ee8..2ca3200d3616 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -18,7 +18,6 @@ generic-y += mm-arch-hooks.h
generic-y += module.h
generic-y += msi.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index d83c8f70900d..2445dfcf6444 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -39,12 +39,6 @@ config STACKTRACE_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1162df4a805..90e2640ade75 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -268,9 +268,6 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -783,14 +780,6 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
-config QUEUED_LOCK_STAT
- bool "Paravirt queued spinlock statistics"
- depends on PARAVIRT_SPINLOCKS && DEBUG_FS
- ---help---
- Enable the collection of statistical data on the slowpath
- behavior of paravirtualized queued spinlocks and report
- them on debugfs.
-
source "arch/x86/xen/Kconfig"
config KVM_GUEST
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
deleted file mode 100644
index 4c25cf6caefa..000000000000
--- a/arch/x86/include/asm/rwsem.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
- *
- * Written by David Howells (dhowells@redhat.com).
- *
- * Derived from asm-x86/semaphore.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe) readers waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _ASM_X86_RWSEM_H
-#define _ASM_X86_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-#include <asm/asm.h>
-
-/*
- * The bias values and the counter type limits the number of
- * potential readers/writers to 32767 for 32 bits and 2147483647
- * for 64 bits.
- */
-
-#ifdef CONFIG_X86_64
-# define RWSEM_ACTIVE_MASK 0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK 0x0000ffffL
-#endif
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000L
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-#define ____down_read(sem, slow_path) \
-({ \
- struct rw_semaphore* ret; \
- asm volatile("# beginning down_read\n\t" \
- LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \
- /* adds 0x00000001 */ \
- " jns 1f\n" \
- " call " slow_path "\n" \
- "1:\n\t" \
- "# ending down_read\n\t" \
- : "+m" (sem->count), "=a" (ret), \
- ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- ____down_read(sem, "call_rwsem_down_read_failed");
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable")))
- return -EINTR;
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_read_trylock(struct rw_semaphore *sem)
-{
- long result, tmp;
- asm volatile("# beginning __down_read_trylock\n\t"
- " mov %[count],%[result]\n\t"
- "1:\n\t"
- " mov %[result],%[tmp]\n\t"
- " add %[inc],%[tmp]\n\t"
- " jle 2f\n\t"
- LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
- : [count] "+m" (sem->count), [result] "=&a" (result),
- [tmp] "=&r" (tmp)
- : [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
- return result >= 0;
-}
-
-/*
- * lock for writing
- */
-#define ____down_write(sem, slow_path) \
-({ \
- long tmp; \
- struct rw_semaphore* ret; \
- \
- asm volatile("# beginning down_write\n\t" \
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
- /* adds 0xffff0001, returns the old value */ \
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
- /* was the active mask 0 before? */\
- " jz 1f\n" \
- " call " slow_path "\n" \
- "1:\n" \
- "# ending down_write" \
- : "+m" (sem->count), [tmp] "=d" (tmp), \
- "=a" (ret), ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- ____down_write(sem, "call_rwsem_down_write_failed");
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_write_trylock(struct rw_semaphore *sem)
-{
- bool result;
- long tmp0, tmp1;
- asm volatile("# beginning __down_write_trylock\n\t"
- " mov %[count],%[tmp0]\n\t"
- "1:\n\t"
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
- /* was the active mask 0 before? */
- " jnz 2f\n\t"
- " mov %[tmp0],%[tmp1]\n\t"
- " add %[inc],%[tmp1]\n\t"
- LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- CC_SET(e)
- "# ending __down_write_trylock\n\t"
- : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0),
- [tmp1] "=&r" (tmp1), CC_OUT(e) (result)
- : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory");
- return result;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_read\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 1, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n"
- "# ending __up_read\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_write\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 0xffff0001, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n\t"
- "# ending __up_write\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- asm volatile("# beginning __downgrade_write\n\t"
- LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t"
- /*
- * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
- * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
- */
- " jns 1f\n\t"
- " call call_rwsem_downgrade_wake\n"
- "1:\n\t"
- "# ending __downgrade_write\n"
- : "+m" (sem->count)
- : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS)
- : "memory", "cc");
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 3cb3af51ec89..5246db42de45 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -35,7 +35,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
deleted file mode 100644
index dc2ab6ea6768..000000000000
--- a/arch/x86/lib/rwsem.S
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * x86 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
-#include <asm/frame.h>
-
-#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
-#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
-
-#ifdef CONFIG_X86_32
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax which is either a return
- * value or just gets clobbered. Same is true for %edx so make sure GCC
- * reloads it after the slow path, by making it hold a temporary, for
- * example see ____down_write().
- */
-
-#define save_common_regs \
- pushl %ecx
-
-#define restore_common_regs \
- popl %ecx
-
- /* Avoid uglifying the argument copying x86-64 needs to do. */
- .macro movq src, dst
- .endm
-
-#else
-
-/*
- * x86-64 rwsem wrappers
- *
- * This interfaces the inline asm code to the slow-path
- * C routines. We need to save the call-clobbered regs
- * that the asm does not mark as clobbered, and move the
- * argument from %rax to %rdi.
- *
- * NOTE! We don't need to save %rax, because the functions
- * will always return the semaphore pointer in %rax (which
- * is also the input argument to these helpers)
- *
- * The following can clobber %rdx because the asm clobbers it:
- * call_rwsem_down_write_failed
- * call_rwsem_wake
- * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
- */
-
-#define save_common_regs \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rcx; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11
-
-#define restore_common_regs \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rcx; \
- popq %rsi; \
- popq %rdi
-
-#endif
-
-/* Fix up special calling conventions */
-ENTRY(call_rwsem_down_read_failed)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_down_read_failed
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_read_failed)
-
-ENTRY(call_rwsem_down_read_failed_killable)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_down_read_failed_killable
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_read_failed_killable)
-
-ENTRY(call_rwsem_down_write_failed)
- FRAME_BEGIN
- save_common_regs
- movq %rax,%rdi
- call rwsem_down_write_failed
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_write_failed)
-
-ENTRY(call_rwsem_down_write_failed_killable)
- FRAME_BEGIN
- save_common_regs
- movq %rax,%rdi
- call rwsem_down_write_failed_killable
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_write_failed_killable)
-
-ENTRY(call_rwsem_wake)
- FRAME_BEGIN
- /* do nothing if still outstanding active readers */
- __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
- jnz 1f
- save_common_regs
- movq %rax,%rdi
- call rwsem_wake
- restore_common_regs
-1: FRAME_END
- ret
-ENDPROC(call_rwsem_wake)
-
-ENTRY(call_rwsem_downgrade_wake)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_downgrade_wake
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index a9e80e44178c..a8985e1f7432 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -32,12 +32,6 @@ config ARCH_DEFCONFIG
default "arch/um/configs/i386_defconfig" if X86_32
default "arch/um/configs/x86_64_defconfig" if X86_64
-config RWSEM_XCHGADD_ALGORITHM
- def_bool 64BIT
-
-config RWSEM_GENERIC_SPINLOCK
- def_bool !RWSEM_XCHGADD_ALGORITHM
-
config 3_LEVEL_PGTABLES
bool "Three-level pagetables" if !64BIT
default 64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 2d686ae54681..33c51c064c77 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
-subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
else
obj-y += syscalls_64.o vdso/
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
- ../lib/rwsem.o
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
endif
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 4b9aafe766c5..35c8d91e6106 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -46,9 +46,6 @@ config XTENSA
with reasonable minimum requirements. The Xtensa Linux project has
a home page at <http://www.linux-xtensa.org/>.
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 3843198e03d4..4148090cafb0 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -25,7 +25,6 @@ generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += rwsem.h
generic-y += sections.h
generic-y += socket.h
generic-y += topology.h
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
deleted file mode 100644
index 93e67a055a4d..000000000000
--- a/include/asm-generic/rwsem.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_RWSEM_H
-#define _ASM_GENERIC_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#ifdef __KERNEL__
-
-/*
- * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
- * Adapted largely from include/asm-i386/rwsem.h
- * by Paul Mackerras <paulus@samba.org>.
- */
-
-/*
- * the semaphore definition
- */
-#ifdef CONFIG_64BIT
-# define RWSEM_ACTIVE_MASK 0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK 0x0000ffffL
-#endif
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000L
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- long tmp;
-
- while ((tmp = atomic_long_read(&sem->count)) >= 0) {
- if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
- tmp + RWSEM_ACTIVE_READ_BIAS)) {
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
- &sem->count);
- if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
- rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
- &sem->count);
- if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- return 0;
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic_long_dec_return_release(&sem->count);
- if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
- &sem->count) < 0))
- rwsem_wake(sem);
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- long tmp;
-
- /*
- * When downgrading from exclusive to shared ownership,
- * anything inside the write-locked region cannot leak
- * into the read side. In contrast, anything in the
- * read-locked region is ok to be re-ordered into the
- * write side. As such, rely on RELEASE semantics.
- */
- tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
- if (tmp < 0)
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_GENERIC_RWSEM_H */
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index a49f2b45b3f0..42710d5949ba 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -12,21 +12,79 @@ struct static_key_deferred {
struct delayed_work work;
};
-extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
-extern void static_key_deferred_flush(struct static_key_deferred *key);
+struct static_key_true_deferred {
+ struct static_key_true key;
+ unsigned long timeout;
+ struct delayed_work work;
+};
+
+struct static_key_false_deferred {
+ struct static_key_false key;
+ unsigned long timeout;
+ struct delayed_work work;
+};
+
+#define static_key_slow_dec_deferred(x) \
+ __static_key_slow_dec_deferred(&(x)->key, &(x)->work, (x)->timeout)
+#define static_branch_slow_dec_deferred(x) \
+ __static_key_slow_dec_deferred(&(x)->key.key, &(x)->work, (x)->timeout)
+
+#define static_key_deferred_flush(x) \
+ __static_key_deferred_flush((x), &(x)->work)
+
+extern void
+__static_key_slow_dec_deferred(struct static_key *key,
+ struct delayed_work *work,
+ unsigned long timeout);
+extern void __static_key_deferred_flush(void *key, struct delayed_work *work);
extern void
jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
+extern void jump_label_update_timeout(struct work_struct *work);
+
+#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \
+ struct static_key_true_deferred name = { \
+ .key = { STATIC_KEY_INIT_TRUE }, \
+ .timeout = (rl), \
+ .work = __DELAYED_WORK_INITIALIZER((name).work, \
+ jump_label_update_timeout, \
+ 0), \
+ }
+
+#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \
+ struct static_key_false_deferred name = { \
+ .key = { STATIC_KEY_INIT_FALSE }, \
+ .timeout = (rl), \
+ .work = __DELAYED_WORK_INITIALIZER((name).work, \
+ jump_label_update_timeout, \
+ 0), \
+ }
+
+#define static_branch_deferred_inc(x) static_branch_inc(&(x)->key)
+
#else /* !CONFIG_JUMP_LABEL */
struct static_key_deferred {
struct static_key key;
};
+struct static_key_true_deferred {
+ struct static_key_true key;
+};
+struct static_key_false_deferred {
+ struct static_key_false key;
+};
+#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \
+ struct static_key_true_deferred name = { STATIC_KEY_TRUE_INIT }
+#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \
+ struct static_key_false_deferred name = { STATIC_KEY_FALSE_INIT }
+
+#define static_branch_slow_dec_deferred(x) static_branch_dec(&(x)->key)
+
static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
{
STATIC_KEY_CHECK_USE(key);
static_key_slow_dec(&key->key);
}
-static inline void static_key_deferred_flush(struct static_key_deferred *key)
+static inline void static_key_deferred_flush(void *key)
{
STATIC_KEY_CHECK_USE(key);
}
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6f165d625320..6e2377e6c1d6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -476,7 +476,7 @@ struct pin_cookie { };
#define NIL_COOKIE (struct pin_cookie){ }
-#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; })
+#define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; })
#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0)
#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0)
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
deleted file mode 100644
index e47568363e5e..000000000000
--- a/include/linux/rwsem-spinlock.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* rwsem-spinlock.h: fallback C implementation
- *
- * Copyright (c) 2001 David Howells (dhowells@redhat.com).
- * - Derived partially from ideas by Andrea Arcangeli <andrea@suse.de>
- * - Derived also from comments by Linus
- */
-
-#ifndef _LINUX_RWSEM_SPINLOCK_H
-#define _LINUX_RWSEM_SPINLOCK_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-/*
- * the rw-semaphore definition
- * - if count is 0 then there are no active readers or writers
- * - if count is +ve then that is the number of active readers
- * - if count is -1 then there is one active writer
- * - if wait_list is not empty, then there are processes waiting for the semaphore
- */
-struct rw_semaphore {
- __s32 count;
- raw_spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000
-
-extern void __down_read(struct rw_semaphore *sem);
-extern int __must_check __down_read_killable(struct rw_semaphore *sem);
-extern int __down_read_trylock(struct rw_semaphore *sem);
-extern void __down_write(struct rw_semaphore *sem);
-extern int __must_check __down_write_killable(struct rw_semaphore *sem);
-extern int __down_write_trylock(struct rw_semaphore *sem);
-extern void __up_read(struct rw_semaphore *sem);
-extern void __up_write(struct rw_semaphore *sem);
-extern void __downgrade_write(struct rw_semaphore *sem);
-extern int rwsem_is_locked(struct rw_semaphore *sem);
-
-#endif /* __KERNEL__ */
-#endif /* _LINUX_RWSEM_SPINLOCK_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 67dbb57508b1..2ea18a3def04 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -20,25 +20,30 @@
#include <linux/osq_lock.h>
#endif
-struct rw_semaphore;
-
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-#include <linux/rwsem-spinlock.h> /* use a generic implementation */
-#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE
-#else
-/* All arch specific implementations share the same struct */
+/*
+ * For an uncontended rwsem, count and owner are the only fields a task
+ * needs to touch when acquiring the rwsem. So they are put next to each
+ * other to increase the chance that they will share the same cacheline.
+ *
+ * In a contended rwsem, the owner is likely the most frequently accessed
+ * field in the structure as the optimistic waiter that holds the osq lock
+ * will spin on owner. For an embedded rwsem, other hot fields in the
+ * containing structure should be moved further away from the rwsem to
+ * reduce the chance that they will share the same cacheline, which would
+ * cause a cacheline bouncing problem.
+ */
struct rw_semaphore {
atomic_long_t count;
- struct list_head wait_list;
- raw_spinlock_t wait_lock;
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
- struct optimistic_spin_queue osq; /* spinner MCS lock */
/*
* Write owner. Used as a speculative check to see
* if the owner is running on the cpu.
*/
struct task_struct *owner;
+ struct optimistic_spin_queue osq; /* spinner MCS lock */
#endif
+ raw_spinlock_t wait_lock;
+ struct list_head wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
@@ -50,24 +55,14 @@ struct rw_semaphore {
*/
#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L)
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/* Include the arch specific part */
-#include <asm/rwsem.h>
-
/* In all implementations count != 0 means locked */
static inline int rwsem_is_locked(struct rw_semaphore *sem)
{
return atomic_long_read(&sem->count) != 0;
}
+#define RWSEM_UNLOCKED_VALUE 0L
#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
-#endif
/* Common initializer macros and functions */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index fbba478ae522..e335953fa704 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -229,7 +229,7 @@ config MUTEX_SPIN_ON_OWNER
config RWSEM_SPIN_ON_OWNER
def_bool y
- depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
+ depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
config LOCK_SPIN_ON_OWNER
def_bool y
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index bad96b476eb6..de6efdecc70d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -202,11 +202,13 @@ void static_key_disable(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_disable);
-static void __static_key_slow_dec_cpuslocked(struct static_key *key,
- unsigned long rate_limit,
- struct delayed_work *work)
+static bool static_key_slow_try_dec(struct static_key *key)
{
- lockdep_assert_cpus_held();
+ int val;
+
+ val = atomic_fetch_add_unless(&key->enabled, -1, 1);
+ if (val == 1)
+ return false;
/*
* The negative count check is valid even when a negative
@@ -215,63 +217,70 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key,
* returns is unbalanced, because all other static_key_slow_inc()
* instances block while the update is in progress.
*/
- if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
- WARN(atomic_read(&key->enabled) < 0,
- "jump label: negative count!\n");
+ WARN(val < 0, "jump label: negative count!\n");
+ return true;
+}
+
+static void __static_key_slow_dec_cpuslocked(struct static_key *key)
+{
+ lockdep_assert_cpus_held();
+
+ if (static_key_slow_try_dec(key))
return;
- }
- if (rate_limit) {
- atomic_inc(&key->enabled);
- schedule_delayed_work(work, rate_limit);
- } else {
+ jump_label_lock();
+ if (atomic_dec_and_test(&key->enabled))
jump_label_update(key);
- }
jump_label_unlock();
}
-static void __static_key_slow_dec(struct static_key *key,
- unsigned long rate_limit,
- struct delayed_work *work)
+static void __static_key_slow_dec(struct static_key *key)
{
cpus_read_lock();
- __static_key_slow_dec_cpuslocked(key, rate_limit, work);
+ __static_key_slow_dec_cpuslocked(key);
cpus_read_unlock();
}
-static void jump_label_update_timeout(struct work_struct *work)
+void jump_label_update_timeout(struct work_struct *work)
{
struct static_key_deferred *key =
container_of(work, struct static_key_deferred, work.work);
- __static_key_slow_dec(&key->key, 0, NULL);
+ __static_key_slow_dec(&key->key);
}
+EXPORT_SYMBOL_GPL(jump_label_update_timeout);
void static_key_slow_dec(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
- __static_key_slow_dec(key, 0, NULL);
+ __static_key_slow_dec(key);
}
EXPORT_SYMBOL_GPL(static_key_slow_dec);
void static_key_slow_dec_cpuslocked(struct static_key *key)
{
STATIC_KEY_CHECK_USE(key);
- __static_key_slow_dec_cpuslocked(key, 0, NULL);
+ __static_key_slow_dec_cpuslocked(key);
}
-void static_key_slow_dec_deferred(struct static_key_deferred *key)
+void __static_key_slow_dec_deferred(struct static_key *key,
+ struct delayed_work *work,
+ unsigned long timeout)
{
STATIC_KEY_CHECK_USE(key);
- __static_key_slow_dec(&key->key, key->timeout, &key->work);
+
+ if (static_key_slow_try_dec(key))
+ return;
+
+ schedule_delayed_work(work, timeout);
}
-EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred);
+EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred);
-void static_key_deferred_flush(struct static_key_deferred *key)
+void __static_key_deferred_flush(void *key, struct delayed_work *work)
{
STATIC_KEY_CHECK_USE(key);
- flush_delayed_work(&key->work);
+ flush_delayed_work(work);
}
-EXPORT_SYMBOL_GPL(static_key_deferred_flush);
+EXPORT_SYMBOL_GPL(__static_key_deferred_flush);
void jump_label_rate_limit(struct static_key_deferred *key,
unsigned long rl)
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 392c7f23af76..6fe2f333aecb 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n
-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
+obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
-obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
-obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
+obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o
diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c
new file mode 100644
index 000000000000..fa2c2f951c6b
--- /dev/null
+++ b/kernel/locking/lock_events.c
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Waiman Long <waiman.long@hpe.com>
+ */
+
+/*
+ * Collect locking event counts
+ */
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/fs.h>
+
+#include "lock_events.h"
+
+#undef LOCK_EVENT
+#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name,
+
+#define LOCK_EVENTS_DIR "lock_event_counts"
+
+/*
+ * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different
+ * types of locks will be reported under the <debugfs>/lock_event_counts/
+ * directory. See lock_events_list.h for the list of available locking
+ * events.
+ *
+ * Writing to the special ".reset_counts" file will reset all the above
+ * locking event counts. This is a very slow operation and so should not
+ * be done frequently.
+ *
+ * These event counts are implemented as per-cpu variables which are
+ * summed and computed whenever the corresponding debugfs files are read. This
+ * minimizes added overhead, making the counts usable even in a production
+ * environment.
+ */
+static const char * const lockevent_names[lockevent_num + 1] = {
+
+#include "lock_events_list.h"
+
+ [LOCKEVENT_reset_cnts] = ".reset_counts",
+};
+
+/*
+ * Per-cpu counts
+ */
+DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
+
+/*
+ * The lockevent_read() function is __weak, so it can be overridden.
+ */
+ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[64];
+ int cpu, id, len;
+ u64 sum = 0;
+
+ /*
+ * Get the counter ID stored in file->f_inode->i_private
+ */
+ id = (long)file_inode(file)->i_private;
+
+ if (id >= lockevent_num)
+ return -EBADF;
+
+ for_each_possible_cpu(cpu)
+ sum += per_cpu(lockevents[id], cpu);
+ len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+/*
+ * Function to handle write request
+ *
+ * When the counter ID is LOCKEVENT_reset_cnts, reset all the counts.
+ */
+static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ int cpu;
+
+ /*
+ * Get the counter ID stored in file->f_inode->i_private
+ */
+ if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts)
+ return count;
+
+ for_each_possible_cpu(cpu) {
+ int i;
+ unsigned long *ptr = per_cpu_ptr(lockevents, cpu);
+
+ for (i = 0 ; i < lockevent_num; i++)
+ WRITE_ONCE(ptr[i], 0);
+ }
+ return count;
+}
+
+/*
+ * Debugfs data structures
+ */
+static const struct file_operations fops_lockevent = {
+ .read = lockevent_read,
+ .write = lockevent_write,
+ .llseek = default_llseek,
+};
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#include <asm/paravirt.h>
+
+static bool __init skip_lockevent(const char *name)
+{
+ static int pv_on __initdata = -1;
+
+ if (pv_on < 0)
+ pv_on = !pv_is_native_spin_unlock();
+ /*
+ * Skip PV qspinlock events on bare metal.
+ */
+ if (!pv_on && !memcmp(name, "pv_", 3))
+ return true;
+ return false;
+}
+#else
+static inline bool skip_lockevent(const char *name)
+{
+ return false;
+}
+#endif
+
+/*
+ * Initialize debugfs for the locking event counts.
+ */
+static int __init init_lockevent_counts(void)
+{
+ struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL);
+ int i;
+
+ if (!d_counts)
+ goto out;
+
+ /*
+ * Create the debugfs files
+ *
+ * As reading from and writing to the stat files can be slow, only
+ * root is allowed to do the read/write to limit impact to system
+ * performance.
+ */
+ for (i = 0; i < lockevent_num; i++) {
+ if (skip_lockevent(lockevent_names[i]))
+ continue;
+ if (!debugfs_create_file(lockevent_names[i], 0400, d_counts,
+ (void *)(long)i, &fops_lockevent))
+ goto fail_undo;
+ }
+
+ if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
+ d_counts, (void *)(long)LOCKEVENT_reset_cnts,
+ &fops_lockevent))
+ goto fail_undo;
+
+ return 0;
+fail_undo:
+ debugfs_remove_recursive(d_counts);
+out:
+ pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR);
+ return -ENOMEM;
+}
+fs_initcall(init_lockevent_counts);
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
new file mode 100644
index 000000000000..feb1acc54611
--- /dev/null
+++ b/kernel/locking/lock_events.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Waiman Long <longman@redhat.com>
+ */
+
+#ifndef __LOCKING_LOCK_EVENTS_H
+#define __LOCKING_LOCK_EVENTS_H
+
+enum lock_events {
+
+#include "lock_events_list.h"
+
+ lockevent_num, /* Total number of lock event counts */
+ LOCKEVENT_reset_cnts = lockevent_num,
+};
+
+#ifdef CONFIG_LOCK_EVENT_COUNTS
+/*
+ * Per-cpu counters
+ */
+DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
+
+/*
+ * Increment the per-cpu count of a locking event when @cond is true
+ */
+static inline void __lockevent_inc(enum lock_events event, bool cond)
+{
+ if (cond)
+ __this_cpu_inc(lockevents[event]);
+}
+
+#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
+#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c)
+
+static inline void __lockevent_add(enum lock_events event, int inc)
+{
+ __this_cpu_add(lockevents[event], inc);
+}
+
+#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
+
+#else /* CONFIG_LOCK_EVENT_COUNTS */
+
+#define lockevent_inc(ev)
+#define lockevent_add(ev, c)
+#define lockevent_cond_inc(ev, c)
+
+#endif /* CONFIG_LOCK_EVENT_COUNTS */
+#endif /* __LOCKING_LOCK_EVENTS_H */
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
new file mode 100644
index 000000000000..ad7668cfc9da
--- /dev/null
+++ b/kernel/locking/lock_events_list.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Waiman Long <longman@redhat.com>
+ */
+
+#ifndef LOCK_EVENT
+#define LOCK_EVENT(name) LOCKEVENT_ ## name,
+#endif
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+/*
+ * Locking events for PV qspinlock.
+ */
+LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
+LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
+LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
+LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
+LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
+LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
+LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
+LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */
+LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */
+LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */
+LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+/*
+ * Locking events for qspinlock
+ *
+ * Subtracting lock_use_node[234] from lock_slowpath will give you
+ * lock_use_node1.
+ */
+LOCK_EVENT(lock_pending) /* # of locking ops via pending code */
+LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */
+LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */
+LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */
+LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */
+LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+/*
+ * Locking events for rwsem
+ */
+LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
+LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
+LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
+LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
+LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
+LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
+LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
+LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
+LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
+LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */
+LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
+LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
+LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 91c6b89f04df..27b992fe8cec 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
{
char c = '.';
- if (class->usage_mask & lock_flag(bit + 2))
+ if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
c = '+';
if (class->usage_mask & lock_flag(bit)) {
c = '-';
- if (class->usage_mask & lock_flag(bit + 2))
+ if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
c = '?';
}
@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target,
}
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+/*
+ * BFS callback that accumulates instead of matching: it ORs the usage_mask
+ * of @entry's class into the unsigned long that @mask points to and always
+ * returns 0. check_irq_usage() passes it to __bfs_backwards() to collect
+ * the usage bits found behind <prev>.
+ * NOTE(review): returning 0 presumably means "no match, keep searching" to
+ * the BFS helper - confirm against __bfs().
+ */
+static inline int usage_accumulate(struct lock_list *entry, void *mask)
+{
+ *(unsigned long *)mask |= entry->class->usage_mask;
+
+ return 0;
+}
+
/*
* Forwards and backwards subgraph searching, for the purposes of
* proving that two subgraphs can be connected by a new dependency
* without creating any illegal irq-safe -> irq-unsafe lock dependency.
*/
-static inline int usage_match(struct lock_list *entry, void *bit)
+static inline int usage_match(struct lock_list *entry, void *mask)
{
- return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit);
+ return entry->class->usage_mask & *(unsigned long *)mask;
}
-
-
/*
* Find a node in the forwards-direction dependency sub-graph starting
* at @root->class that matches @bit.
@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit)
* Return <0 on error.
*/
static int
-find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
+find_usage_forwards(struct lock_list *root, unsigned long usage_mask,
struct lock_list **target_entry)
{
int result;
debug_atomic_inc(nr_find_usage_forwards_checks);
- result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
+ result = __bfs_forwards(root, &usage_mask, usage_match, target_entry);
return result;
}
@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
* Return <0 on error.
*/
static int
-find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
+find_usage_backwards(struct lock_list *root, unsigned long usage_mask,
struct lock_list **target_entry)
{
int result;
debug_atomic_inc(nr_find_usage_backwards_checks);
- result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
+ result = __bfs_backwards(root, &usage_mask, usage_match, target_entry);
return result;
}
@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr,
return 0;
}
-static int
-check_usage(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next, enum lock_usage_bit bit_backwards,
- enum lock_usage_bit bit_forwards, const char *irqclass)
-{
- int ret;
- struct lock_list this, that;
- struct lock_list *uninitialized_var(target_entry);
- struct lock_list *uninitialized_var(target_entry1);
-
- this.parent = NULL;
-
- this.class = hlock_class(prev);
- ret = find_usage_backwards(&this, bit_backwards, &target_entry);
- if (ret < 0)
- return print_bfs_bug(ret);
- if (ret == 1)
- return ret;
-
- that.parent = NULL;
- that.class = hlock_class(next);
- ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
- if (ret < 0)
- return print_bfs_bug(ret);
- if (ret == 1)
- return ret;
-
- return print_bad_irq_dependency(curr, &this, &that,
- target_entry, target_entry1,
- prev, next,
- bit_backwards, bit_forwards, irqclass);
-}
-
static const char *state_names[] = {
#define LOCKDEP_STATE(__STATE) \
__stringify(__STATE),
@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = {
static inline const char *state_name(enum lock_usage_bit bit)
{
- return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2];
+ if (bit & LOCK_USAGE_READ_MASK)
+ return state_rnames[bit >> LOCK_USAGE_DIR_MASK];
+ else
+ return state_names[bit >> LOCK_USAGE_DIR_MASK];
}
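+/*
+ * The bit number is encoded like:
+ *
+ * bit0: 0 exclusive, 1 read lock
+ * bit1: 0 used in irq, 1 irq enabled
+ * bit2-n: state; shifting the bit number right by 2 therefore yields the
+ * index into the per-state tables (state_names[], state_rnames[])
+ */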
static int exclusive_bit(int new_bit)
{
int state = new_bit & LOCK_USAGE_STATE_MASK;
@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit)
return state | (dir ^ LOCK_USAGE_DIR_MASK);
}
+/*
+ * Return @mask with the direction of every IRQ usage bit flipped: each
+ * LOCK_ENABLED_* bit becomes the LOCK_USED_IN_* bit of the same state and
+ * read-ness, and each LOCK_USED_IN_* bit becomes the LOCK_ENABLED_* one.
+ *
+ * Observe that when given a bitmask where each bitnr is encoded as above, a
+ * right shift of the mask decrements the individual bitnrs by 1 and,
+ * conversely, a left shift increments the individual bitnrs by 1.
+ *
+ * So for all bits whose number has LOCK_ENABLED_* set (bitnr1 == 1), we can
+ * create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0)
+ * instead by subtracting 2 from the bit number, or shifting the mask right
+ * by 2.
+ *
+ * Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2.
+ *
+ * So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is
+ * all bits set) and recompose with bitnr1 flipped. Any bit of @mask outside
+ * those two masks does not contribute to the result.
+ *
+ * NOTE(review): the shift count used below is LOCK_USAGE_DIR_MASK, which is
+ * only correct while that mask's value equals the shift distance of 2
+ * described above - confirm.
+ */
+static unsigned long invert_dir_mask(unsigned long mask)
+{
+ unsigned long excl = 0;
+
+ /* Invert dir */
+ excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK;
+ excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK;
+
+ return excl;
+}
+
+/*
+ * Return the usage bits that are exclusive to (conflict with) @mask: the
+ * direction of every bit is flipped with invert_dir_mask() and the read
+ * variants are then folded onto their non-read counterparts.
+ *
+ * As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all
+ * bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*).
+ * And then mask out all bitnr0, so every LOCKF_IRQ_READ bit is cleared from
+ * the returned mask.
+ */
+static unsigned long exclusive_mask(unsigned long mask)
+{
+ unsigned long excl = invert_dir_mask(mask);
+
+ /* Strip read */
+ excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK;
+ excl &= ~LOCKF_IRQ_READ;
+
+ return excl;
+}
+
+/*
+ * Retrieve the _possible_ original mask to which @mask is
+ * exclusive. Ie: this is the opposite of exclusive_mask().
+ * Note that 2 possible original bits can match an exclusive
+ * bit: one has LOCK_USAGE_READ_MASK set, the other has it
+ * cleared. So both are returned for each exclusive bit.
+ *
+ * Implementation: flip the direction of every bit with
+ * invert_dir_mask(), then for each non-read bit (LOCKF_IRQ) of
+ * that result also set the bit LOCK_USAGE_READ_MASK positions
+ * higher, i.e. its *_READ variant.
+ */
+static unsigned long original_mask(unsigned long mask)
+{
+ unsigned long excl = invert_dir_mask(mask);
+
+ /* Include read in existing usages */
+ excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK;
+
+ return excl;
+}
+
+/*
+ * Find the first pair of matching bits between an original
+ * usage mask and an exclusive usage mask.
+ *
+ * @mask: usage mask whose set bits below LOCK_USED are walked, in
+ * for_each_set_bit() order
+ * @excl_mask: usage mask tested for the exclusive_bit() of each walked bit
+ * @bitp: on success, receives the bit found in @mask
+ * @excl_bitp: on success, receives the matching exclusive bit
+ *
+ * Returns 0 when a pair was found and stored, -1 otherwise (in which
+ * case *@bitp and *@excl_bitp are left untouched).
+ */
+static int find_exclusive_match(unsigned long mask,
+ unsigned long excl_mask,
+ enum lock_usage_bit *bitp,
+ enum lock_usage_bit *excl_bitp)
+{
+ int bit, excl;
+
+ for_each_set_bit(bit, &mask, LOCK_USED) {
+ excl = exclusive_bit(bit);
+ if (excl_mask & lock_flag(excl)) {
+ *bitp = bit;
+ *excl_bitp = excl;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+/*
+ * Prove that the new dependency does not connect a hardirq-safe(-read)
+ * lock with a hardirq-unsafe lock - to achieve this we search
+ * the backwards-subgraph starting at <prev>, and the
+ * forwards-subgraph starting at <next>:
+ *
+ * All IRQ states are checked in one pass: the LOCK_USED_IN_* bits seen
+ * behind <prev> are accumulated into a single mask and the forward
+ * search looks for any usage that is exclusive to that mask.
+ *
+ * Returns 1 when no conflicting usage is found. Otherwise returns the
+ * result of print_bfs_bug() (search error) or print_bad_irq_dependency()
+ * (conflict reported).
+ */
static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next, enum lock_usage_bit bit)
+ struct held_lock *next)
{
+ unsigned long usage_mask = 0, forward_mask, backward_mask;
+ enum lock_usage_bit forward_bit = 0, backward_bit = 0;
+ struct lock_list *uninitialized_var(target_entry1);
+ struct lock_list *uninitialized_var(target_entry);
+ struct lock_list this, that;
+ int ret;
+
/*
- * Prove that the new dependency does not connect a hardirq-safe
- * lock with a hardirq-unsafe lock - to achieve this we search
- * the backwards-subgraph starting at <prev>, and the
- * forwards-subgraph starting at <next>:
+ * Step 1: gather all hard/soft IRQs usages backward in an
+ * accumulated usage mask.
*/
- if (!check_usage(curr, prev, next, bit,
- exclusive_bit(bit), state_name(bit)))
- return 0;
+ this.parent = NULL;
+ this.class = hlock_class(prev);
+
+ ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
+ if (ret < 0)
+ return print_bfs_bug(ret);
- bit++; /* _READ */
+ usage_mask &= LOCKF_USED_IN_IRQ_ALL;
+ if (!usage_mask)
+ return 1;
/*
- * Prove that the new dependency does not connect a hardirq-safe-read
- * lock with a hardirq-unsafe lock - to achieve this we search
- * the backwards-subgraph starting at <prev>, and the
- * forwards-subgraph starting at <next>:
+ * Step 2: find exclusive uses forward that match the previous
+ * backward accumulated mask.
*/
- if (!check_usage(curr, prev, next, bit,
- exclusive_bit(bit), state_name(bit)))
- return 0;
+ forward_mask = exclusive_mask(usage_mask);
- return 1;
-}
+ that.parent = NULL;
+ that.class = hlock_class(next);
-static int
-check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next)
-{
-#define LOCKDEP_STATE(__STATE) \
- if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
- return 0;
-#include "lockdep_states.h"
-#undef LOCKDEP_STATE
+ ret = find_usage_forwards(&that, forward_mask, &target_entry1);
+ if (ret < 0)
+ return print_bfs_bug(ret);
+ if (ret == 1)
+ return ret;
- return 1;
+ /*
+ * Step 3: we found a bad match! Now retrieve a lock from the backward
+ * list whose usage mask matches the exclusive usage mask from the
+ * lock found on the forward list.
+ *
+ * Only the LOCKF_ENABLED_IRQ_ALL bits of the forward lock are inverted.
+ * Its mask may also carry LOCK_USED_IN_* bits; original_mask() would
+ * turn those into LOCK_ENABLED_* bits, and the backward search could
+ * then stop at an irq-unsafe lock instead of the irq-safe lock that
+ * actually conflicts, producing a bogus report or tripping the
+ * warnings below.
+ */
+ backward_mask = original_mask(target_entry1->class->usage_mask &
+ LOCKF_ENABLED_IRQ_ALL);
+
+ ret = find_usage_backwards(&this, backward_mask, &target_entry);
+ if (ret < 0)
+ return print_bfs_bug(ret);
+ if (DEBUG_LOCKS_WARN_ON(ret == 1))
+ return 1;
+
+ /*
+ * Step 4: narrow down to a pair of incompatible usage bits
+ * and report it.
+ */
+ ret = find_exclusive_match(target_entry->class->usage_mask,
+ target_entry1->class->usage_mask,
+ &backward_bit, &forward_bit);
+ if (DEBUG_LOCKS_WARN_ON(ret == -1))
+ return 1;
+
+ return print_bad_irq_dependency(curr, &this, &that,
+ target_entry, target_entry1,
+ prev, next,
+ backward_bit, forward_bit,
+ state_name(backward_bit));
}
static void inc_chains(void)
@@ -2030,9 +2128,8 @@ static void inc_chains(void)
#else
-static inline int
-check_prev_add_irq(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next)
+static inline int check_irq_usage(struct task_struct *curr,
+ struct held_lock *prev, struct held_lock *next)
{
return 1;
}
@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
else if (unlikely(ret < 0))
return print_bfs_bug(ret);
- if (!check_prev_add_irq(curr, prev, next))
+ if (!check_irq_usage(curr, prev, next))
return 0;
/*
@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr)
#endif
}
+static int mark_lock(struct task_struct *curr, struct held_lock *this,
+ enum lock_usage_bit new_bit);
+
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+
+
static void
print_usage_bug_scenario(struct held_lock *lock)
{
@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this,
return 1;
}
-static int mark_lock(struct task_struct *curr, struct held_lock *this,
- enum lock_usage_bit new_bit);
-
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* print irq inversion bug:
@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
- ret = find_usage_forwards(&root, bit, &target_entry);
+ ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
root.parent = NULL;
root.class = hlock_class(this);
- ret = find_usage_backwards(&root, bit, &target_entry);
+ ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
if (ret < 0)
return print_bfs_bug(ret);
if (ret == 1)
@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = {
static inline int state_verbose(enum lock_usage_bit bit,
struct lock_class *class)
{
- return state_verbose_f[bit >> 2](class);
+ return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class);
}
typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip)
/*
* See the fine text that goes along with this variable definition.
*/
- if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
+ if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled))
return;
/*
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index d4c197425f68..150ec3f0c5b5 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -42,13 +42,35 @@ enum {
__LOCKF(USED)
};
-#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
-#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
+#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
+static const unsigned long LOCKF_ENABLED_IRQ =
+#include "lockdep_states.h"
+ 0;
+#undef LOCKDEP_STATE
+
+#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE |
+static const unsigned long LOCKF_USED_IN_IRQ =
+#include "lockdep_states.h"
+ 0;
+#undef LOCKDEP_STATE
+
+#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ |
+static const unsigned long LOCKF_ENABLED_IRQ_READ =
+#include "lockdep_states.h"
+ 0;
+#undef LOCKDEP_STATE
+
+#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ |
+static const unsigned long LOCKF_USED_IN_IRQ_READ =
+#include "lockdep_states.h"
+ 0;
+#undef LOCKDEP_STATE
+
+#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
+#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
-#define LOCKF_ENABLED_IRQ_READ \
- (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
-#define LOCKF_USED_IN_IRQ_READ \
- (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
+#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ)
+#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ)
/*
* CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b92d90..f17dad99eec8 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -7,6 +7,8 @@
#include <linux/sched.h>
#include <linux/errno.h>
+#include "rwsem.h"
+
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
{
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 5e9247dc2515..e14b32c69639 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* 0,1,0 -> 0,0,1
*/
clear_pending_set_locked(lock);
- qstat_inc(qstat_lock_pending, true);
+ lockevent_inc(lock_pending);
return;
/*
@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* queuing.
*/
queue:
- qstat_inc(qstat_lock_slowpath, true);
+ lockevent_inc(lock_slowpath);
pv_queue:
node = this_cpu_ptr(&qnodes[0].mcs);
idx = node->count++;
@@ -419,7 +419,7 @@ pv_queue:
* simple enough.
*/
if (unlikely(idx >= MAX_NODES)) {
- qstat_inc(qstat_lock_no_node, true);
+ lockevent_inc(lock_no_node);
while (!queued_spin_trylock(lock))
cpu_relax();
goto release;
@@ -430,7 +430,7 @@ pv_queue:
/*
* Keep counts of non-zero index values:
*/
- qstat_inc(qstat_lock_use_node2 + idx - 1, idx);
+ lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
/*
* Ensure that we increment the head node->count before initialising
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8f36c27c1794..89bab079e7a4 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
if (!(val & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
- qstat_inc(qstat_pv_lock_stealing, true);
+ lockevent_inc(pv_lock_stealing);
return true;
}
if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK))
@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
hopcnt++;
if (!cmpxchg(&he->lock, NULL, lock)) {
WRITE_ONCE(he->node, node);
- qstat_hop(hopcnt);
+ lockevent_pv_hop(hopcnt);
return &he->lock;
}
}
@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
smp_store_mb(pn->state, vcpu_halted);
if (!READ_ONCE(node->locked)) {
- qstat_inc(qstat_pv_wait_node, true);
- qstat_inc(qstat_pv_wait_early, wait_early);
+ lockevent_inc(pv_wait_node);
+ lockevent_cond_inc(pv_wait_early, wait_early);
pv_wait(&pn->state, vcpu_halted);
}
@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
* So it is better to spin for a while in the hope that the
* MCS lock will be released soon.
*/
- qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
+ lockevent_cond_inc(pv_spurious_wakeup,
+ !READ_ONCE(node->locked));
}
/*
@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
/*
* Tracking # of slowpath locking operations
*/
- qstat_inc(qstat_lock_slowpath, true);
+ lockevent_inc(lock_slowpath);
for (;; waitcnt++) {
/*
@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
}
}
WRITE_ONCE(pn->state, vcpu_hashed);
- qstat_inc(qstat_pv_wait_head, true);
- qstat_inc(qstat_pv_wait_again, waitcnt);
+ lockevent_inc(pv_wait_head);
+ lockevent_cond_inc(pv_wait_again, waitcnt);
pv_wait(&lock->locked, _Q_SLOW_VAL);
/*
@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
* vCPU is harmless other than the additional latency in completing
* the unlock.
*/
- qstat_inc(qstat_pv_kick_unlock, true);
+ lockevent_inc(pv_kick_unlock);
pv_kick(node->cpu);
}
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d73f85388d5c..54152670ff24 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -9,262 +9,105 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
*/
-/*
- * When queued spinlock statistical counters are enabled, the following
- * debugfs files will be created for reporting the counter values:
- *
- * <debugfs>/qlockstat/
- * pv_hash_hops - average # of hops per hashing operation
- * pv_kick_unlock - # of vCPU kicks issued at unlock time
- * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
- * pv_latency_kick - average latency (ns) of vCPU kick operation
- * pv_latency_wake - average latency (ns) from vCPU kick to wakeup
- * pv_lock_stealing - # of lock stealing operations
- * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
- * pv_wait_again - # of wait's after a queue head vCPU kick
- * pv_wait_early - # of early vCPU wait's
- * pv_wait_head - # of vCPU wait's at the queue head
- * pv_wait_node - # of vCPU wait's at a non-head queue node
- * lock_pending - # of locking operations via pending code
- * lock_slowpath - # of locking operations via MCS lock queue
- * lock_use_node2 - # of locking operations that use 2nd per-CPU node
- * lock_use_node3 - # of locking operations that use 3rd per-CPU node
- * lock_use_node4 - # of locking operations that use 4th per-CPU node
- * lock_no_node - # of locking operations without using per-CPU node
- *
- * Subtracting lock_use_node[234] from lock_slowpath will give you
- * lock_use_node1.
- *
- * Writing to the "reset_counters" file will reset all the above counter
- * values.
- *
- * These statistical counters are implemented as per-cpu variables which are
- * summed and computed whenever the corresponding debugfs files are read. This
- * minimizes added overhead making the counters usable even in a production
- * environment.
- *
- * There may be slight difference between pv_kick_wake and pv_kick_unlock.
- */
-enum qlock_stats {
- qstat_pv_hash_hops,
- qstat_pv_kick_unlock,
- qstat_pv_kick_wake,
- qstat_pv_latency_kick,
- qstat_pv_latency_wake,
- qstat_pv_lock_stealing,
- qstat_pv_spurious_wakeup,
- qstat_pv_wait_again,
- qstat_pv_wait_early,
- qstat_pv_wait_head,
- qstat_pv_wait_node,
- qstat_lock_pending,
- qstat_lock_slowpath,
- qstat_lock_use_node2,
- qstat_lock_use_node3,
- qstat_lock_use_node4,
- qstat_lock_no_node,
- qstat_num, /* Total number of statistical counters */
- qstat_reset_cnts = qstat_num,
-};
+#include "lock_events.h"
-#ifdef CONFIG_QUEUED_LOCK_STAT
+#ifdef CONFIG_LOCK_EVENT_COUNTS
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
/*
- * Collect pvqspinlock statistics
+ * Collect pvqspinlock locking event counts
*/
-#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/fs.h>
-static const char * const qstat_names[qstat_num + 1] = {
- [qstat_pv_hash_hops] = "pv_hash_hops",
- [qstat_pv_kick_unlock] = "pv_kick_unlock",
- [qstat_pv_kick_wake] = "pv_kick_wake",
- [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
- [qstat_pv_latency_kick] = "pv_latency_kick",
- [qstat_pv_latency_wake] = "pv_latency_wake",
- [qstat_pv_lock_stealing] = "pv_lock_stealing",
- [qstat_pv_wait_again] = "pv_wait_again",
- [qstat_pv_wait_early] = "pv_wait_early",
- [qstat_pv_wait_head] = "pv_wait_head",
- [qstat_pv_wait_node] = "pv_wait_node",
- [qstat_lock_pending] = "lock_pending",
- [qstat_lock_slowpath] = "lock_slowpath",
- [qstat_lock_use_node2] = "lock_use_node2",
- [qstat_lock_use_node3] = "lock_use_node3",
- [qstat_lock_use_node4] = "lock_use_node4",
- [qstat_lock_no_node] = "lock_no_node",
- [qstat_reset_cnts] = "reset_counters",
-};
+#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev]
/*
- * Per-cpu counters
+ * PV specific per-cpu counter
*/
-static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
static DEFINE_PER_CPU(u64, pv_kick_time);
/*
- * Function to read and return the qlock statistical counter values
+ * Function to read and return the PV qspinlock counts.
*
* The following counters are handled specially:
- * 1. qstat_pv_latency_kick
+ * 1. pv_latency_kick
* Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
- * 2. qstat_pv_latency_wake
+ * 2. pv_latency_wake
* Average wake latency (ns) = pv_latency_wake/pv_kick_wake
- * 3. qstat_pv_hash_hops
+ * 3. pv_hash_hops
* Average hops/hash = pv_hash_hops/pv_kick_unlock
*/
-static ssize_t qstat_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
+ssize_t lockevent_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
{
char buf[64];
- int cpu, counter, len;
- u64 stat = 0, kicks = 0;
+ int cpu, id, len;
+ u64 sum = 0, kicks = 0;
/*
* Get the counter ID stored in file->f_inode->i_private
*/
- counter = (long)file_inode(file)->i_private;
+ id = (long)file_inode(file)->i_private;
- if (counter >= qstat_num)
+ if (id >= lockevent_num)
return -EBADF;
for_each_possible_cpu(cpu) {
- stat += per_cpu(qstats[counter], cpu);
+ sum += per_cpu(lockevents[id], cpu);
/*
- * Need to sum additional counter for some of them
+ * Need to sum additional counters for some of them
*/
- switch (counter) {
+ switch (id) {
- case qstat_pv_latency_kick:
- case qstat_pv_hash_hops:
- kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
+ case LOCKEVENT_pv_latency_kick:
+ case LOCKEVENT_pv_hash_hops:
+ kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
break;
- case qstat_pv_latency_wake:
- kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
+ case LOCKEVENT_pv_latency_wake:
+ kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
break;
}
}
- if (counter == qstat_pv_hash_hops) {
+ if (id == LOCKEVENT_pv_hash_hops) {
u64 frac = 0;
if (kicks) {
- frac = 100ULL * do_div(stat, kicks);
+ frac = 100ULL * do_div(sum, kicks);
frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
}
/*
* Return a X.XX decimal number
*/
- len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
+ len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
+ sum, frac);
} else {
/*
* Round to the nearest ns
*/
- if ((counter == qstat_pv_latency_kick) ||
- (counter == qstat_pv_latency_wake)) {
+ if ((id == LOCKEVENT_pv_latency_kick) ||
+ (id == LOCKEVENT_pv_latency_wake)) {
if (kicks)
- stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
+ sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
}
- len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
+ len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
}
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
/*
- * Function to handle write request
- *
- * When counter = reset_cnts, reset all the counter values.
- * Since the counter updates aren't atomic, the resetting is done twice
- * to make sure that the counters are very likely to be all cleared.
- */
-static ssize_t qstat_write(struct file *file, const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- int cpu;
-
- /*
- * Get the counter ID stored in file->f_inode->i_private
- */
- if ((long)file_inode(file)->i_private != qstat_reset_cnts)
- return count;
-
- for_each_possible_cpu(cpu) {
- int i;
- unsigned long *ptr = per_cpu_ptr(qstats, cpu);
-
- for (i = 0 ; i < qstat_num; i++)
- WRITE_ONCE(ptr[i], 0);
- }
- return count;
-}
-
-/*
- * Debugfs data structures
- */
-static const struct file_operations fops_qstat = {
- .read = qstat_read,
- .write = qstat_write,
- .llseek = default_llseek,
-};
-
-/*
- * Initialize debugfs for the qspinlock statistical counters
- */
-static int __init init_qspinlock_stat(void)
-{
- struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
- int i;
-
- if (!d_qstat)
- goto out;
-
- /*
- * Create the debugfs files
- *
- * As reading from and writing to the stat files can be slow, only
- * root is allowed to do the read/write to limit impact to system
- * performance.
- */
- for (i = 0; i < qstat_num; i++)
- if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
- (void *)(long)i, &fops_qstat))
- goto fail_undo;
-
- if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
- (void *)(long)qstat_reset_cnts, &fops_qstat))
- goto fail_undo;
-
- return 0;
-fail_undo:
- debugfs_remove_recursive(d_qstat);
-out:
- pr_warn("Could not create 'qlockstat' debugfs entries\n");
- return -ENOMEM;
-}
-fs_initcall(init_qspinlock_stat);
-
-/*
- * Increment the PV qspinlock statistical counters
- */
-static inline void qstat_inc(enum qlock_stats stat, bool cond)
-{
- if (cond)
- this_cpu_inc(qstats[stat]);
-}
-
-/*
* PV hash hop count
*/
-static inline void qstat_hop(int hopcnt)
+static inline void lockevent_pv_hop(int hopcnt)
{
- this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
+ this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
}
/*
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu)
per_cpu(pv_kick_time, cpu) = start;
pv_kick(cpu);
- this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
+ this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
}
/*
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val)
*pkick_time = 0;
pv_wait(ptr, val);
if (*pkick_time) {
- this_cpu_add(qstats[qstat_pv_latency_wake],
+ this_cpu_add(EVENT_COUNT(pv_latency_wake),
sched_clock() - *pkick_time);
- qstat_inc(qstat_pv_kick_wake, true);
+ lockevent_inc(pv_kick_wake);
}
}
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)
-#else /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_LOCK_EVENT_COUNTS */
-static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
-static inline void qstat_hop(int hopcnt) { }
+static inline void lockevent_pv_hop(int hopcnt) { }
-#endif /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_LOCK_EVENT_COUNTS */
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
deleted file mode 100644
index a7ffb2a96ede..000000000000
--- a/kernel/locking/rwsem-spinlock.c
+++ /dev/null
@@ -1,339 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
- * generic spinlock implementation
- *
- * Copyright (c) 2001 David Howells (dhowells@redhat.com).
- * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
- * - Derived also from comments by Linus
- */
-#include <linux/rwsem.h>
-#include <linux/sched/signal.h>
-#include <linux/sched/debug.h>
-#include <linux/export.h>
-
-enum rwsem_waiter_type {
- RWSEM_WAITING_FOR_WRITE,
- RWSEM_WAITING_FOR_READ
-};
-
-struct rwsem_waiter {
- struct list_head list;
- struct task_struct *task;
- enum rwsem_waiter_type type;
-};
-
-int rwsem_is_locked(struct rw_semaphore *sem)
-{
- int ret = 1;
- unsigned long flags;
-
- if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
- ret = (sem->count != 0);
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- }
- return ret;
-}
-EXPORT_SYMBOL(rwsem_is_locked);
-
-/*
- * initialise the semaphore
- */
-void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held semaphore:
- */
- debug_check_no_locks_freed((void *)sem, sizeof(*sem));
- lockdep_init_map(&sem->dep_map, name, key, 0);
-#endif
- sem->count = 0;
- raw_spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-EXPORT_SYMBOL(__init_rwsem);
-
-/*
- * handle the lock release when processes blocked on it that can now run
- * - if we come here, then:
- * - the 'active count' _reached_ zero
- * - the 'waiting count' is non-zero
- * - the spinlock must be held by the caller
- * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if wakewrite is non-zero
- */
-static inline struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
-{
- struct rwsem_waiter *waiter;
- struct task_struct *tsk;
- int woken;
-
- waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-
- if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
- if (wakewrite)
- /* Wake up a writer. Note that we do not grant it the
- * lock - it will have to acquire it when it runs. */
- wake_up_process(waiter->task);
- goto out;
- }
-
- /* grant an infinite number of read locks to the front of the queue */
- woken = 0;
- do {
- struct list_head *next = waiter->list.next;
-
- list_del(&waiter->list);
- tsk = waiter->task;
- /*
- * Make sure we do not wakeup the next reader before
- * setting the nil condition to grant the next reader;
- * otherwise we could miss the wakeup on the other
- * side and end up sleeping again. See the pairing
- * in rwsem_down_read_failed().
- */
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
- woken++;
- if (next == &sem->wait_list)
- break;
- waiter = list_entry(next, struct rwsem_waiter, list);
- } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
- sem->count += woken;
-
- out:
- return sem;
-}
-
-/*
- * wake a single writer
- */
-static inline struct rw_semaphore *
-__rwsem_wake_one_writer(struct rw_semaphore *sem)
-{
- struct rwsem_waiter *waiter;
-
- waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
- wake_up_process(waiter->task);
-
- return sem;
-}
-
-/*
- * get a read lock on the semaphore
- */
-int __sched __down_read_common(struct rw_semaphore *sem, int state)
-{
- struct rwsem_waiter waiter;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (sem->count >= 0 && list_empty(&sem->wait_list)) {
- /* granted */
- sem->count++;
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- goto out;
- }
-
- /* set up my own style of waitqueue */
- waiter.task = current;
- waiter.type = RWSEM_WAITING_FOR_READ;
- get_task_struct(current);
-
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* wait to be given the lock */
- for (;;) {
- if (!waiter.task)
- break;
- if (signal_pending_state(state, current))
- goto out_nolock;
- set_current_state(state);
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- schedule();
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
- }
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- out:
- return 0;
-
-out_nolock:
- /*
- * We didn't take the lock, so that there is a writer, which
- * is owner or the first waiter of the sem. If it's a waiter,
- * it will be woken by current owner. Not need to wake anybody.
- */
- list_del(&waiter.list);
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- return -EINTR;
-}
-
-void __sched __down_read(struct rw_semaphore *sem)
-{
- __down_read_common(sem, TASK_UNINTERRUPTIBLE);
-}
-
-int __sched __down_read_killable(struct rw_semaphore *sem)
-{
- return __down_read_common(sem, TASK_KILLABLE);
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-int __down_read_trylock(struct rw_semaphore *sem)
-{
- unsigned long flags;
- int ret = 0;
-
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (sem->count >= 0 && list_empty(&sem->wait_list)) {
- /* granted */
- sem->count++;
- ret = 1;
- }
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- return ret;
-}
-
-/*
- * get a write lock on the semaphore
- */
-int __sched __down_write_common(struct rw_semaphore *sem, int state)
-{
- struct rwsem_waiter waiter;
- unsigned long flags;
- int ret = 0;
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- /* set up my own style of waitqueue */
- waiter.task = current;
- waiter.type = RWSEM_WAITING_FOR_WRITE;
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* wait for someone to release the lock */
- for (;;) {
- /*
- * That is the key to support write lock stealing: allows the
- * task already on CPU to get the lock soon rather than put
- * itself into sleep and waiting for system woke it or someone
- * else in the head of the wait list up.
- */
- if (sem->count == 0)
- break;
- if (signal_pending_state(state, current))
- goto out_nolock;
-
- set_current_state(state);
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- schedule();
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
- }
- /* got the lock */
- sem->count = -1;
- list_del(&waiter.list);
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- return ret;
-
-out_nolock:
- list_del(&waiter.list);
- if (!list_empty(&sem->wait_list) && sem->count >= 0)
- __rwsem_do_wake(sem, 0);
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- return -EINTR;
-}
-
-void __sched __down_write(struct rw_semaphore *sem)
-{
- __down_write_common(sem, TASK_UNINTERRUPTIBLE);
-}
-
-int __sched __down_write_killable(struct rw_semaphore *sem)
-{
- return __down_write_common(sem, TASK_KILLABLE);
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-int __down_write_trylock(struct rw_semaphore *sem)
-{
- unsigned long flags;
- int ret = 0;
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (sem->count == 0) {
- /* got the lock */
- sem->count = -1;
- ret = 1;
- }
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- return ret;
-}
-
-/*
- * release a read lock on the semaphore
- */
-void __up_read(struct rw_semaphore *sem)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (--sem->count == 0 && !list_empty(&sem->wait_list))
- sem = __rwsem_wake_one_writer(sem);
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-/*
- * release a write lock on the semaphore
- */
-void __up_write(struct rw_semaphore *sem)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- sem->count = 0;
- if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 1);
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
-/*
- * downgrade a write lock into a read lock
- * - just wake up any readers at the front of the queue
- */
-void __downgrade_write(struct rw_semaphore *sem)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&sem->wait_lock, flags);
-
- sem->count = 1;
- if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, 0);
-
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-}
-
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index fbe96341beee..6b3ee9948bf1 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
* will notice the queued writer.
*/
wake_q_add(wake_q, waiter->task);
+ lockevent_inc(rwsem_wake_writer);
}
return;
@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
goto try_reader_grant;
}
/*
- * It is not really necessary to set it to reader-owned here,
- * but it gives the spinners an early indication that the
- * readers now have the lock.
+ * Set it to reader-owned to give spinners an early
+ * indication that readers now have the lock.
*/
__rwsem_set_reader_owned(sem, waiter->task);
}
@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
}
adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+ lockevent_cond_inc(rwsem_wake_reader, woken);
if (list_empty(&sem->wait_list)) {
/* hit end of list above */
adjustment -= RWSEM_WAITING_BIAS;
@@ -225,92 +226,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
}
/*
- * Wait for the read lock to be granted
- */
-static inline struct rw_semaphore __sched *
-__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
-{
- long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
- struct rwsem_waiter waiter;
- DEFINE_WAKE_Q(wake_q);
-
- waiter.task = current;
- waiter.type = RWSEM_WAITING_FOR_READ;
-
- raw_spin_lock_irq(&sem->wait_lock);
- if (list_empty(&sem->wait_list)) {
- /*
- * In case the wait queue is empty and the lock isn't owned
- * by a writer, this reader can exit the slowpath and return
- * immediately as its RWSEM_ACTIVE_READ_BIAS has already
- * been set in the count.
- */
- if (atomic_long_read(&sem->count) >= 0) {
- raw_spin_unlock_irq(&sem->wait_lock);
- return sem;
- }
- adjustment += RWSEM_WAITING_BIAS;
- }
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we're now waiting on the lock, but no longer actively locking */
- count = atomic_long_add_return(adjustment, &sem->count);
-
- /*
- * If there are no active locks, wake the front queued process(es).
- *
- * If there are no writers and we are first in the queue,
- * wake our own waiter to join the existing active readers !
- */
- if (count == RWSEM_WAITING_BIAS ||
- (count > RWSEM_WAITING_BIAS &&
- adjustment != -RWSEM_ACTIVE_READ_BIAS))
- __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
-
- raw_spin_unlock_irq(&sem->wait_lock);
- wake_up_q(&wake_q);
-
- /* wait to be given the lock */
- while (true) {
- set_current_state(state);
- if (!waiter.task)
- break;
- if (signal_pending_state(state, current)) {
- raw_spin_lock_irq(&sem->wait_lock);
- if (waiter.task)
- goto out_nolock;
- raw_spin_unlock_irq(&sem->wait_lock);
- break;
- }
- schedule();
- }
-
- __set_current_state(TASK_RUNNING);
- return sem;
-out_nolock:
- list_del(&waiter.list);
- if (list_empty(&sem->wait_list))
- atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
- raw_spin_unlock_irq(&sem->wait_lock);
- __set_current_state(TASK_RUNNING);
- return ERR_PTR(-EINTR);
-}
-
-__visible struct rw_semaphore * __sched
-rwsem_down_read_failed(struct rw_semaphore *sem)
-{
- return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
-}
-EXPORT_SYMBOL(rwsem_down_read_failed);
-
-__visible struct rw_semaphore * __sched
-rwsem_down_read_failed_killable(struct rw_semaphore *sem)
-{
- return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
-}
-EXPORT_SYMBOL(rwsem_down_read_failed_killable);
-
-/*
* This function must be called with the sem->wait_lock held to prevent
* race conditions between checking the rwsem wait list and setting the
* sem->count accordingly.
@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
- long old, count = atomic_long_read(&sem->count);
-
- while (true) {
- if (!(count == 0 || count == RWSEM_WAITING_BIAS))
- return false;
+ long count = atomic_long_read(&sem->count);
- old = atomic_long_cmpxchg_acquire(&sem->count, count,
- count + RWSEM_ACTIVE_WRITE_BIAS);
- if (old == count) {
+ while (!count || count == RWSEM_WAITING_BIAS) {
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
+ count + RWSEM_ACTIVE_WRITE_BIAS)) {
rwsem_set_owner(sem);
+ lockevent_inc(rwsem_opt_wlock);
return true;
}
-
- count = old;
}
+ return false;
}
static inline bool owner_on_cpu(struct task_struct *owner)
@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
osq_unlock(&sem->osq);
done:
preempt_enable();
+ lockevent_cond_inc(rwsem_opt_fail, !taken);
return taken;
}
@@ -505,6 +417,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
#endif
/*
+ * Wait for the read lock to be granted
+ */
+static inline struct rw_semaphore __sched *
+__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
+{
+ long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
+ struct rwsem_waiter waiter;
+ DEFINE_WAKE_Q(wake_q);
+
+ waiter.task = current;
+ waiter.type = RWSEM_WAITING_FOR_READ;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (list_empty(&sem->wait_list)) {
+ /*
+ * In case the wait queue is empty and the lock isn't owned
+ * by a writer, this reader can exit the slowpath and return
+ * immediately as its RWSEM_ACTIVE_READ_BIAS has already
+ * been set in the count.
+ */
+ if (atomic_long_read(&sem->count) >= 0) {
+ raw_spin_unlock_irq(&sem->wait_lock);
+ rwsem_set_reader_owned(sem);
+ lockevent_inc(rwsem_rlock_fast);
+ return sem;
+ }
+ adjustment += RWSEM_WAITING_BIAS;
+ }
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we're now waiting on the lock, but no longer actively locking */
+ count = atomic_long_add_return(adjustment, &sem->count);
+
+ /*
+ * If there are no active locks, wake the front queued process(es).
+ *
+ * If there are no writers and we are first in the queue,
+ * wake our own waiter to join the existing active readers !
+ */
+ if (count == RWSEM_WAITING_BIAS ||
+ (count > RWSEM_WAITING_BIAS &&
+ adjustment != -RWSEM_ACTIVE_READ_BIAS))
+ __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+
+ /* wait to be given the lock */
+ while (true) {
+ set_current_state(state);
+ if (!waiter.task)
+ break;
+ if (signal_pending_state(state, current)) {
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (waiter.task)
+ goto out_nolock;
+ raw_spin_unlock_irq(&sem->wait_lock);
+ break;
+ }
+ schedule();
+ lockevent_inc(rwsem_sleep_reader);
+ }
+
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(rwsem_rlock);
+ return sem;
+out_nolock:
+ list_del(&waiter.list);
+ if (list_empty(&sem->wait_list))
+ atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(rwsem_rlock_fail);
+ return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed(struct rw_semaphore *sem)
+{
+ return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(rwsem_down_read_failed);
+
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed_killable(struct rw_semaphore *sem)
+{
+ return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_read_failed_killable);
+
+/*
* Wait until we successfully acquire the write lock
*/
static inline struct rw_semaphore *
@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
goto out_nolock;
schedule();
+ lockevent_inc(rwsem_sleep_writer);
set_current_state(state);
} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
__set_current_state(TASK_RUNNING);
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
+ lockevent_inc(rwsem_wlock);
return ret;
@@ -601,6 +606,7 @@ out_nolock:
__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
wake_up_q(&wake_q);
+ lockevent_inc(rwsem_wlock_fail);
return ERR_PTR(-EINTR);
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e586f0d03ad3..ccbf18f560ff 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem)
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
- rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read);
@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
return -EINTR;
}
- rwsem_set_reader_owned(sem);
return 0;
}
@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem)
{
int ret = __down_read_trylock(sem);
- if (ret == 1) {
+ if (ret == 1)
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
- rwsem_set_reader_owned(sem);
- }
return ret;
}
@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem)
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
- rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write);
@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem)
return -EINTR;
}
- rwsem_set_owner(sem);
return 0;
}
@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem)
{
int ret = __down_write_trylock(sem);
- if (ret == 1) {
+ if (ret == 1)
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
- rwsem_set_owner(sem);
- }
return ret;
}
@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock);
void up_read(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
- DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
- rwsem_clear_reader_owned(sem);
__up_read(sem);
}
@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read);
void up_write(struct rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, 1, _RET_IP_);
- DEBUG_RWSEMS_WARN_ON(sem->owner != current);
- rwsem_clear_owner(sem);
__up_write(sem);
}
@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write);
void downgrade_write(struct rw_semaphore *sem)
{
lock_downgrade(&sem->dep_map, _RET_IP_);
- DEBUG_RWSEMS_WARN_ON(sem->owner != current);
- rwsem_set_reader_owned(sem);
__downgrade_write(sem);
}
@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
- rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read_nested);
@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
- rwsem_set_owner(sem);
}
EXPORT_SYMBOL(_down_write_nest_lock);
@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
- rwsem_set_owner(sem);
}
EXPORT_SYMBOL(down_write_nested);
@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
return -EINTR;
}
- rwsem_set_owner(sem);
return 0;
}
@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested);
void up_read_non_owner(struct rw_semaphore *sem)
{
- DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
+ DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
+ sem);
__up_read(sem);
}
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index bad2bca0268b..64877f5294e3 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -23,15 +23,44 @@
* is involved. Ideally we would like to track all the readers that own
* a rwsem, but the overhead is simply too big.
*/
+#include "lock_events.h"
+
#define RWSEM_READER_OWNED (1UL << 0)
#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)
#ifdef CONFIG_DEBUG_RWSEMS
-# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c)
+# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
+ if (!debug_locks_silent && \
+ WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
+ #c, atomic_long_read(&(sem)->count), \
+ (long)((sem)->owner), (long)current, \
+ list_empty(&(sem)->wait_list) ? "" : "not ")) \
+ debug_locks_off(); \
+ } while (0)
+#else
+# define DEBUG_RWSEMS_WARN_ON(c, sem)
+#endif
+
+/*
+ * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
+ * Adapted largely from include/asm-i386/rwsem.h
+ * by Paul Mackerras <paulus@samba.org>.
+ */
+
+/*
+ * the semaphore definition
+ */
+#ifdef CONFIG_64BIT
+# define RWSEM_ACTIVE_MASK 0xffffffffL
#else
-# define DEBUG_RWSEMS_WARN_ON(c)
+# define RWSEM_ACTIVE_MASK 0x0000ffffL
#endif
+#define RWSEM_ACTIVE_BIAS 0x00000001L
+#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
+#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
* All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
+
+/*
+ * lock for reading
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+ if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
+ rwsem_down_read_failed(sem);
+ DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
+ RWSEM_READER_OWNED), sem);
+ } else {
+ rwsem_set_reader_owned(sem);
+ }
+}
+
+static inline int __down_read_killable(struct rw_semaphore *sem)
+{
+ if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
+ if (IS_ERR(rwsem_down_read_failed_killable(sem)))
+ return -EINTR;
+ DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
+ RWSEM_READER_OWNED), sem);
+ } else {
+ rwsem_set_reader_owned(sem);
+ }
+ return 0;
+}
+
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+ /*
+ * Optimize for the case when the rwsem is not locked at all.
+ */
+ long tmp = RWSEM_UNLOCKED_VALUE;
+
+ lockevent_inc(rwsem_rtrylock);
+ do {
+ if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
+ tmp + RWSEM_ACTIVE_READ_BIAS)) {
+ rwsem_set_reader_owned(sem);
+ return 1;
+ }
+ } while (tmp >= 0);
+ return 0;
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
+ &sem->count);
+ if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+ rwsem_down_write_failed(sem);
+ rwsem_set_owner(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
+ &sem->count);
+ if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+ if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+ return -EINTR;
+ rwsem_set_owner(sem);
+ return 0;
+}
+
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ lockevent_inc(rwsem_wtrylock);
+ tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
+ if (tmp == RWSEM_UNLOCKED_VALUE) {
+ rwsem_set_owner(sem);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * unlock after reading
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
+ sem);
+ rwsem_clear_reader_owned(sem);
+ tmp = atomic_long_dec_return_release(&sem->count);
+ if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
+ rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+ DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
+ rwsem_clear_owner(sem);
+ if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
+ &sem->count) < 0))
+ rwsem_wake(sem);
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+ long tmp;
+
+ /*
+ * When downgrading from exclusive to shared ownership,
+ * anything inside the write-locked region cannot leak
+ * into the read side. In contrast, anything in the
+ * read-locked region is ok to be re-ordered into the
+ * write side. As such, rely on RELEASE semantics.
+ */
+ DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
+ tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
+ rwsem_set_reader_owned(sem);
+ if (tmp < 0)
+ rwsem_downgrade_wake(sem);
+}