diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-25 12:41:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-25 12:41:29 -0700 |
commit | c86ad14d305d2429c3da19462440bac50c183def (patch) | |
tree | bd794cd72476661faf82c440063c217bb978ce44 /arch | |
parent | a2303849a6b4b7ba59667091e00d6bb194071d9a (diff) | |
parent | f06628638cf6e75f179742b6c1b35076965b9fdd (diff) |
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"The locking tree was busier in this cycle than the usual pattern - a
couple of major projects happened to coincide.
The main changes are:
- implement the atomic_fetch_{add,sub,and,or,xor}() API natively
across all SMP architectures (Peter Zijlstra)
- add atomic_fetch_{inc/dec}() as well, using the generic primitives
(Davidlohr Bueso)
- optimize various aspects of rwsems (Jason Low, Davidlohr Bueso,
Waiman Long)
- optimize smp_cond_load_acquire() on arm64 and implement LSE based
atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
on arm64 (Will Deacon)
- introduce smp_acquire__after_ctrl_dep() and fix various barrier
mis-uses and bugs (Peter Zijlstra)
- after discovering ancient spin_unlock_wait() barrier bugs in its
implementation and usage, strengthen its semantics and update/fix
usage sites (Peter Zijlstra)
- optimize mutex_trylock() fastpath (Peter Zijlstra)
- ... misc fixes and cleanups"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (67 commits)
locking/atomic: Introduce inc/dec variants for the atomic_fetch_$op() API
locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire()
locking/static_keys: Fix non static symbol Sparse warning
locking/qspinlock: Use __this_cpu_dec() instead of full-blown this_cpu_dec()
locking/atomic, arch/tile: Fix tilepro build
locking/atomic, arch/m68k: Remove comment
locking/atomic, arch/arc: Fix build
locking/Documentation: Clarify limited control-dependency scope
locking/atomic, arch/rwsem: Employ atomic_long_fetch_add()
locking/atomic, arch/qrwlock: Employ atomic_fetch_add_acquire()
locking/atomic, arch/mips: Convert to _relaxed atomics
locking/atomic, arch/alpha: Convert to _relaxed atomics
locking/atomic: Remove the deprecated atomic_{set,clear}_mask() functions
locking/atomic: Remove linux/atomic.h:atomic_fetch_or()
locking/atomic: Implement atomic{,64,_long}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}()
locking/atomic: Fix atomic64_relaxed() bits
locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}()
locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()
locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()
locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()
...
Diffstat (limited to 'arch')
72 files changed, 2063 insertions, 609 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 572b228c44c7..498933a7df97 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -61,7 +60,23 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ return result; \ } @@ -82,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -97,34 +111,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ + return result; \ +} + +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ return result; \ } #define ATOMIC_OPS(op) \ ATOMIC_OP(op, op##l) \ ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ ATOMIC64_OP(op, op##q) \ - ATOMIC64_OP_RETURN(op, op##q) + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, bis) -ATOMIC_OP(xor, xor) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, bis) -ATOMIC64_OP(xor, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index fed9c6f44c19..a40b9fc0c6c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -3,6 +3,8 @@ #include <linux/kernel.h> #include <asm/current.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -13,8 +15,11 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index dd683995bc9d..4e3c1b6b0806 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return orig; \ +} + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + #endif /* !CONFIG_ARC_HAS_LLSC */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #else /* CONFIG_ARC_PLAT_EZNPS */ @@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return temp; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) -ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) -ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) +ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #endif /* CONFIG_ARC_PLAT_EZNPS */ #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index cded4a9b5438..233d5ffe6ec7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -15,8 +15,11 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #ifdef CONFIG_ARC_HAS_LLSC diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9e10c4567eb4..66d0e215a773 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result, val; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_fetch_" #op "\n" \ +"1: ldrex %0, [%4]\n" \ +" " #asm_op " %1, %0, %5\n" \ +" strex %2, %1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + return result; \ +} + #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) { @@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + val = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return val; \ +} + static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, orr) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, orr) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long \ +atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +{ \ + long long result, val; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \ +"1: ldrexd %0, %H0, [%4]\n" \ +" " #op1 " %Q1, %Q0, %Q5\n" \ +" " #op2 " %R1, %R0, %R5\n" \ +" strexd %2, %1, %H1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) ATOMIC64_OPS(add, adds, adc) ATOMIC64_OPS(sub, subs, sbc) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) #define atomic64_andnot atomic64_andnot -ATOMIC64_OP(and, and, and) -ATOMIC64_OP(andnot, bic, bic) -ATOMIC64_OP(or, orr, orr) -ATOMIC64_OP(xor, eor, eor) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, orr, orr) +ATOMIC64_OPS(xor, eor, eor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0fa418463f49..4bec45442072 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -6,6 +6,8 @@ #endif #include <linux/prefetch.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K @@ -50,8 +52,21 @@ static inline void dsb_sev(void) * memory. */ -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->tickets.owner); + + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.tickets.owner == tmp.tickets.next || + tmp.tickets.owner != owner) + break; + + wfe(); + } + smp_acquire__after_ctrl_dep(); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586a421c..c0235e0ff849 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -125,6 +155,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f6ba02..f819fdcff1ac 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d340fec5..b5890be8f257 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,57 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN -#undef __LL_SC_ATOMIC - -#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC_FETCH_OP_SUB +#undef __LL_SC_ATOMIC + +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC64_OP + +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ } -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ @@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49618db..4eea7f618dce 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include <asm-generic/barrier.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b404454..bd86a79491bc 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index d74fd8ce980a..3d5ce38a6f0b 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -41,21 +41,49 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \ +static inline int __atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, val; \ + \ + asm volatile( \ + "/* atomic_fetch_" #op " */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %3\n" \ + " mov %1, %0\n" \ + " " #asm_op " %1, %4\n" \ + " stcond %2, %1\n" \ + " brne 1b" \ + : "=&r" (result), "=&r" (val), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + ATOMIC_OP_RETURN(sub, sub, rKs21) ATOMIC_OP_RETURN(add, add, r) +ATOMIC_FETCH_OP (sub, sub, rKs21) +ATOMIC_FETCH_OP (add, add, r) -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic_##op##_return(i, v); \ +} \ +ATOMIC_FETCH_OP(op, asm_op, r) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic_fetch_##op(i, v); \ } -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OP +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN /* @@ -87,6 +115,14 @@ static inline int atomic_add_return(int i, atomic_t *v) return __atomic_add_return(i, v); } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(-i, v); + + return __atomic_fetch_add(i, v); +} + /* * atomic_sub_return - subtract the atomic variable * @i: integer value to subtract @@ -102,6 +138,14 @@ static inline int atomic_sub_return(int i, atomic_t *v) return __atomic_add_return(-i, v); } +static inline int atomic_fetch_sub(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(i, v); + + return __atomic_fetch_add(-i, v); +} + /* * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 1c1c42330c99..63c7deceeeb6 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -17,6 +17,7 @@ asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr); asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value); +asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value); @@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) +#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i) +#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i)) + #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) +#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i) +#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i) +#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i) + #endif #include <asm-generic/atomic.h> diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 490c7caa02d9..c58f4a83ed6f 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -12,6 +12,8 @@ #else #include <linux/atomic.h> +#include <asm/processor.h> +#include <asm/barrier.h> asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); @@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) - cpu_relax(); + smp_cond_load_acquire(&lock->lock, !VAL); } static inline int arch_read_can_lock(arch_rwlock_t *rw) diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index a401c27b69b4..68096e8f787f 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16); #ifdef CONFIG_SMP EXPORT_SYMBOL(__raw_atomic_add_asm); +EXPORT_SYMBOL(__raw_atomic_xadd_asm); EXPORT_SYMBOL(__raw_atomic_and_asm); EXPORT_SYMBOL(__raw_atomic_or_asm); EXPORT_SYMBOL(__raw_atomic_xor_asm); diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 26fccb5568b9..1e2989c5d6b2 100644 --- a/arch/blackfin/mach-bf561/atomic.S +++ b/arch/blackfin/mach-bf561/atomic.S @@ -607,6 +607,28 @@ ENDPROC(___raw_atomic_add_asm) /* * r0 = ptr + * r1 = value + * + * ADD a signed value to a 32bit word and return the old value atomically. + * Clobbers: r3:0, p1:0 + */ +ENTRY(___raw_atomic_xadd_asm) + p1 = r0; + r3 = r1; + [--sp] = rets; + call _get_core_lock; + r3 = [p1]; + r2 = r3 + r2; + [p1] = r2; + r1 = p1; + call _put_core_lock; + r0 = r3; + rets = [sp++]; + rts; +ENDPROC(___raw_atomic_add_asm) + +/* + * r0 = ptr * r1 = mask * * AND the mask bits from a 32bit word and return the old 32bit value @@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 & r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 & r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 | r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 | r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 ^ r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 ^ r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 64f02d451aa8..1c2a5e264fc7 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_inc_return(v); @@ -136,16 +126,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -static inline void atomic64_add(long long i, atomic64_t *v) -{ - atomic64_add_return(i, v); -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - atomic64_sub_return(i, v); -} - static inline void atomic64_inc(atomic64_t *v) { atomic64_inc_return(v); @@ -182,11 +162,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) } #define ATOMIC_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic32_fetch_##op(i, &v->counter); \ +} \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic32_fetch_##op(i, &v->counter); \ } \ \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + return __atomic64_fetch_##op(i, &v->counter); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ (void)__atomic64_fetch_##op(i, &v->counter); \ @@ -195,6 +183,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) +ATOMIC_OP(add) +ATOMIC_OP(sub) #undef ATOMIC_OP diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h index 36e126d2f801..d4912c88b829 100644 --- a/arch/frv/include/asm/atomic_defs.h +++ b/arch/frv/include/asm/atomic_defs.h @@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op); ATOMIC_FETCH_OP(or) ATOMIC_FETCH_OP(and) ATOMIC_FETCH_OP(xor) +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) ATOMIC_OP_RETURN(add) ATOMIC_OP_RETURN(sub) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 4435a445ae7e..349a47a918db 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + h8300flags flags; \ + int ret; \ + \ + flags = arch_local_irq_save(); \ + ret = v->counter; \ + v->counter c_op i; \ + arch_local_irq_restore(flags); \ + return ret; \ +} + #define ATOMIC_OP(op, c_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -41,17 +54,21 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) +#undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define atomic_add(i, v) (void)atomic_add_return(i, v) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -#define atomic_sub(i, v) (void)atomic_sub_return(i, v) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_inc_return(v) atomic_add_return(1, v) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 55696c4100d4..a62ba368b27d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ ); \ } \ -#define ATOMIC_OP_RETURN(op) \ +#define ATOMIC_OP_RETURN(op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ int output; \ @@ -127,16 +127,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return output; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int output, val; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%2);\n" \ + " %1 = "#op "(%0,%3);\n" \ + " memw_locked(%2,P3)=%1;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 12ca4ebc0338..a1c55788c5d6 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -23,6 +23,8 @@ #define _ASM_SPINLOCK_H #include <asm/irqflags.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * This file is pulled in for SMP builds. @@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * SMP spinlocks are intended to allow only a single CPU at the lock */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(lock) \ - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} + #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 8dfb5f6f6c35..f565ad376142 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \ return new; \ } -ATOMIC_OP(add, +) -ATOMIC_OP(sub, -) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) #define atomic_add_return(i,v) \ ({ \ @@ -69,14 +88,44 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -ATOMIC_OP(and, &) -ATOMIC_OP(or, |) -ATOMIC_OP(xor, ^) +#define atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) -#define atomic_and(i,v) (void)ia64_atomic_and(i,v) -#define atomic_or(i,v) (void)ia64_atomic_or(i,v) -#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v) +#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ @@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ return new; \ } -ATOMIC64_OP(add, +) -ATOMIC64_OP(sub, -) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ @@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -) : ia64_atomic64_sub(__ia64_asr_i, v); \ }) -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define atomic64_fetch_add(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic64_fetch_sub(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) -#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v) -#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v) -#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v) +#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. - */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 45698cd15b7b..ca9e76149a4a 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -15,6 +15,8 @@ #include <linux/atomic.h> #include <asm/intrinsics.h> +#include <asm/barrier.h> +#include <asm/processor.h> #define arch_spin_lock_init(x) ((x)->lock = 0) @@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) return; cpu_relax(); } + + smp_acquire__after_ctrl_dep(); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index ea35160d632b..640cc1c7099f 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -89,16 +89,44 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result, val; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_fetch_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%2") \ + M32R_LOCK" %1, @%2; \n\t" \ + "mv %0, %1 \n\t" \ + #op " %1, %3; \n\t" \ + M32R_UNLOCK" %1, @%2; \n\t" \ + : "=&r" (result), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index fa13694eaae3..323c7fc953cd 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include <linux/atomic.h> #include <asm/dcache_clear.h> #include <asm/page.h> +#include <asm/barrier.h> +#include <asm/processor.h> /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -27,8 +29,11 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL > 0); +} /** * arch_spin_trylock - Try spin lock and return a result diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 4858178260f9..cf4c3a7b1a45 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -53,6 +53,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return tmp; \ +} + #else #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -68,20 +83,41 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = v->counter; \ + v->counter c_op i; \ + local_irq_restore(flags); \ + \ + return t; \ +} + #endif /* CONFIG_RMW_INSNS */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 88fa25fae8bd..def2c642f053 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %0, %1, %3\n" \ + " LNKSETD [%2], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&d" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 0295d9b8d5bf..6c1380a8a0d4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 86a7cf3d1386..c0c7a22be1ae 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -1,14 +1,24 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include <asm/barrier.h> +#include <asm/processor.h> + #ifdef CONFIG_METAG_ATOMICITY_LOCK1 #include <asm/spinlock_lock1.h> #else #include <asm/spinlock_lnkget.h> #endif -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +/* + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 + * locked. + */ + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 835b402e4574..0ab176bdb8e8 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ " .set mips0 \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ } while (unlikely(!temp)); \ } else { \ @@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -125,23 +123,84 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ + return result; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ \ return result; \ } #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -362,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -409,22 +466,85 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ + return result; \ +} + +#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "=" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ \ return result; \ } #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) + ATOMIC64_OP_RETURN(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) -ATOMIC64_OP(and, &=, and) -ATOMIC64_OP(or, |=, or) -ATOMIC64_OP(xor, ^=, xor) + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) + +ATOMIC64_OPS(and, &=, and) +ATOMIC64_OPS(or, |=, or) +ATOMIC64_OPS(xor, ^=, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..f485afe51514 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -12,6 +12,7 @@ #include <linux/compiler.h> #include <asm/barrier.h> +#include <asm/processor.h> #include <asm/compiler.h> #include <asm/war.h> @@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - while (arch_spin_is_locked(x)) { cpu_relax(); } + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->h.serving_now); + smp_rmb(); + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.h.serving_now == tmp.h.ticket || + tmp.h.serving_now != owner) + break; + + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} static inline int arch_spin_is_contended(arch_spinlock_t *lock) { diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index ce318d5ab23b..36389efd45e8 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -84,16 +84,41 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return retval; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " mov %1,%0 \n" \ + " " #op " %5,%0 \n" \ + " mov %0,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 1ae580f38933..9c7b8f7942d8 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -12,6 +12,8 @@ #define _ASM_SPINLOCK_H #include <linux/atomic.h> +#include <asm/barrier.h> +#include <asm/processor.h> #include <asm/rwlock.h> #include <asm/page.h> @@ -23,7 +25,11 @@ */ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} static inline void arch_spin_unlock(arch_spinlock_t *lock) { diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 1d109990a022..5394b9c5f914 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -121,16 +121,39 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -185,15 +208,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ return ret; \ } -#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) -ATOMIC64_OP(and, &=) -ATOMIC64_OP(or, |=) -ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &=) +ATOMIC64_OPS(or, |=) +ATOMIC64_OPS(xor, ^=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 64f2992e439f..e32936cd7f10 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x) } #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *x) +{ + volatile unsigned int *a = __ldcw_align(x); + + smp_cond_load_acquire(a, VAL); +} static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index ae0751ef8788..f08d567e0ca4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ +{ \ + int res, t; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ + PPC405_ERR77(0, %4) \ +" stwcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) + ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, xor) - #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP_RELAXED #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP @@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ return t; \ } +#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ +static inline long \ +atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +{ \ + long res, t; \ + \ + __asm__ __volatile__( \ +"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ +" stdcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN_RELAXED(op, asm_op) + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) -ATOMIC64_OP(and, and) -ATOMIC64_OP(or, or) -ATOMIC64_OP(xor, xor) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(or, or) +ATOMIC64_OPS(xor, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOPIC64_OPS +#undef ATOMIC64_FETCH_OP_RELAXED #undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 911064aa59b2..d28cc2f5b7b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v) return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER); +} + static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -114,22 +119,27 @@ static inline void atomic_add(int i, atomic_t *v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) #define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) #define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) #define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -#define ATOMIC_OP(op, OP) \ +#define ATOMIC_OPS(op, OP) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -ATOMIC_OP(and, AND) -ATOMIC_OP(or, OR) -ATOMIC_OP(xor, XOR) +ATOMIC_OPS(and, AND) +ATOMIC_OPS(or, OR) +ATOMIC_OPS(xor, XOR) -#undef ATOMIC_OP +#undef ATOMIC_OPS #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -236,6 +246,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER); +} + static inline void atomic64_add(long long i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -264,17 +279,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, return old; } -#define ATOMIC64_OP(op, OP) \ +#define ATOMIC64_OPS(op, OP) \ static inline void atomic64_##op(long i, atomic64_t *v) \ { \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ +} \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \ } -ATOMIC64_OP(and, AND) -ATOMIC64_OP(or, OR) -ATOMIC64_OP(xor, XOR) +ATOMIC64_OPS(and, AND) +ATOMIC64_OPS(or, OR) +ATOMIC64_OPS(xor, XOR) -#undef ATOMIC64_OP +#undef ATOMIC64_OPS #undef __ATOMIC64_LOOP static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) @@ -315,6 +334,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) #define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v) #define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) #define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) #define atomic64_dec(_v) atomic64_sub(1, _v) diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 63ebf37d3143..7e9e09f600fa 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,8 @@ #define __ASM_SPINLOCK_H #include <linux/smp.h> +#include <asm/barrier.h> +#include <asm/processor.h> #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) arch_spin_relax(lock); + smp_acquire__after_ctrl_dep(); } /* diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index b94df40e5f2d..d755e96c3064 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return tmp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int res, tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%2, %0 \n\t" /* load old value */ \ + " mov %0, %1 \n\t" /* save old value */ \ + " " #op " %3, %0 \n\t" /* $op */ \ + " mov.l %0, @%2 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), "=&r" (res), "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 23fcdad5773e..8e2da5fa0178 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 33d34b16d4d6..caea2c45f6c2 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long res, temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \ +" mov %0, %1 \n" \ +" " #op " %2, %0 \n" \ +" movco.l %0, @%3 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp), "=&z" (res) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index bdc0f3b6c56a..416834b60ad0 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -19,14 +19,20 @@ #error "Need movli.l/movco.l for spinlocks" #endif +#include <asm/barrier.h> +#include <asm/processor.h> + /* * Your basic SMP spinlocks, allowing only a single CPU anywhere */ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} /* * Simple spin lock operations. There are two variants, one clears IRQ's diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7dcbebbcaec6..ee3f11c43cda 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,9 +20,10 @@ #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); -void atomic_and(int, atomic_t *); -void atomic_or(int, atomic_t *); -void atomic_xor(int, atomic_t *); +int atomic_fetch_add(int, atomic_t *); +int atomic_fetch_and(int, atomic_t *); +int atomic_fetch_or(int, atomic_t *); +int atomic_fetch_xor(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); int atomic_xchg(atomic_t *, int); int __atomic_add_unless(atomic_t *, int, int); @@ -35,7 +36,13 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) +#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) +#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) +#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) + #define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) + #define atomic_inc_return(v) (atomic_add_return( 1, (v))) #define atomic_dec_return(v) (atomic_add_return( -1, (v))) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..24827a3f733a 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *); int atomic_##op##_return(int, atomic_t *); \ long atomic64_##op##_return(long, atomic64_t *); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +int atomic_fetch_##op(int, atomic_t *); \ +long atomic64_fetch_##op(long, atomic64_t *); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bcc98fc35281..d9c5876c6121 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,12 +9,15 @@ #ifndef __ASSEMBLY__ #include <asm/psr.h> +#include <asm/barrier.h> #include <asm/processor.h> /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 968917694978..87990b7c6b0d 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ +#include <asm/processor.h> +#include <asm/barrier.h> + /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK * and rebuild your kernel. @@ -23,9 +26,10 @@ #define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define arch_spin_unlock_wait(lp) \ - do { rmb(); \ - } while((lp)->lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index b9d63c0a7aab..2c373329d5cb 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -#define ATOMIC_OP_RETURN(op, c_op) \ -int atomic_##op##_return(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, c_op) \ +int atomic_fetch_##op(int i, atomic_t *v) \ { \ int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - ret = (v->counter c_op i); \ + ret = v->counter; \ + v->counter c_op i; \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ return ret; \ } \ -EXPORT_SYMBOL(atomic_##op##_return); +EXPORT_SYMBOL(atomic_fetch_##op); -#define ATOMIC_OP(op, c_op) \ -void atomic_##op(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, c_op) \ +int atomic_##op##_return(int i, atomic_t *v) \ { \ + int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - v->counter c_op i; \ + ret = (v->counter c_op i); \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ } \ -EXPORT_SYMBOL(atomic_##op); +EXPORT_SYMBOL(atomic_##op##_return); ATOMIC_OP_RETURN(add, +=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) +ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#undef ATOMIC_OP int atomic_xchg(atomic_t *v, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index d6b0363f345b..a5c5a0279ccc 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -9,10 +9,11 @@ .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. */ #define ATOMIC_OP(op) \ @@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic64_##op##_return); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_fetch_##op); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 8eb454cfe05c..de5e97817bdb 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op); EXPORT_SYMBOL(atomic_##op##_return); \ EXPORT_SYMBOL(atomic64_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +EXPORT_SYMBOL(atomic_fetch_##op); \ +EXPORT_SYMBOL(atomic64_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,8 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) -#undef ATOMIC_OP +#undef ATOMIC_OPS + +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return _atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,16 +137,29 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long _atomic64_fetch_##op(long long *v, long long n); \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} /** * atomic64_add_return - add integer and return @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -242,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. */ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } -static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) { long guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); } while (guess != oldval); - return oldval != u; -} - -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); + smp_mb(); + return oldval; } static inline void atomic64_xor(long i, atomic64_t *v) @@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v) } while (guess != oldval); } +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include <asm-generic/barrier.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include <asm/barrier.h> /* Tile-specific routines to support <asm/bitops.h>. */ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include <asm-generic/bitops/ext2-atomic.h> diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define __futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 298df1e9912a..f8128800dbf5 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v) int _atomic_xchg(int *v, int n) { - return __atomic_xchg(v, __atomic_setup(v), n).val; + return __atomic32_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(v, __atomic_setup(v), i).val; + return __atomic32_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); @@ -78,39 +78,39 @@ int _atomic_xchg_add_unless(int *v, int a, int u) * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). */ - return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; + return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; + return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_or((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_or); +EXPORT_SYMBOL(_atomic_fetch_or); -unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_and((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_and); +EXPORT_SYMBOL(_atomic_fetch_and); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_andn); +EXPORT_SYMBOL(_atomic_fetch_andn); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_xor); +EXPORT_SYMBOL(_atomic_fetch_xor); long long _atomic64_xchg(long long *v, long long n) @@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n) } EXPORT_SYMBOL(_atomic64_cmpxchg); -long long _atomic64_and(long long *v, long long n) +long long _atomic64_fetch_and(long long *v, long long n) { - return __atomic64_and(v, __atomic_setup(v), n); + return __atomic64_fetch_and(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_and); +EXPORT_SYMBOL(_atomic64_fetch_and); -long long _atomic64_or(long long *v, long long n) +long long _atomic64_fetch_or(long long *v, long long n) { - return __atomic64_or(v, __atomic_setup(v), n); + return __atomic64_fetch_or(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_or); +EXPORT_SYMBOL(_atomic64_fetch_or); -long long _atomic64_xor(long long *v, long long n) +long long _atomic64_fetch_xor(long long *v, long long n) { - return __atomic64_xor(v, __atomic_setup(v), n); + return __atomic64_fetch_xor(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_xor); +EXPORT_SYMBOL(_atomic64_fetch_xor); /* * If any of the atomic or futex routines hit a bad address (not in diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index f611265633d6..1a70e6c0f259 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) .endif .endm -atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" -atomic_op _xchg, 32, "move r24, r2" -atomic_op _xchg_add, 32, "add r24, r22, r2" -atomic_op _xchg_add_unless, 32, \ + +/* + * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions. + */ + +atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op 32_xchg, 32, "move r24, r2" +atomic_op 32_xchg_add, 32, "add r24, r22, r2" +atomic_op 32_xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _or, 32, "or r24, r22, r2" -atomic_op _and, 32, "and r24, r22, r2" -atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _xor, 32, "xor r24, r22, r2" +atomic_op 32_fetch_or, 32, "or r24, r22, r2" +atomic_op 32_fetch_and, 32, "and r24, r22, r2" +atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" @@ -192,9 +197,9 @@ atomic_op 64_xchg_add_unless, 64, \ { bbns r26, 3f; add r24, r22, r4 }; \ { bbns r27, 3f; add r25, r23, r5 }; \ slt_u r26, r24, r22; add r25, r25, r26" -atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" -atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" -atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" +atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" +atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" +atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" jrp lr /* happy backtracer */ diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 88c2a53362e7..076c6cc43113 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (READ_ONCE(lock->current_ticket) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index c8d1f94ff1fe..a4b5b2cbce93 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..a58b99811105 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) +static __always_inline int atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} + +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} + static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); @@ -190,10 +200,29 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int old, val = atomic_read(v); \ + for (;;) { \ + old = atomic_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &) +ATOMIC_OPS(or , |) +ATOMIC_OPS(xor, ^) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP /** diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a984111135b1..71d7705fb303 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ c = old; \ } -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ + return old; \ +} + +ATOMIC64_FETCH_OP(add, +) + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..70eed0e14553 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return atomic64_add_return(-i, v); } +static inline long atomic64_fetch_add(long i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} + +static inline long atomic64_fetch_sub(long i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} + #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) @@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v) \ : "memory"); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long old, val = atomic64_read(v); \ + for (;;) { \ + old = atomic64_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..089ced4edbbc 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index fd8017ce298a..e7a23f2a519a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ +} + #else /* XCHAL_HAVE_S32C1I */ #define ATOMIC_OP(op) \ @@ -138,18 +158,42 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return vval; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned int tmp, vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " l32i %0, %3, 0\n" \ + " " #op " %1, %0, %2\n" \ + " s32i %1, %3, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ +} + #endif /* XCHAL_HAVE_S32C1I */ -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 1d95fa5dcd10..a36221cf6363 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _XTENSA_SPINLOCK_H #define _XTENSA_SPINLOCK_H +#include <asm/barrier.h> +#include <asm/processor.h> + /* * spinlock * @@ -29,8 +32,11 @@ */ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) |