diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2024-03-20 09:30:40 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2024-03-20 12:29:02 +0100 |
commit | 0539084639f3835c8d0b798e6659ec14a266b4f1 (patch) | |
tree | c66aeefb14e9a55ffafde12f48f0873e1c1c9856 /arch/x86/include/asm/percpu.h | |
parent | 4ae3dc83b047d51485cce1a72be277a110d77c91 (diff) |
x86/percpu: Convert this_percpu_xchg_op() from asm() to C code, to generate better code
Rewrite percpu_xchg_op() using generic percpu primitives instead
of using asm. The new implementation is similar to local_xchg() and
allows the compiler to perform various optimizations: e.g. the
compiler is able to create fast path through the loop, according
to likely/unlikely annotations in percpu_try_cmpxchg_op().
No functional changes intended.
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20240320083127.493250-1-ubizjak@gmail.com
Diffstat (limited to 'arch/x86/include/asm/percpu.h')
-rw-r--r-- | arch/x86/include/asm/percpu.h | 32 |
1 files changed, 11 insertions, 21 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 44958ebaf626..de991e6d050a 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -230,25 +230,15 @@ do { \ }) /* - * xchg is implemented using cmpxchg without a lock prefix. xchg is - * expensive due to the implied lock prefix. The processor cannot prefetch - * cachelines if xchg is used. + * this_cpu_xchg() is implemented using cmpxchg without a lock prefix. + * xchg is expensive due to the implied lock prefix. The processor + * cannot prefetch cachelines if xchg is used. */ -#define percpu_xchg_op(size, qual, _var, _nval) \ +#define this_percpu_xchg_op(_var, _nval) \ ({ \ - __pcpu_type_##size pxo_old__; \ - __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \ - asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \ - "%[oval]") \ - "\n1:\t" \ - __pcpu_op2_##size("cmpxchg", "%[nval]", \ - __percpu_arg([var])) \ - "\n\tjnz 1b" \ - : [oval] "=&a" (pxo_old__), \ - [var] "+m" (__my_cpu_var(_var)) \ - : [nval] __pcpu_reg_##size(, pxo_new__) \ - : "memory"); \ - (typeof(_var))(unsigned long) pxo_old__; \ + typeof(_var) pxo_old__ = this_cpu_read(_var); \ + do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \ + pxo_old__; \ }) /* @@ -534,9 +524,9 @@ do { \ #define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val) #define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val) #define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val) -#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval) -#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval) -#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval) +#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval) +#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval) #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val) #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val) @@ -575,7 +565,7 @@ do { \ #define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val) #define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val) #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval) +#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval) #endif |