cpuops: Use cmpxchg for xchg to avoid lock semantics
Use cmpxchg instead of xchg to realize this_cpu_xchg. xchg will cause LOCK overhead since LOCK is always implied but cmpxchg will not. Baselines: xchg() = 18 cycles (no segment prefix, LOCK semantics) __this_cpu_xchg = 1 cycle (simulated using this_cpu_read/write, two prefixes. Looks like the cpu can use loop optimization to get rid of most of the overhead) Cycles before: this_cpu_xchg = 37 cycles (segment prefix and LOCK (implied by xchg)) After: this_cpu_xchg = 11 cycle (using cmpxchg without lock semantics) Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
7296e08aba
commit
8270137a0d
@ -263,8 +263,9 @@ do { \
|
||||
})
|
||||
|
||||
/*
|
||||
* Beware: xchg on x86 has an implied lock prefix. There will be the cost of
|
||||
* full lock semantics even though they are not needed.
|
||||
* xchg is implemented using cmpxchg without a lock prefix. xchg is
|
||||
* expensive due to the implied lock prefix. The processor cannot prefetch
|
||||
* cachelines if xchg is used.
|
||||
*/
|
||||
#define percpu_xchg_op(var, nval) \
|
||||
({ \
|
||||
@ -272,25 +273,33 @@ do { \
|
||||
typeof(var) pxo_new__ = (nval); \
|
||||
switch (sizeof(var)) { \
|
||||
case 1: \
|
||||
asm("xchgb %2, "__percpu_arg(1) \
|
||||
asm("\n1:mov "__percpu_arg(1)",%%al" \
|
||||
"\n\tcmpxchgb %2, "__percpu_arg(1) \
|
||||
"\n\tjnz 1b" \
|
||||
: "=a" (pxo_ret__), "+m" (var) \
|
||||
: "q" (pxo_new__) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
case 2: \
|
||||
asm("xchgw %2, "__percpu_arg(1) \
|
||||
asm("\n1:mov "__percpu_arg(1)",%%ax" \
|
||||
"\n\tcmpxchgw %2, "__percpu_arg(1) \
|
||||
"\n\tjnz 1b" \
|
||||
: "=a" (pxo_ret__), "+m" (var) \
|
||||
: "r" (pxo_new__) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
case 4: \
|
||||
asm("xchgl %2, "__percpu_arg(1) \
|
||||
asm("\n1:mov "__percpu_arg(1)",%%eax" \
|
||||
"\n\tcmpxchgl %2, "__percpu_arg(1) \
|
||||
"\n\tjnz 1b" \
|
||||
: "=a" (pxo_ret__), "+m" (var) \
|
||||
: "r" (pxo_new__) \
|
||||
: "memory"); \
|
||||
break; \
|
||||
case 8: \
|
||||
asm("xchgq %2, "__percpu_arg(1) \
|
||||
asm("\n1:mov "__percpu_arg(1)",%%rax" \
|
||||
"\n\tcmpxchgq %2, "__percpu_arg(1) \
|
||||
"\n\tjnz 1b" \
|
||||
: "=a" (pxo_ret__), "+m" (var) \
|
||||
: "r" (pxo_new__) \
|
||||
: "memory"); \
|
||||
|
Loading…
Reference in New Issue
Block a user