44fe84459f
Dmitry noted that the new atomic_try_cmpxchg() primitive is broken when
the old pointer doesn't point to the local stack.
He writes:
"Consider a classical lock-free stack push:
node->next = atomic_read(&head);
do {
} while (!atomic_try_cmpxchg(&head, &node->next, node));
This code is broken with the current implementation, the problem is
with unconditional update of *__po.
In case of success it writes the same value back into *__po, but in
case of cmpxchg success we might have lose ownership of some memory
locations and potentially over what __po has pointed to. The same
holds for the re-read of *__po. "
He also points out that this makes it surprisingly different from the
similar C/C++ atomic operation.
After investigating the code-gen differences caused by this patch; and
a number of alternatives (Linus dislikes this interface lots), we
arrived at these results (size x86_64-defconfig/vmlinux):
GCC-6.3.0:
10735757 cmpxchg
10726413 try_cmpxchg
10730509 try_cmpxchg + patch
10730445 try_cmpxchg-linus
GCC-7 (20170327):
10709514 cmpxchg
10704266 try_cmpxchg
10704266 try_cmpxchg + patch
10704394 try_cmpxchg-linus
From this we see that the patch has the advantage of better code-gen
on GCC-7 and keeps the interface roughly consistent with the C
language variant.
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Fixes: a9ebf306f5
("locking/atomic: Introduce atomic_try_cmpxchg()")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
259 lines
7.6 KiB
C
259 lines
7.6 KiB
C
#ifndef ASM_X86_CMPXCHG_H
|
|
#define ASM_X86_CMPXCHG_H
|
|
|
|
#include <linux/compiler.h>
|
|
#include <asm/cpufeatures.h>
|
|
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
|
|
|
|
/*
|
|
* Non-existant functions to indicate usage errors at link time
|
|
* (or compile-time if the compiler implements __compiletime_error().
|
|
*/
|
|
extern void __xchg_wrong_size(void)
|
|
__compiletime_error("Bad argument size for xchg");
|
|
extern void __cmpxchg_wrong_size(void)
|
|
__compiletime_error("Bad argument size for cmpxchg");
|
|
extern void __xadd_wrong_size(void)
|
|
__compiletime_error("Bad argument size for xadd");
|
|
extern void __add_wrong_size(void)
|
|
__compiletime_error("Bad argument size for add");
|
|
|
|
/*
|
|
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
|
|
* -1 because sizeof will never return -1, thereby making those switch
|
|
* case statements guaranteeed dead code which the compiler will
|
|
* eliminate, and allowing the "missing symbol in the default case" to
|
|
* indicate a usage error.
|
|
*/
|
|
#define __X86_CASE_B 1
|
|
#define __X86_CASE_W 2
|
|
#define __X86_CASE_L 4
|
|
#ifdef CONFIG_64BIT
|
|
#define __X86_CASE_Q 8
|
|
#else
|
|
#define __X86_CASE_Q -1 /* sizeof will never return -1 */
|
|
#endif
|
|
|
|
/*
|
|
* An exchange-type operation, which takes a value and a pointer, and
|
|
* returns the old value.
|
|
*/
|
|
#define __xchg_op(ptr, arg, op, lock) \
|
|
({ \
|
|
__typeof__ (*(ptr)) __ret = (arg); \
|
|
switch (sizeof(*(ptr))) { \
|
|
case __X86_CASE_B: \
|
|
asm volatile (lock #op "b %b0, %1\n" \
|
|
: "+q" (__ret), "+m" (*(ptr)) \
|
|
: : "memory", "cc"); \
|
|
break; \
|
|
case __X86_CASE_W: \
|
|
asm volatile (lock #op "w %w0, %1\n" \
|
|
: "+r" (__ret), "+m" (*(ptr)) \
|
|
: : "memory", "cc"); \
|
|
break; \
|
|
case __X86_CASE_L: \
|
|
asm volatile (lock #op "l %0, %1\n" \
|
|
: "+r" (__ret), "+m" (*(ptr)) \
|
|
: : "memory", "cc"); \
|
|
break; \
|
|
case __X86_CASE_Q: \
|
|
asm volatile (lock #op "q %q0, %1\n" \
|
|
: "+r" (__ret), "+m" (*(ptr)) \
|
|
: : "memory", "cc"); \
|
|
break; \
|
|
default: \
|
|
__ ## op ## _wrong_size(); \
|
|
} \
|
|
__ret; \
|
|
})
|
|
|
|
/*
|
|
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
|
|
* Since this is generally used to protect other memory information, we
|
|
* use "asm volatile" and "memory" clobbers to prevent gcc from moving
|
|
* information around.
|
|
*/
|
|
#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
|
|
|
|
/*
|
|
* Atomic compare and exchange. Compare OLD with MEM, if identical,
|
|
* store NEW in MEM. Return the initial value in MEM. Success is
|
|
* indicated by comparing RETURN with OLD.
|
|
*/
|
|
#define __raw_cmpxchg(ptr, old, new, size, lock) \
|
|
({ \
|
|
__typeof__(*(ptr)) __ret; \
|
|
__typeof__(*(ptr)) __old = (old); \
|
|
__typeof__(*(ptr)) __new = (new); \
|
|
switch (size) { \
|
|
case __X86_CASE_B: \
|
|
{ \
|
|
volatile u8 *__ptr = (volatile u8 *)(ptr); \
|
|
asm volatile(lock "cmpxchgb %2,%1" \
|
|
: "=a" (__ret), "+m" (*__ptr) \
|
|
: "q" (__new), "0" (__old) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
case __X86_CASE_W: \
|
|
{ \
|
|
volatile u16 *__ptr = (volatile u16 *)(ptr); \
|
|
asm volatile(lock "cmpxchgw %2,%1" \
|
|
: "=a" (__ret), "+m" (*__ptr) \
|
|
: "r" (__new), "0" (__old) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
case __X86_CASE_L: \
|
|
{ \
|
|
volatile u32 *__ptr = (volatile u32 *)(ptr); \
|
|
asm volatile(lock "cmpxchgl %2,%1" \
|
|
: "=a" (__ret), "+m" (*__ptr) \
|
|
: "r" (__new), "0" (__old) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
case __X86_CASE_Q: \
|
|
{ \
|
|
volatile u64 *__ptr = (volatile u64 *)(ptr); \
|
|
asm volatile(lock "cmpxchgq %2,%1" \
|
|
: "=a" (__ret), "+m" (*__ptr) \
|
|
: "r" (__new), "0" (__old) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
default: \
|
|
__cmpxchg_wrong_size(); \
|
|
} \
|
|
__ret; \
|
|
})
|
|
|
|
#define __cmpxchg(ptr, old, new, size) \
|
|
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
|
|
|
|
#define __sync_cmpxchg(ptr, old, new, size) \
|
|
__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
|
|
|
|
#define __cmpxchg_local(ptr, old, new, size) \
|
|
__raw_cmpxchg((ptr), (old), (new), (size), "")
|
|
|
|
#ifdef CONFIG_X86_32
|
|
# include <asm/cmpxchg_32.h>
|
|
#else
|
|
# include <asm/cmpxchg_64.h>
|
|
#endif
|
|
|
|
#define cmpxchg(ptr, old, new) \
|
|
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
|
|
|
|
#define sync_cmpxchg(ptr, old, new) \
|
|
__sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))
|
|
|
|
#define cmpxchg_local(ptr, old, new) \
|
|
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
|
|
|
|
|
|
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
|
|
({ \
|
|
bool success; \
|
|
__typeof__(_ptr) _old = (_pold); \
|
|
__typeof__(*(_ptr)) __old = *_old; \
|
|
__typeof__(*(_ptr)) __new = (_new); \
|
|
switch (size) { \
|
|
case __X86_CASE_B: \
|
|
{ \
|
|
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
|
|
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
|
|
CC_SET(z) \
|
|
: CC_OUT(z) (success), \
|
|
[ptr] "+m" (*__ptr), \
|
|
[old] "+a" (__old) \
|
|
: [new] "q" (__new) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
case __X86_CASE_W: \
|
|
{ \
|
|
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
|
|
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
|
|
CC_SET(z) \
|
|
: CC_OUT(z) (success), \
|
|
[ptr] "+m" (*__ptr), \
|
|
[old] "+a" (__old) \
|
|
: [new] "r" (__new) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
case __X86_CASE_L: \
|
|
{ \
|
|
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
|
|
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
|
|
CC_SET(z) \
|
|
: CC_OUT(z) (success), \
|
|
[ptr] "+m" (*__ptr), \
|
|
[old] "+a" (__old) \
|
|
: [new] "r" (__new) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
case __X86_CASE_Q: \
|
|
{ \
|
|
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
|
|
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
|
|
CC_SET(z) \
|
|
: CC_OUT(z) (success), \
|
|
[ptr] "+m" (*__ptr), \
|
|
[old] "+a" (__old) \
|
|
: [new] "r" (__new) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
default: \
|
|
__cmpxchg_wrong_size(); \
|
|
} \
|
|
if (unlikely(!success)) \
|
|
*_old = __old; \
|
|
likely(success); \
|
|
})
|
|
|
|
#define __try_cmpxchg(ptr, pold, new, size) \
|
|
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
|
|
|
|
#define try_cmpxchg(ptr, pold, new) \
|
|
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
|
|
|
|
/*
|
|
* xadd() adds "inc" to "*ptr" and atomically returns the previous
|
|
* value of "*ptr".
|
|
*
|
|
* xadd() is locked when multiple CPUs are online
|
|
*/
|
|
#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
|
|
#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
|
|
|
|
#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
|
|
({ \
|
|
bool __ret; \
|
|
__typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
|
|
__typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
|
|
BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
|
|
BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
|
|
VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
|
|
VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
|
|
asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
|
|
: "=a" (__ret), "+d" (__old2), \
|
|
"+m" (*(p1)), "+m" (*(p2)) \
|
|
: "i" (2 * sizeof(long)), "a" (__old1), \
|
|
"b" (__new1), "c" (__new2)); \
|
|
__ret; \
|
|
})
|
|
|
|
#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
|
|
__cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
|
|
|
|
#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
|
|
__cmpxchg_double(, p1, p2, o1, o2, n1, n2)
|
|
|
|
#endif /* ASM_X86_CMPXCHG_H */
|