Merge branch 'for-next/atomics' into for-next/core

* for-next/atomics: (10 commits)
  Rework LSE instruction selection to use static keys instead of alternatives
Author: Will Deacon
Date:   2019-08-30 12:55:39 +01:00
Commit: 61b7cddfe8

10 changed files with 411 additions and 417 deletions

arch/arm64/Kconfig

@@ -1290,6 +1290,7 @@ config ARM64_PAN
 
 config ARM64_LSE_ATOMICS
 	bool "Atomic instructions"
+	depends on JUMP_LABEL
 	default y
 	help
 	  As part of the Large System Extensions, ARMv8.1 introduces new

arch/arm64/Makefile

@@ -39,6 +39,12 @@ $(warning LSE atomics not supported by binutils)
   endif
 endif
 
+cc_has_k_constraint := $(call try-run,echo				\
+	'int main(void) {						\
+		asm volatile("and w0, w0, %w0" :: "K" (4294967295));	\
+		return 0;						\
+	}' | $(CC) -S -x c -o "$$TMP" -,,-DCONFIG_CC_HAS_K_CONSTRAINT=1)
+
 ifeq ($(CONFIG_ARM64), y)
 brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
 
@@ -63,7 +69,8 @@ ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
   endif
 endif
 
-KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
+KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)	\
+		   $(compat_vdso) $(cc_has_k_constraint)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst) $(compat_vdso)
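The probe that try-run feeds to $(CC) above is an ordinary C translation unit; compiled standalone it would look like the sketch below (the file name is invented and an AArch64 toolchain is assumed). If the compile succeeds, the toolchain handles the 'K' (32-bit logical immediate) constraint correctly and CONFIG_CC_HAS_K_CONSTRAINT is defined; otherwise the atomics fall back to plain register operands.

/* k-constraint-probe.c (name invented): standalone form of the try-run test.
 * Build:  aarch64-linux-gnu-gcc -S -o /dev/null k-constraint-probe.c
 */
int main(void)
{
	/* 4294967295 == 0xffffffff, the 32-bit logical immediate that older GCCs mishandle */
	asm volatile("and w0, w0, %w0" :: "K" (4294967295));
	return 0;
}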

arch/arm64/include/asm/atomic.h

@@ -13,19 +13,91 @@
 #include <linux/types.h>
 
 #include <asm/barrier.h>
+#include <asm/cmpxchg.h>
 #include <asm/lse.h>
 
-#define __ARM64_IN_ATOMIC_IMPL
-
-#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
-#include <asm/atomic_lse.h>
-#else
-#include <asm/atomic_ll_sc.h>
-#endif
-
-#undef __ARM64_IN_ATOMIC_IMPL
-
-#include <asm/cmpxchg.h>
+#define ATOMIC_OP(op)							\
+static inline void arch_##op(int i, atomic_t *v)			\
+{									\
+	__lse_ll_sc_body(op, i, v);					\
+}
+
+ATOMIC_OP(atomic_andnot)
+ATOMIC_OP(atomic_or)
+ATOMIC_OP(atomic_xor)
+ATOMIC_OP(atomic_add)
+ATOMIC_OP(atomic_and)
+ATOMIC_OP(atomic_sub)
+
+#undef ATOMIC_OP
+
+#define ATOMIC_FETCH_OP(name, op)					\
+static inline int arch_##op##name(int i, atomic_t *v)			\
+{									\
+	return __lse_ll_sc_body(op##name, i, v);			\
+}
+
+#define ATOMIC_FETCH_OPS(op)						\
+	ATOMIC_FETCH_OP(_relaxed, op)					\
+	ATOMIC_FETCH_OP(_acquire, op)					\
+	ATOMIC_FETCH_OP(_release, op)					\
+	ATOMIC_FETCH_OP(        , op)
+
+ATOMIC_FETCH_OPS(atomic_fetch_andnot)
+ATOMIC_FETCH_OPS(atomic_fetch_or)
+ATOMIC_FETCH_OPS(atomic_fetch_xor)
+ATOMIC_FETCH_OPS(atomic_fetch_add)
+ATOMIC_FETCH_OPS(atomic_fetch_and)
+ATOMIC_FETCH_OPS(atomic_fetch_sub)
+ATOMIC_FETCH_OPS(atomic_add_return)
+ATOMIC_FETCH_OPS(atomic_sub_return)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
+
+#define ATOMIC64_OP(op)							\
+static inline void arch_##op(long i, atomic64_t *v)			\
+{									\
+	__lse_ll_sc_body(op, i, v);					\
+}
+
+ATOMIC64_OP(atomic64_andnot)
+ATOMIC64_OP(atomic64_or)
+ATOMIC64_OP(atomic64_xor)
+ATOMIC64_OP(atomic64_add)
+ATOMIC64_OP(atomic64_and)
+ATOMIC64_OP(atomic64_sub)
+
+#undef ATOMIC64_OP
+
+#define ATOMIC64_FETCH_OP(name, op)					\
+static inline long arch_##op##name(long i, atomic64_t *v)		\
+{									\
+	return __lse_ll_sc_body(op##name, i, v);			\
+}
+
+#define ATOMIC64_FETCH_OPS(op)						\
+	ATOMIC64_FETCH_OP(_relaxed, op)					\
+	ATOMIC64_FETCH_OP(_acquire, op)					\
+	ATOMIC64_FETCH_OP(_release, op)					\
+	ATOMIC64_FETCH_OP(        , op)
+
+ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
+ATOMIC64_FETCH_OPS(atomic64_fetch_or)
+ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
+ATOMIC64_FETCH_OPS(atomic64_fetch_add)
+ATOMIC64_FETCH_OPS(atomic64_fetch_and)
+ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
+ATOMIC64_FETCH_OPS(atomic64_add_return)
+ATOMIC64_FETCH_OPS(atomic64_sub_return)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
+
+static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
+}
 
 #define ATOMIC_INIT(i)	{ (i) }
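For illustration, ATOMIC_OP(atomic_add) above expands to roughly the wrapper below. All names come from the hunks in this commit; __lse_ll_sc_body() is the static-key dispatcher added to asm/lse.h later in this diff, and the if/else form here is just a readable rendering of its ternary expression.

/* Sketch: roughly what ATOMIC_OP(atomic_add) produces after expansion. */
static inline void arch_atomic_add(int i, atomic_t *v)
{
	if (system_uses_lse_atomics())
		__lse_atomic_add(i, v);		/* single LSE instruction (STADD) */
	else
		__ll_sc_atomic_add(i, v);	/* load/store-exclusive retry loop */
}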

arch/arm64/include/asm/atomic_ll_sc.h

@@ -10,83 +10,92 @@
 #ifndef __ASM_ATOMIC_LL_SC_H
 #define __ASM_ATOMIC_LL_SC_H
 
-#ifndef __ARM64_IN_ATOMIC_IMPL
-#error "please don't include this file directly"
+#include <linux/stringify.h>
+
+#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
+#define __LL_SC_FALLBACK(asm_ops)					\
+"	b	3f\n"							\
+"	.subsection	1\n"						\
+"3:\n"									\
+asm_ops "\n"								\
+"	b	4f\n"							\
+"	.previous\n"							\
+"4:\n"
+#else
+#define __LL_SC_FALLBACK(asm_ops) asm_ops
+#endif
+
+#ifndef CONFIG_CC_HAS_K_CONSTRAINT
+#define K
 #endif
 
 /*
  * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
  * to ensure that the update happens.
- *
- * NOTE: these functions do *not* follow the PCS and must explicitly
- * save any clobbered registers other than x0 (regardless of return
- * value). This is achieved through -fcall-saved-* compiler flags for
- * this file, which unfortunately don't work on a per-function basis
- * (the optimize attribute silently ignores these options).
 */
#define ATOMIC_OP(op, asm_op) \ #define ATOMIC_OP(op, asm_op, constraint) \
__LL_SC_INLINE void \ static inline void \
__LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v)) \ __ll_sc_atomic_##op(int i, atomic_t *v) \
{ \ { \
unsigned long tmp; \ unsigned long tmp; \
int result; \ int result; \
\ \
asm volatile("// atomic_" #op "\n" \ asm volatile("// atomic_" #op "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \ "1: ldxr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \ " " #asm_op " %w0, %w0, %w3\n" \
" stxr %w1, %w0, %2\n" \ " stxr %w1, %w0, %2\n" \
" cbnz %w1, 1b" \ " cbnz %w1, 1b\n") \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i)); \ : __stringify(constraint) "r" (i)); \
} \ }
__LL_SC_EXPORT(arch_atomic_##op);
#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE int \ static inline int \
__LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v)) \ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v) \
{ \ { \
unsigned long tmp; \ unsigned long tmp; \
int result; \ int result; \
\ \
asm volatile("// atomic_" #op "_return" #name "\n" \ asm volatile("// atomic_" #op "_return" #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ld" #acq "xr %w0, %2\n" \ "1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \ " " #asm_op " %w0, %w0, %w3\n" \
" st" #rel "xr %w1, %w0, %2\n" \ " st" #rel "xr %w1, %w0, %2\n" \
" cbnz %w1, 1b\n" \ " cbnz %w1, 1b\n" \
" " #mb \ " " #mb ) \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \ : __stringify(constraint) "r" (i) \
: cl); \ : cl); \
\ \
return result; \ return result; \
} \ }
__LL_SC_EXPORT(arch_atomic_##op##_return##name);
#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ #define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
__LL_SC_INLINE int \ static inline int \
__LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v)) \ __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \ { \
unsigned long tmp; \ unsigned long tmp; \
int val, result; \ int val, result; \
\ \
asm volatile("// atomic_fetch_" #op #name "\n" \ asm volatile("// atomic_fetch_" #op #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %3\n" \ " prfm pstl1strm, %3\n" \
"1: ld" #acq "xr %w0, %3\n" \ "1: ld" #acq "xr %w0, %3\n" \
" " #asm_op " %w1, %w0, %w4\n" \ " " #asm_op " %w1, %w0, %w4\n" \
" st" #rel "xr %w2, %w1, %3\n" \ " st" #rel "xr %w2, %w1, %3\n" \
" cbnz %w2, 1b\n" \ " cbnz %w2, 1b\n" \
" " #mb \ " " #mb ) \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \ : __stringify(constraint) "r" (i) \
: cl); \ : cl); \
\ \
return result; \ return result; \
} \ }
__LL_SC_EXPORT(arch_atomic_fetch_##op##name);
#define ATOMIC_OPS(...) \ #define ATOMIC_OPS(...) \
ATOMIC_OP(__VA_ARGS__) \ ATOMIC_OP(__VA_ARGS__) \
@ -99,8 +108,8 @@ __LL_SC_EXPORT(arch_atomic_fetch_##op##name);
ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
ATOMIC_OPS(add, add) ATOMIC_OPS(add, add, I)
ATOMIC_OPS(sub, sub) ATOMIC_OPS(sub, sub, J)
#undef ATOMIC_OPS #undef ATOMIC_OPS
#define ATOMIC_OPS(...) \ #define ATOMIC_OPS(...) \
@ -110,77 +119,82 @@ ATOMIC_OPS(sub, sub)
ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
ATOMIC_OPS(and, and) ATOMIC_OPS(and, and, K)
ATOMIC_OPS(andnot, bic) ATOMIC_OPS(or, orr, K)
ATOMIC_OPS(or, orr) ATOMIC_OPS(xor, eor, K)
ATOMIC_OPS(xor, eor) /*
* GAS converts the mysterious and undocumented BIC (immediate) alias to
* an AND (immediate) instruction with the immediate inverted. We don't
* have a constraint for this, so fall back to register.
*/
ATOMIC_OPS(andnot, bic, )
#undef ATOMIC_OPS #undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP #undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN #undef ATOMIC_OP_RETURN
#undef ATOMIC_OP #undef ATOMIC_OP
#define ATOMIC64_OP(op, asm_op) \ #define ATOMIC64_OP(op, asm_op, constraint) \
__LL_SC_INLINE void \ static inline void \
__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \ __ll_sc_atomic64_##op(s64 i, atomic64_t *v) \
{ \ { \
s64 result; \ s64 result; \
unsigned long tmp; \ unsigned long tmp; \
\ \
asm volatile("// atomic64_" #op "\n" \ asm volatile("// atomic64_" #op "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \ "1: ldxr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \ " " #asm_op " %0, %0, %3\n" \
" stxr %w1, %0, %2\n" \ " stxr %w1, %0, %2\n" \
" cbnz %w1, 1b" \ " cbnz %w1, 1b") \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i)); \ : __stringify(constraint) "r" (i)); \
} \ }
__LL_SC_EXPORT(arch_atomic64_##op);
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE s64 \ static inline long \
__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v) \
{ \ { \
s64 result; \ s64 result; \
unsigned long tmp; \ unsigned long tmp; \
\ \
asm volatile("// atomic64_" #op "_return" #name "\n" \ asm volatile("// atomic64_" #op "_return" #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ld" #acq "xr %0, %2\n" \ "1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \ " " #asm_op " %0, %0, %3\n" \
" st" #rel "xr %w1, %0, %2\n" \ " st" #rel "xr %w1, %0, %2\n" \
" cbnz %w1, 1b\n" \ " cbnz %w1, 1b\n" \
" " #mb \ " " #mb ) \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \ : __stringify(constraint) "r" (i) \
: cl); \ : cl); \
\ \
return result; \ return result; \
} \ }
__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
__LL_SC_INLINE s64 \ static inline long \
__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \ __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
{ \ { \
s64 result, val; \ s64 result, val; \
unsigned long tmp; \ unsigned long tmp; \
\ \
asm volatile("// atomic64_fetch_" #op #name "\n" \ asm volatile("// atomic64_fetch_" #op #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %3\n" \ " prfm pstl1strm, %3\n" \
"1: ld" #acq "xr %0, %3\n" \ "1: ld" #acq "xr %0, %3\n" \
" " #asm_op " %1, %0, %4\n" \ " " #asm_op " %1, %0, %4\n" \
" st" #rel "xr %w2, %1, %3\n" \ " st" #rel "xr %w2, %1, %3\n" \
" cbnz %w2, 1b\n" \ " cbnz %w2, 1b\n" \
" " #mb \ " " #mb ) \
: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \ : __stringify(constraint) "r" (i) \
: cl); \ : cl); \
\ \
return result; \ return result; \
} \ }
__LL_SC_EXPORT(arch_atomic64_fetch_##op##name);
#define ATOMIC64_OPS(...) \ #define ATOMIC64_OPS(...) \
ATOMIC64_OP(__VA_ARGS__) \ ATOMIC64_OP(__VA_ARGS__) \
@ -193,8 +207,8 @@ __LL_SC_EXPORT(arch_atomic64_fetch_##op##name);
ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
ATOMIC64_OPS(add, add) ATOMIC64_OPS(add, add, I)
ATOMIC64_OPS(sub, sub) ATOMIC64_OPS(sub, sub, J)
#undef ATOMIC64_OPS #undef ATOMIC64_OPS
#define ATOMIC64_OPS(...) \ #define ATOMIC64_OPS(...) \
@ -204,23 +218,29 @@ ATOMIC64_OPS(sub, sub)
ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
ATOMIC64_OPS(and, and) ATOMIC64_OPS(and, and, L)
ATOMIC64_OPS(andnot, bic) ATOMIC64_OPS(or, orr, L)
ATOMIC64_OPS(or, orr) ATOMIC64_OPS(xor, eor, L)
ATOMIC64_OPS(xor, eor) /*
* GAS converts the mysterious and undocumented BIC (immediate) alias to
* an AND (immediate) instruction with the immediate inverted. We don't
* have a constraint for this, so fall back to register.
*/
ATOMIC64_OPS(andnot, bic, )
#undef ATOMIC64_OPS #undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP #undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP #undef ATOMIC64_OP
__LL_SC_INLINE s64 static inline s64
__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v)) __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
{ {
s64 result; s64 result;
unsigned long tmp; unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n" asm volatile("// atomic64_dec_if_positive\n"
__LL_SC_FALLBACK(
" prfm pstl1strm, %2\n" " prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n" "1: ldxr %0, %2\n"
" subs %0, %0, #1\n" " subs %0, %0, #1\n"
@ -228,20 +248,19 @@ __LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
" stlxr %w1, %0, %2\n" " stlxr %w1, %0, %2\n"
" cbnz %w1, 1b\n" " cbnz %w1, 1b\n"
" dmb ish\n" " dmb ish\n"
"2:" "2:")
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
: :
: "cc", "memory"); : "cc", "memory");
return result; return result;
} }
__LL_SC_EXPORT(arch_atomic64_dec_if_positive);
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl) \ #define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
__LL_SC_INLINE u##sz \ static inline u##sz \
__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr, \
unsigned long old, \ unsigned long old, \
u##sz new)) \ u##sz new) \
{ \ { \
unsigned long tmp; \ unsigned long tmp; \
u##sz oldval; \ u##sz oldval; \
@ -255,6 +274,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
old = (u##sz)old; \ old = (u##sz)old; \
\ \
asm volatile( \ asm volatile( \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %[v]\n" \ " prfm pstl1strm, %[v]\n" \
"1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
@ -262,46 +282,51 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
" st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
" cbnz %w[tmp], 1b\n" \ " cbnz %w[tmp], 1b\n" \
" " #mb "\n" \ " " #mb "\n" \
"2:" \ "2:") \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
[v] "+Q" (*(u##sz *)ptr) \ [v] "+Q" (*(u##sz *)ptr) \
: [old] "Kr" (old), [new] "r" (new) \ : [old] __stringify(constraint) "r" (old), [new] "r" (new) \
: cl); \ : cl); \
\ \
return oldval; \ return oldval; \
} \ }
__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
__CMPXCHG_CASE(w, b, , 8, , , , ) /*
__CMPXCHG_CASE(w, h, , 16, , , , ) * Earlier versions of GCC (no later than 8.1.0) appear to incorrectly
__CMPXCHG_CASE(w, , , 32, , , , ) * handle the 'K' constraint for the value 4294967295 - thus we use no
__CMPXCHG_CASE( , , , 64, , , , ) * constraint for 32 bit operations.
__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory") */
__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory") __CMPXCHG_CASE(w, b, , 8, , , , , K)
__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory") __CMPXCHG_CASE(w, h, , 16, , , , , K)
__CMPXCHG_CASE( , , acq_, 64, , a, , "memory") __CMPXCHG_CASE(w, , , 32, , , , , K)
__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory") __CMPXCHG_CASE( , , , 64, , , , , L)
__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory") __CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory", K)
__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory") __CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory", K)
__CMPXCHG_CASE( , , rel_, 64, , , l, "memory") __CMPXCHG_CASE(w, , acq_, 32, , a, , "memory", K)
__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory") __CMPXCHG_CASE( , , acq_, 64, , a, , "memory", L)
__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory") __CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory", K)
__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory") __CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory", K)
__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory") __CMPXCHG_CASE(w, , rel_, 32, , , l, "memory", K)
__CMPXCHG_CASE( , , rel_, 64, , , l, "memory", L)
__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory", K)
__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory", K)
__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory", K)
__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE #undef __CMPXCHG_CASE
#define __CMPXCHG_DBL(name, mb, rel, cl) \ #define __CMPXCHG_DBL(name, mb, rel, cl) \
__LL_SC_INLINE long \ static inline long \
__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ __ll_sc__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \ unsigned long old2, \
unsigned long new1, \ unsigned long new1, \
unsigned long new2, \ unsigned long new2, \
volatile void *ptr)) \ volatile void *ptr) \
{ \ { \
unsigned long tmp, ret; \ unsigned long tmp, ret; \
\ \
asm volatile("// __cmpxchg_double" #name "\n" \ asm volatile("// __cmpxchg_double" #name "\n" \
__LL_SC_FALLBACK( \
" prfm pstl1strm, %2\n" \ " prfm pstl1strm, %2\n" \
"1: ldxp %0, %1, %2\n" \ "1: ldxp %0, %1, %2\n" \
" eor %0, %0, %3\n" \ " eor %0, %0, %3\n" \
@ -311,18 +336,18 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
" st" #rel "xp %w0, %5, %6, %2\n" \ " st" #rel "xp %w0, %5, %6, %2\n" \
" cbnz %w0, 1b\n" \ " cbnz %w0, 1b\n" \
" " #mb "\n" \ " " #mb "\n" \
"2:" \ "2:") \
: "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
: "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
: cl); \ : cl); \
\ \
return ret; \ return ret; \
} \ }
__LL_SC_EXPORT(__cmpxchg_double##name);
__CMPXCHG_DBL( , , , ) __CMPXCHG_DBL( , , , )
__CMPXCHG_DBL(_mb, dmb ish, l, "memory") __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
#undef __CMPXCHG_DBL #undef __CMPXCHG_DBL
#undef K
#endif /* __ASM_ATOMIC_LL_SC_H */ #endif /* __ASM_ATOMIC_LL_SC_H */
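Expanded by hand, the reworked ATOMIC_OPS(add, add, I) in this file yields roughly the LL/SC fallback below for the plain add case (a sketch with whitespace simplified; all names are taken from the hunks above). The "Ir" constraint lets the compiler use an add/sub immediate when one fits and a register otherwise, and __LL_SC_FALLBACK() moves the loop into a .subsection when the LSE implementation is also built in, so it only runs if the LSE path branches to it.

/* Sketch: what ATOMIC_OP(add, add, I) generates. */
static inline void __ll_sc_atomic_add(int i, atomic_t *v)
{
	unsigned long tmp;
	int result;

	asm volatile("// atomic_add\n"
	__LL_SC_FALLBACK(
	"	prfm	pstl1strm, %2\n"
	"1:	ldxr	%w0, %2\n"
	"	add	%w0, %w0, %w3\n"
	"	stxr	%w1, %w0, %2\n"
	"	cbnz	%w1, 1b\n")
	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
	: "Ir" (i));
}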

arch/arm64/include/asm/atomic_lse.h

@ -10,22 +10,13 @@
#ifndef __ASM_ATOMIC_LSE_H #ifndef __ASM_ATOMIC_LSE_H
#define __ASM_ATOMIC_LSE_H #define __ASM_ATOMIC_LSE_H
#ifndef __ARM64_IN_ATOMIC_IMPL
#error "please don't include this file directly"
#endif
#define __LL_SC_ATOMIC(op) __LL_SC_CALL(arch_atomic_##op)
#define ATOMIC_OP(op, asm_op) \ #define ATOMIC_OP(op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \ static inline void __lse_atomic_##op(int i, atomic_t *v) \
{ \ { \
register int w0 asm ("w0") = i; \ asm volatile( \
register atomic_t *x1 asm ("x1") = v; \ " " #asm_op " %w[i], %[v]\n" \
\ : [i] "+r" (i), [v] "+Q" (v->counter) \
asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ : "r" (v)); \
" " #asm_op " %w[i], %[v]\n") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS); \
} }
ATOMIC_OP(andnot, stclr) ATOMIC_OP(andnot, stclr)
@ -36,21 +27,15 @@ ATOMIC_OP(add, stadd)
#undef ATOMIC_OP #undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ #define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
static inline int arch_atomic_fetch_##op##name(int i, atomic_t *v) \ static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \
{ \ { \
register int w0 asm ("w0") = i; \ asm volatile( \
register atomic_t *x1 asm ("x1") = v; \ " " #asm_op #mb " %w[i], %w[i], %[v]" \
: [i] "+r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ return i; \
/* LL/SC */ \
__LL_SC_ATOMIC(fetch_##op##name), \
/* LSE atomics */ \
" " #asm_op #mb " %w[i], %w[i], %[v]") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
return w0; \
} }
#define ATOMIC_FETCH_OPS(op, asm_op) \ #define ATOMIC_FETCH_OPS(op, asm_op) \
@ -68,23 +53,18 @@ ATOMIC_FETCH_OPS(add, ldadd)
#undef ATOMIC_FETCH_OPS #undef ATOMIC_FETCH_OPS
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
static inline int arch_atomic_add_return##name(int i, atomic_t *v) \ static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
{ \ { \
register int w0 asm ("w0") = i; \ u32 tmp; \
register atomic_t *x1 asm ("x1") = v; \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ asm volatile( \
/* LL/SC */ \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
__LL_SC_ATOMIC(add_return##name) \ " add %w[i], %w[i], %w[tmp]" \
__nops(1), \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
/* LSE atomics */ \ : "r" (v) \
" ldadd" #mb " %w[i], w30, %[v]\n" \ : cl); \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\ \
return w0; \ return i; \
} }
ATOMIC_OP_ADD_RETURN(_relaxed, ) ATOMIC_OP_ADD_RETURN(_relaxed, )
@ -94,41 +74,26 @@ ATOMIC_OP_ADD_RETURN( , al, "memory")
#undef ATOMIC_OP_ADD_RETURN #undef ATOMIC_OP_ADD_RETURN
static inline void arch_atomic_and(int i, atomic_t *v) static inline void __lse_atomic_and(int i, atomic_t *v)
{ {
register int w0 asm ("w0") = i; asm volatile(
register atomic_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
__LL_SC_ATOMIC(and)
__nops(1),
/* LSE atomics */
" mvn %w[i], %w[i]\n" " mvn %w[i], %w[i]\n"
" stclr %w[i], %[v]") " stclr %w[i], %[v]"
: [i] "+&r" (w0), [v] "+Q" (v->counter) : [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (x1) : "r" (v));
: __LL_SC_CLOBBERS);
} }
#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ #define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
static inline int arch_atomic_fetch_and##name(int i, atomic_t *v) \ static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \
{ \ { \
register int w0 asm ("w0") = i; \ asm volatile( \
register atomic_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
__LL_SC_ATOMIC(fetch_and##name) \
__nops(1), \
/* LSE atomics */ \
" mvn %w[i], %w[i]\n" \ " mvn %w[i], %w[i]\n" \
" ldclr" #mb " %w[i], %w[i], %[v]") \ " ldclr" #mb " %w[i], %w[i], %[v]" \
: [i] "+&r" (w0), [v] "+Q" (v->counter) \ : [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (x1) \ : "r" (v) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return w0; \ return i; \
} }
ATOMIC_FETCH_OP_AND(_relaxed, ) ATOMIC_FETCH_OP_AND(_relaxed, )
@ -138,42 +103,29 @@ ATOMIC_FETCH_OP_AND( , al, "memory")
#undef ATOMIC_FETCH_OP_AND #undef ATOMIC_FETCH_OP_AND
static inline void arch_atomic_sub(int i, atomic_t *v) static inline void __lse_atomic_sub(int i, atomic_t *v)
{ {
register int w0 asm ("w0") = i; asm volatile(
register atomic_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
__LL_SC_ATOMIC(sub)
__nops(1),
/* LSE atomics */
" neg %w[i], %w[i]\n" " neg %w[i], %w[i]\n"
" stadd %w[i], %[v]") " stadd %w[i], %[v]"
: [i] "+&r" (w0), [v] "+Q" (v->counter) : [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (x1) : "r" (v));
: __LL_SC_CLOBBERS);
} }
#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ #define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
static inline int arch_atomic_sub_return##name(int i, atomic_t *v) \ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \
{ \ { \
register int w0 asm ("w0") = i; \ u32 tmp; \
register atomic_t *x1 asm ("x1") = v; \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ asm volatile( \
/* LL/SC */ \
__LL_SC_ATOMIC(sub_return##name) \
__nops(2), \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \ " neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], w30, %[v]\n" \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
" add %w[i], %w[i], w30") \ " add %w[i], %w[i], %w[tmp]" \
: [i] "+&r" (w0), [v] "+Q" (v->counter) \ : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
: "r" (x1) \ : "r" (v) \
: __LL_SC_CLOBBERS , ##cl); \ : cl); \
\ \
return w0; \ return i; \
} }
ATOMIC_OP_SUB_RETURN(_relaxed, ) ATOMIC_OP_SUB_RETURN(_relaxed, )
@ -184,23 +136,16 @@ ATOMIC_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC_OP_SUB_RETURN #undef ATOMIC_OP_SUB_RETURN
#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ #define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \
static inline int arch_atomic_fetch_sub##name(int i, atomic_t *v) \ static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \
{ \ { \
register int w0 asm ("w0") = i; \ asm volatile( \
register atomic_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
__LL_SC_ATOMIC(fetch_sub##name) \
__nops(1), \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \ " neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], %w[i], %[v]") \ " ldadd" #mb " %w[i], %w[i], %[v]" \
: [i] "+&r" (w0), [v] "+Q" (v->counter) \ : [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (x1) \ : "r" (v) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return w0; \ return i; \
} }
ATOMIC_FETCH_OP_SUB(_relaxed, ) ATOMIC_FETCH_OP_SUB(_relaxed, )
@ -209,20 +154,14 @@ ATOMIC_FETCH_OP_SUB(_release, l, "memory")
ATOMIC_FETCH_OP_SUB( , al, "memory") ATOMIC_FETCH_OP_SUB( , al, "memory")
#undef ATOMIC_FETCH_OP_SUB #undef ATOMIC_FETCH_OP_SUB
#undef __LL_SC_ATOMIC
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op)
#define ATOMIC64_OP(op, asm_op) \ #define ATOMIC64_OP(op, asm_op) \
static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \
{ \ { \
register s64 x0 asm ("x0") = i; \ asm volatile( \
register atomic64_t *x1 asm ("x1") = v; \ " " #asm_op " %[i], %[v]\n" \
\ : [i] "+r" (i), [v] "+Q" (v->counter) \
asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ : "r" (v)); \
" " #asm_op " %[i], %[v]\n") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS); \
} }
ATOMIC64_OP(andnot, stclr) ATOMIC64_OP(andnot, stclr)
@ -233,21 +172,15 @@ ATOMIC64_OP(add, stadd)
#undef ATOMIC64_OP #undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
{ \ { \
register s64 x0 asm ("x0") = i; \ asm volatile( \
register atomic64_t *x1 asm ("x1") = v; \ " " #asm_op #mb " %[i], %[i], %[v]" \
: [i] "+r" (i), [v] "+Q" (v->counter) \
: "r" (v) \
: cl); \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ return i; \
/* LL/SC */ \
__LL_SC_ATOMIC64(fetch_##op##name), \
/* LSE atomics */ \
" " #asm_op #mb " %[i], %[i], %[v]") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\
return x0; \
} }
#define ATOMIC64_FETCH_OPS(op, asm_op) \ #define ATOMIC64_FETCH_OPS(op, asm_op) \
@ -265,23 +198,18 @@ ATOMIC64_FETCH_OPS(add, ldadd)
#undef ATOMIC64_FETCH_OPS #undef ATOMIC64_FETCH_OPS
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \ static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
{ \ { \
register s64 x0 asm ("x0") = i; \ unsigned long tmp; \
register atomic64_t *x1 asm ("x1") = v; \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ asm volatile( \
/* LL/SC */ \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \
__LL_SC_ATOMIC64(add_return##name) \ " add %[i], %[i], %x[tmp]" \
__nops(1), \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
/* LSE atomics */ \ : "r" (v) \
" ldadd" #mb " %[i], x30, %[v]\n" \ : cl); \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: __LL_SC_CLOBBERS, ##cl); \
\ \
return x0; \ return i; \
} }
ATOMIC64_OP_ADD_RETURN(_relaxed, ) ATOMIC64_OP_ADD_RETURN(_relaxed, )
@ -291,41 +219,26 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory")
#undef ATOMIC64_OP_ADD_RETURN #undef ATOMIC64_OP_ADD_RETURN
static inline void arch_atomic64_and(s64 i, atomic64_t *v) static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
{ {
register s64 x0 asm ("x0") = i; asm volatile(
register atomic64_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
__LL_SC_ATOMIC64(and)
__nops(1),
/* LSE atomics */
" mvn %[i], %[i]\n" " mvn %[i], %[i]\n"
" stclr %[i], %[v]") " stclr %[i], %[v]"
: [i] "+&r" (x0), [v] "+Q" (v->counter) : [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (x1) : "r" (v));
: __LL_SC_CLOBBERS);
} }
#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \
{ \ { \
register s64 x0 asm ("x0") = i; \ asm volatile( \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
__LL_SC_ATOMIC64(fetch_and##name) \
__nops(1), \
/* LSE atomics */ \
" mvn %[i], %[i]\n" \ " mvn %[i], %[i]\n" \
" ldclr" #mb " %[i], %[i], %[v]") \ " ldclr" #mb " %[i], %[i], %[v]" \
: [i] "+&r" (x0), [v] "+Q" (v->counter) \ : [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (x1) \ : "r" (v) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return x0; \ return i; \
} }
ATOMIC64_FETCH_OP_AND(_relaxed, ) ATOMIC64_FETCH_OP_AND(_relaxed, )
@ -335,42 +248,29 @@ ATOMIC64_FETCH_OP_AND( , al, "memory")
#undef ATOMIC64_FETCH_OP_AND #undef ATOMIC64_FETCH_OP_AND
static inline void arch_atomic64_sub(s64 i, atomic64_t *v) static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
{ {
register s64 x0 asm ("x0") = i; asm volatile(
register atomic64_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
__LL_SC_ATOMIC64(sub)
__nops(1),
/* LSE atomics */
" neg %[i], %[i]\n" " neg %[i], %[i]\n"
" stadd %[i], %[v]") " stadd %[i], %[v]"
: [i] "+&r" (x0), [v] "+Q" (v->counter) : [i] "+&r" (i), [v] "+Q" (v->counter)
: "r" (x1) : "r" (v));
: __LL_SC_CLOBBERS);
} }
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \ static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \
{ \ { \
register s64 x0 asm ("x0") = i; \ unsigned long tmp; \
register atomic64_t *x1 asm ("x1") = v; \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ asm volatile( \
/* LL/SC */ \
__LL_SC_ATOMIC64(sub_return##name) \
__nops(2), \
/* LSE atomics */ \
" neg %[i], %[i]\n" \ " neg %[i], %[i]\n" \
" ldadd" #mb " %[i], x30, %[v]\n" \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \
" add %[i], %[i], x30") \ " add %[i], %[i], %x[tmp]" \
: [i] "+&r" (x0), [v] "+Q" (v->counter) \ : [i] "+&r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
: "r" (x1) \ : "r" (v) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return x0; \ return i; \
} }
ATOMIC64_OP_SUB_RETURN(_relaxed, ) ATOMIC64_OP_SUB_RETURN(_relaxed, )
@ -381,23 +281,16 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC64_OP_SUB_RETURN #undef ATOMIC64_OP_SUB_RETURN
#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \
{ \ { \
register s64 x0 asm ("x0") = i; \ asm volatile( \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
__LL_SC_ATOMIC64(fetch_sub##name) \
__nops(1), \
/* LSE atomics */ \
" neg %[i], %[i]\n" \ " neg %[i], %[i]\n" \
" ldadd" #mb " %[i], %[i], %[v]") \ " ldadd" #mb " %[i], %[i], %[v]" \
: [i] "+&r" (x0), [v] "+Q" (v->counter) \ : [i] "+&r" (i), [v] "+Q" (v->counter) \
: "r" (x1) \ : "r" (v) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return x0; \ return i; \
} }
ATOMIC64_FETCH_OP_SUB(_relaxed, ) ATOMIC64_FETCH_OP_SUB(_relaxed, )
@ -407,54 +300,44 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory")
#undef ATOMIC64_FETCH_OP_SUB #undef ATOMIC64_FETCH_OP_SUB
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
{ {
register long x0 asm ("x0") = (long)v; unsigned long tmp;
asm volatile(ARM64_LSE_ATOMIC_INSN( asm volatile(
/* LL/SC */ "1: ldr %x[tmp], %[v]\n"
__LL_SC_ATOMIC64(dec_if_positive) " subs %[ret], %x[tmp], #1\n"
__nops(6),
/* LSE atomics */
"1: ldr x30, %[v]\n"
" subs %[ret], x30, #1\n"
" b.lt 2f\n" " b.lt 2f\n"
" casal x30, %[ret], %[v]\n" " casal %x[tmp], %[ret], %[v]\n"
" sub x30, x30, #1\n" " sub %x[tmp], %x[tmp], #1\n"
" sub x30, x30, %[ret]\n" " sub %x[tmp], %x[tmp], %[ret]\n"
" cbnz x30, 1b\n" " cbnz %x[tmp], 1b\n"
"2:") "2:"
: [ret] "+&r" (x0), [v] "+Q" (v->counter) : [ret] "+&r" (v), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)
: :
: __LL_SC_CLOBBERS, "cc", "memory"); : "cc", "memory");
return x0; return (long)v;
} }
#undef __LL_SC_ATOMIC64
#define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op)
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \ #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \ static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \ u##sz old, \
u##sz new) \ u##sz new) \
{ \ { \
register unsigned long x0 asm ("x0") = (unsigned long)ptr; \ register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
register u##sz x1 asm ("x1") = old; \ register u##sz x1 asm ("x1") = old; \
register u##sz x2 asm ("x2") = new; \ register u##sz x2 asm ("x2") = new; \
unsigned long tmp; \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ asm volatile( \
/* LL/SC */ \ " mov %" #w "[tmp], %" #w "[old]\n" \
__LL_SC_CMPXCHG(name##sz) \ " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \
__nops(2), \ " mov %" #w "[ret], %" #w "[tmp]" \
/* LSE atomics */ \ : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr), \
" mov " #w "30, %" #w "[old]\n" \ [tmp] "=&r" (tmp) \
" cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n" \
" mov %" #w "[ret], " #w "30") \
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \
: [old] "r" (x1), [new] "r" (x2) \ : [old] "r" (x1), [new] "r" (x2) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return x0; \ return x0; \
} }
@ -476,13 +359,10 @@ __CMPXCHG_CASE(w, h, mb_, 16, al, "memory")
__CMPXCHG_CASE(w, , mb_, 32, al, "memory") __CMPXCHG_CASE(w, , mb_, 32, al, "memory")
__CMPXCHG_CASE(x, , mb_, 64, al, "memory") __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __LL_SC_CMPXCHG
#undef __CMPXCHG_CASE #undef __CMPXCHG_CASE
#define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op)
#define __CMPXCHG_DBL(name, mb, cl...) \ #define __CMPXCHG_DBL(name, mb, cl...) \
static inline long __cmpxchg_double##name(unsigned long old1, \ static inline long __lse__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \ unsigned long old2, \
unsigned long new1, \ unsigned long new1, \
unsigned long new2, \ unsigned long new2, \
@ -496,20 +376,16 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
register unsigned long x3 asm ("x3") = new2; \ register unsigned long x3 asm ("x3") = new2; \
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
\ \
asm volatile(ARM64_LSE_ATOMIC_INSN( \ asm volatile( \
/* LL/SC */ \
__LL_SC_CMPXCHG_DBL(name) \
__nops(3), \
/* LSE atomics */ \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
" eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old1], %[old1], %[oldval1]\n" \
" eor %[old2], %[old2], %[oldval2]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \
" orr %[old1], %[old1], %[old2]") \ " orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \
[v] "+Q" (*(unsigned long *)ptr) \ [v] "+Q" (*(unsigned long *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
: __LL_SC_CLOBBERS, ##cl); \ : cl); \
\ \
return x0; \ return x0; \
} }
@ -517,7 +393,6 @@ static inline long __cmpxchg_double##name(unsigned long old1, \
__CMPXCHG_DBL( , ) __CMPXCHG_DBL( , )
__CMPXCHG_DBL(_mb, al, "memory") __CMPXCHG_DBL(_mb, al, "memory")
#undef __LL_SC_CMPXCHG_DBL
#undef __CMPXCHG_DBL #undef __CMPXCHG_DBL
#endif /* __ASM_ATOMIC_LSE_H */ #endif /* __ASM_ATOMIC_LSE_H */
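As a worked example, ATOMIC_OP_ADD_RETURN( , al, "memory") above now generates roughly the function below (a sketch; names come from the hunks above). The hard-coded w30 scratch register of the old out-of-line scheme is replaced by a compiler-allocated temporary, since the LL/SC calling convention no longer dictates register usage here.

/* Sketch: what ATOMIC_OP_ADD_RETURN( , al, "memory") generates. */
static inline int __lse_atomic_add_return(int i, atomic_t *v)
{
	u32 tmp;

	asm volatile(
	"	ldaddal	%w[i], %w[tmp], %[v]\n"	/* tmp = old value, *v += i */
	"	add	%w[i], %w[i], %w[tmp]"	/* return value = old + i */
	: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp)
	: "r" (v)
	: "memory");

	return i;
}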

arch/arm64/include/asm/cmpxchg.h

@@ -10,7 +10,6 @@
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
 
-#include <asm/atomic.h>
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
@@ -104,6 +103,50 @@ __XCHG_GEN(_mb)
 #define arch_xchg_release(...)	__xchg_wrapper(_rel, __VA_ARGS__)
 #define arch_xchg(...)		__xchg_wrapper( _mb, __VA_ARGS__)
 
+#define __CMPXCHG_CASE(name, sz)					\
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,	\
+					      u##sz old,		\
+					      u##sz new)		\
+{									\
+	return __lse_ll_sc_body(_cmpxchg_case_##name##sz,		\
+				ptr, old, new);				\
+}
+
+__CMPXCHG_CASE(    ,  8)
+__CMPXCHG_CASE(    , 16)
+__CMPXCHG_CASE(    , 32)
+__CMPXCHG_CASE(    , 64)
+__CMPXCHG_CASE(acq_,  8)
+__CMPXCHG_CASE(acq_, 16)
+__CMPXCHG_CASE(acq_, 32)
+__CMPXCHG_CASE(acq_, 64)
+__CMPXCHG_CASE(rel_,  8)
+__CMPXCHG_CASE(rel_, 16)
+__CMPXCHG_CASE(rel_, 32)
+__CMPXCHG_CASE(rel_, 64)
+__CMPXCHG_CASE(mb_,  8)
+__CMPXCHG_CASE(mb_, 16)
+__CMPXCHG_CASE(mb_, 32)
+__CMPXCHG_CASE(mb_, 64)
+
+#undef __CMPXCHG_CASE
+
+#define __CMPXCHG_DBL(name)						\
+static inline long __cmpxchg_double##name(unsigned long old1,		\
+					  unsigned long old2,		\
+					  unsigned long new1,		\
+					  unsigned long new2,		\
+					  volatile void *ptr)		\
+{									\
+	return __lse_ll_sc_body(_cmpxchg_double##name,			\
+				old1, old2, new1, new2, ptr);		\
+}
+
+__CMPXCHG_DBL(   )
+__CMPXCHG_DBL(_mb)
+
+#undef __CMPXCHG_DBL
+
 #define __CMPXCHG_GEN(sfx)						\
 static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
 					   unsigned long old,		\
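Put together with the reworked asm/lse.h below, a full-barrier 64-bit cmpxchg now resolves at run time through the static key rather than by instruction patching; roughly (a sketch, names taken from the hunks in this commit):

/* Sketch: what __CMPXCHG_CASE(mb_, 64) above boils down to after expansion. */
static inline u64 __cmpxchg_case_mb_64(volatile void *ptr, u64 old, u64 new)
{
	return system_uses_lse_atomics() ?
	       __lse__cmpxchg_case_mb_64(ptr, old, new) :	/* CASAL */
	       __ll_sc__cmpxchg_case_mb_64(ptr, old, new);	/* LDXR/STLXR loop */
}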

arch/arm64/include/asm/lse.h

@@ -2,56 +2,47 @@
 #ifndef __ASM_LSE_H
 #define __ASM_LSE_H
 
+#include <asm/atomic_ll_sc.h>
+
 #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
 
 #include <linux/compiler_types.h>
 #include <linux/export.h>
+#include <linux/jump_label.h>
 #include <linux/stringify.h>
 
 #include <asm/alternative.h>
+#include <asm/atomic_lse.h>
 #include <asm/cpucaps.h>
 
-#ifdef __ASSEMBLER__
-
-.arch_extension	lse
-
-.macro alt_lse, llsc, lse
-	alternative_insn "\llsc", "\lse", ARM64_HAS_LSE_ATOMICS
-.endm
-
-#else	/* __ASSEMBLER__ */
-
 __asm__(".arch_extension	lse");
 
-/* Move the ll/sc atomics out-of-line */
-#define __LL_SC_INLINE		notrace
-#define __LL_SC_PREFIX(x)	__ll_sc_##x
-#define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))
-
-/* Macro for constructing calls to out-of-line ll/sc atomics */
-#define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
-#define __LL_SC_CLOBBERS	"x16", "x17", "x30"
+extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
+
+static inline bool system_uses_lse_atomics(void)
+{
+	return (static_branch_likely(&arm64_const_caps_ready)) &&
+		static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+}
+
+#define __lse_ll_sc_body(op, ...)					\
+({									\
+	system_uses_lse_atomics() ?					\
+		__lse_##op(__VA_ARGS__) :				\
+		__ll_sc_##op(__VA_ARGS__);				\
+})
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
 	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
 
-#endif	/* __ASSEMBLER__ */
-
 #else	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
 
-#ifdef __ASSEMBLER__
-
-.macro alt_lse, llsc, lse
-	\llsc
-.endm
-
-#else	/* __ASSEMBLER__ */
-
-#define __LL_SC_INLINE		static inline
-#define __LL_SC_PREFIX(x)	x
-#define __LL_SC_EXPORT(x)
+static inline bool system_uses_lse_atomics(void) { return false; }
+
+#define __lse_ll_sc_body(op, ...)		__ll_sc_##op(__VA_ARGS__)
 
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
 
-#endif	/* __ASSEMBLER__ */
-
 #endif	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
 #endif	/* __ASM_LSE_H */
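The same dispatch idea can be exercised from userspace, which may help when reasoning about the trade-off: probe the hardware capability once, then branch between an LSE instruction and an exclusive-load/store loop. The analogue below is not kernel code; the file and function names are invented, and it assumes an AArch64 Linux toolchain. Build with -march=armv8.1-a so the STADD path assembles; at run time that path is only taken when HWCAP_ATOMICS is reported.

/* lse-or-llsc.c (name invented): userspace analogue of system_uses_lse_atomics().
 * Build: gcc -O2 -march=armv8.1-a -o lse-or-llsc lse-or-llsc.c
 */
#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS	(1 << 8)	/* as defined in the arm64 uapi hwcap header */
#endif

static int have_lse;

static void my_atomic_add(int i, int *p)
{
	if (have_lse) {
		/* LSE: a single far atomic add */
		asm volatile("stadd %w0, %1" : "+r" (i), "+Q" (*p));
	} else {
		/* LL/SC: load-exclusive/store-exclusive retry loop */
		int val, status;
		asm volatile(
		"1:	ldxr	%w0, %2\n"
		"	add	%w0, %w0, %w3\n"
		"	stxr	%w1, %w0, %2\n"
		"	cbnz	%w1, 1b"
		: "=&r" (val), "=&r" (status), "+Q" (*p)
		: "r" (i));
	}
}

int main(void)
{
	int counter = 0;

	have_lse = !!(getauxval(AT_HWCAP) & HWCAP_ATOMICS);
	my_atomic_add(5, &counter);
	printf("LSE %s, counter = %d\n", have_lse ? "used" : "not available", counter);
	return 0;
}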

arch/arm64/lib/Makefile

@@ -11,25 +11,6 @@ CFLAGS_REMOVE_xor-neon.o	+= -mgeneral-regs-only
 CFLAGS_xor-neon.o		+= -ffreestanding
 endif
 
-# Tell the compiler to treat all general purpose registers (with the
-# exception of the IP registers, which are already handled by the caller
-# in case of a PLT) as callee-saved, which allows for efficient runtime
-# patching of the bl instruction in the caller with an atomic instruction
-# when supported by the CPU. Result and argument registers are handled
-# correctly, based on the function prototype.
-lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
-CFLAGS_atomic_ll_sc.o	:= -ffixed-x1 -ffixed-x2			\
-			   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6	\
-			   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9	\
-			   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
-			   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
-			   -fcall-saved-x18 -fomit-frame-pointer
-CFLAGS_REMOVE_atomic_ll_sc.o := $(CC_FLAGS_FTRACE)
-GCOV_PROFILE_atomic_ll_sc.o	:= n
-KASAN_SANITIZE_atomic_ll_sc.o	:= n
-KCOV_INSTRUMENT_atomic_ll_sc.o	:= n
-UBSAN_SANITIZE_atomic_ll_sc.o	:= n
-
 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
 
 obj-$(CONFIG_CRC32) += crc32.o

arch/arm64/lib/atomic_ll_sc.c (deleted)

@@ -1,3 +0,0 @@
-#include <asm/atomic.h>
-#define __ARM64_IN_ATOMIC_IMPL
-#include <asm/atomic_ll_sc.h>

kernel/jump_label.c

@@ -407,7 +407,9 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init)
 		return false;
 
 	if (!kernel_text_address(jump_entry_code(entry))) {
-		WARN_ONCE(1, "can't patch jump_label at %pS", (void *)jump_entry_code(entry));
+		WARN_ONCE(!jump_entry_is_init(entry),
+			  "can't patch jump_label at %pS",
+			  (void *)jump_entry_code(entry));
 		return false;
 	}