From 42b6b10a54f0bf00397511fc6d5b8a296b405563 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 8 Jul 2021 19:35:32 -0700
Subject: [PATCH 01/56] arm64: mte: avoid TFSRE0_EL1 related operations unless
 in async mode

There is no reason to touch TFSRE0_EL1 nor issue a DSB unless our task
is in asynchronous mode. Since these operations (especially the DSB) may
be expensive on certain microarchitectures, only perform them if
necessary.

Furthermore, stop clearing TFSRE0_EL1 on entry because it will be
cleared on exit and it is not necessary to have any particular value in
TFSRE0_EL1 between entry and exit.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ib353a63e3d0abc2b0b008e96aa2d9692cfc1b815
Link: https://lore.kernel.org/r/20210709023532.2133673-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 863d44f73028..5cf160135411 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -133,29 +133,37 @@ alternative_cb_end
 	.endm
 
 	/* Check for MTE asynchronous tag check faults */
-	.macro check_mte_async_tcf, tmp, ti_flags
+	.macro check_mte_async_tcf, tmp, ti_flags, thread_sctlr
 #ifdef CONFIG_ARM64_MTE
 	.arch_extension lse
 alternative_if_not ARM64_MTE
 	b	1f
 alternative_else_nop_endif
+	/*
+	 * Asynchronous tag check faults are only possible in ASYNC (2) or
+	 * ASYM (3) modes. In each of these modes bit 1 of SCTLR_EL1.TCF0 is
+	 * set, so skip the check if it is unset.
+	 */
+	tbz	\thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f
 	mrs_s	\tmp, SYS_TFSRE0_EL1
 	tbz	\tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f
 	/* Asynchronous TCF occurred for TTBR0 access, set the TI flag */
 	mov	\tmp, #_TIF_MTE_ASYNC_FAULT
 	add	\ti_flags, tsk, #TSK_TI_FLAGS
 	stset	\tmp, [\ti_flags]
-	msr_s	SYS_TFSRE0_EL1, xzr
 1:
 #endif
 	.endm
 
 	/* Clear the MTE asynchronous tag check faults */
-	.macro clear_mte_async_tcf
+	.macro clear_mte_async_tcf thread_sctlr
 #ifdef CONFIG_ARM64_MTE
 alternative_if ARM64_MTE
+	/* See comment in check_mte_async_tcf above. */
+	tbz	\thread_sctlr, #(SCTLR_EL1_TCF0_SHIFT + 1), 1f
 	dsb	ish
 	msr_s	SYS_TFSRE0_EL1, xzr
+1:
 alternative_else_nop_endif
 #endif
 	.endm
@@ -231,8 +239,8 @@ alternative_else_nop_endif
 	disable_step_tsk x19, x20
 
 	/* Check for asynchronous tag check faults in user space */
-	check_mte_async_tcf x22, x23
-	apply_ssbd 1, x22, x23
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	check_mte_async_tcf x22, x23, x0
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 alternative_if ARM64_HAS_ADDRESS_AUTH
@@ -245,7 +253,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 	 * was disabled on kernel exit then we would have left the kernel IA
 	 * installed so there is no need to install it again.
 	 */
-	ldr	x0, [tsk, THREAD_SCTLR_USER]
 	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
 	__ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
 	b	2f
@@ -258,6 +265,8 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 alternative_else_nop_endif
 #endif
 
+	apply_ssbd 1, x22, x23
+
 	mte_set_kernel_gcr x22, x23
 
 	scs_load tsk
@@ -362,6 +371,10 @@ alternative_else_nop_endif
 3:
 	scs_save tsk
 
+	/* Ignore asynchronous tag check faults in the uaccess routines */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	clear_mte_async_tcf x0
+
 #ifdef CONFIG_ARM64_PTR_AUTH
 alternative_if ARM64_HAS_ADDRESS_AUTH
 	/*
@@ -371,7 +384,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 	 *
 	 * No kernel C function calls after this.
 	 */
-	ldr	x0, [tsk, THREAD_SCTLR_USER]
 	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
 	__ptrauth_keys_install_user tsk, x0, x1, x2
 	b	2f
@@ -599,8 +611,6 @@ SYM_CODE_START_LOCAL(ret_to_user)
 	cbnz	x2, work_pending
 finish_ret_to_user:
 	user_enter_irqoff
-	/* Ignore asynchronous tag check faults in the uaccess routines */
-	clear_mte_async_tcf
 	enable_step_tsk x19, x2
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	bl	stackleak_erase

From 638982a03fbcdcda9bcb97b62ceec2d4a0f88162 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 27 Jul 2021 13:52:55 -0700
Subject: [PATCH 02/56] arm64: mte: rename gcr_user_excl to mte_ctrl

We are going to use this field to store more data. To prepare for
that, rename it and change the users to rely on the bit position of
gcr_user_excl in mte_ctrl.

Link: https://linux-review.googlesource.com/id/Ie1fd18e480100655f5d22137f5b22f4f3a9f9e2e
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210727205300.2554659-2-pcc@google.com
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/processor.h |  5 ++++-
 arch/arm64/kernel/asm-offsets.c    |  2 +-
 arch/arm64/kernel/entry.S          |  4 ++--
 arch/arm64/kernel/mte.c            | 14 ++++++++------
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index b6517fd03d7b..54d34276fa91 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -16,6 +16,9 @@
  */
 #define NET_IP_ALIGN	0
 
+#define MTE_CTRL_GCR_USER_EXCL_SHIFT	0
+#define MTE_CTRL_GCR_USER_EXCL_MASK	0xffff
+
 #ifndef __ASSEMBLY__
 
 #include <linux/build_bug.h>
@@ -153,7 +156,7 @@ struct thread_struct {
 #endif
 #endif
 #ifdef CONFIG_ARM64_MTE
-	u64			gcr_user_excl;
+	u64			mte_ctrl;
 #endif
 	u64			sctlr_user;
 };
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c85670692afa..551427ae8cc5 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -52,7 +52,7 @@ int main(void)
   DEFINE(THREAD_KEYS_KERNEL,	offsetof(struct task_struct, thread.keys_kernel));
 #endif
 #ifdef CONFIG_ARM64_MTE
-  DEFINE(THREAD_GCR_EL1_USER,	offsetof(struct task_struct, thread.gcr_user_excl));
+  DEFINE(THREAD_MTE_CTRL,	offsetof(struct task_struct, thread.mte_ctrl));
 #endif
   BLANK();
   DEFINE(S_X0,			offsetof(struct pt_regs, regs[0]));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5cf160135411..4b779f198486 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -175,7 +175,7 @@ alternative_else_nop_endif
 	 * the RRND (bit[16]) setting.
 	 */
 	mrs_s	\tmp2, SYS_GCR_EL1
-	bfi	\tmp2, \tmp, #0, #16
+	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
 	msr_s	SYS_GCR_EL1, \tmp2
 #endif
 	.endm
@@ -198,7 +198,7 @@ alternative_else_nop_endif
 alternative_if_not ARM64_MTE
 	b	1f
 alternative_else_nop_endif
-	ldr	\tmp, [\tsk, #THREAD_GCR_EL1_USER]
+	ldr	\tmp, [\tsk, #THREAD_MTE_CTRL]
 
 	mte_set_gcr \tmp, \tmp2
 1:
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 36f51b0e438a..a5269558210c 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -195,7 +195,7 @@ void mte_check_tfsr_el1(void)
 
 static void set_gcr_el1_excl(u64 excl)
 {
-	current->thread.gcr_user_excl = excl;
+	current->thread.mte_ctrl = excl;
 
 	/*
 	 * SYS_GCR_EL1 will be set to current->thread.gcr_user_excl value
@@ -260,8 +260,8 @@ void mte_suspend_exit(void)
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
-	u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
-		       SYS_GCR_EL1_EXCL_MASK;
+	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
+			SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;
 
 	if (!system_supports_mte())
 		return 0;
@@ -282,10 +282,10 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
 	if (task != current) {
 		task->thread.sctlr_user = sctlr;
-		task->thread.gcr_user_excl = gcr_excl;
+		task->thread.mte_ctrl = mte_ctrl;
 	} else {
 		set_task_sctlr_el1(sctlr);
-		set_gcr_el1_excl(gcr_excl);
+		set_gcr_el1_excl(mte_ctrl);
 	}
 
 	return 0;
@@ -294,7 +294,9 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 long get_mte_ctrl(struct task_struct *task)
 {
 	unsigned long ret;
-	u64 incl = ~task->thread.gcr_user_excl & SYS_GCR_EL1_EXCL_MASK;
+	u64 mte_ctrl = task->thread.mte_ctrl;
+	u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
+		   SYS_GCR_EL1_EXCL_MASK;
 
 	if (!system_supports_mte())
 		return 0;

From 433c38f40f6a81cf3988b9372f2983912737f322 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 27 Jul 2021 13:52:56 -0700
Subject: [PATCH 03/56] arm64: mte: change ASYNC and SYNC TCF settings into
 bitfields

Allow the user program to specify both ASYNC and SYNC TCF modes by
repurposing the existing constants as bitfields. This will allow the
kernel to select one of the modes on behalf of the user program. With
this patch the kernel will always select async mode, but a subsequent
patch will make this configurable.

Link: https://linux-review.googlesource.com/id/Icc5923c85a8ea284588cc399ae74fd19ec291230
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210727205300.2554659-3-pcc@google.com
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/processor.h |  3 ++
 arch/arm64/kernel/mte.c            | 70 ++++++++++++------------------
 include/uapi/linux/prctl.h         | 11 ++---
 3 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 54d34276fa91..ee82ebbb5e5a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -19,6 +19,9 @@
 #define MTE_CTRL_GCR_USER_EXCL_SHIFT	0
 #define MTE_CTRL_GCR_USER_EXCL_MASK	0xffff
 
+#define MTE_CTRL_TCF_SYNC		(1UL << 16)
+#define MTE_CTRL_TCF_ASYNC		(1UL << 17)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/build_bug.h>
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index a5269558210c..3b6b68518003 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -193,14 +193,19 @@ void mte_check_tfsr_el1(void)
 }
 #endif
 
-static void set_gcr_el1_excl(u64 excl)
+static void mte_update_sctlr_user(struct task_struct *task)
 {
-	current->thread.mte_ctrl = excl;
+	unsigned long sctlr = task->thread.sctlr_user;
+	unsigned long pref = MTE_CTRL_TCF_ASYNC;
+	unsigned long mte_ctrl = task->thread.mte_ctrl;
+	unsigned long resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 
-	/*
-	 * SYS_GCR_EL1 will be set to current->thread.gcr_user_excl value
-	 * by mte_set_user_gcr() in kernel_exit,
-	 */
+	sctlr &= ~SCTLR_EL1_TCF0_MASK;
+	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
+		sctlr |= SCTLR_EL1_TCF0_ASYNC;
+	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
+		sctlr |= SCTLR_EL1_TCF0_SYNC;
+	task->thread.sctlr_user = sctlr;
 }
 
 void mte_thread_init_user(void)
@@ -212,15 +217,16 @@ void mte_thread_init_user(void)
 	dsb(ish);
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
-	/* disable tag checking */
-	set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
-			   SCTLR_EL1_TCF0_NONE);
-	/* reset tag generation mask */
-	set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
+	/* disable tag checking and reset tag generation mask */
+	current->thread.mte_ctrl = MTE_CTRL_GCR_USER_EXCL_MASK;
+	mte_update_sctlr_user(current);
+	set_task_sctlr_el1(current->thread.sctlr_user);
 }
 
 void mte_thread_switch(struct task_struct *next)
 {
+	mte_update_sctlr_user(next);
+
 	/*
 	 * Check if an async tag exception occurred at EL1.
 	 *
@@ -259,33 +265,21 @@ void mte_suspend_exit(void)
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
-	u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
 	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
 			SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;
 
 	if (!system_supports_mte())
 		return 0;
 
-	switch (arg & PR_MTE_TCF_MASK) {
-	case PR_MTE_TCF_NONE:
-		sctlr |= SCTLR_EL1_TCF0_NONE;
-		break;
-	case PR_MTE_TCF_SYNC:
-		sctlr |= SCTLR_EL1_TCF0_SYNC;
-		break;
-	case PR_MTE_TCF_ASYNC:
-		sctlr |= SCTLR_EL1_TCF0_ASYNC;
-		break;
-	default:
-		return -EINVAL;
-	}
+	if (arg & PR_MTE_TCF_ASYNC)
+		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
+	if (arg & PR_MTE_TCF_SYNC)
+		mte_ctrl |= MTE_CTRL_TCF_SYNC;
 
-	if (task != current) {
-		task->thread.sctlr_user = sctlr;
-		task->thread.mte_ctrl = mte_ctrl;
-	} else {
-		set_task_sctlr_el1(sctlr);
-		set_gcr_el1_excl(mte_ctrl);
+	task->thread.mte_ctrl = mte_ctrl;
+	if (task == current) {
+		mte_update_sctlr_user(task);
+		set_task_sctlr_el1(task->thread.sctlr_user);
 	}
 
 	return 0;
@@ -302,18 +296,10 @@ long get_mte_ctrl(struct task_struct *task)
 		return 0;
 
 	ret = incl << PR_MTE_TAG_SHIFT;
-
-	switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
-	case SCTLR_EL1_TCF0_NONE:
-		ret |= PR_MTE_TCF_NONE;
-		break;
-	case SCTLR_EL1_TCF0_SYNC:
-		ret |= PR_MTE_TCF_SYNC;
-		break;
-	case SCTLR_EL1_TCF0_ASYNC:
+	if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
 		ret |= PR_MTE_TCF_ASYNC;
-		break;
-	}
+	if (mte_ctrl & MTE_CTRL_TCF_SYNC)
+		ret |= PR_MTE_TCF_SYNC;
 
 	return ret;
 }
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 967d9c55323d..89de78a14b9b 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -234,14 +234,15 @@ struct prctl_mm_map {
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
 /* MTE tag check fault modes */
-# define PR_MTE_TCF_SHIFT		1
-# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
-# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_NONE		0
+# define PR_MTE_TCF_SYNC		(1UL << 1)
+# define PR_MTE_TCF_ASYNC		(1UL << 2)
+# define PR_MTE_TCF_MASK		(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
 /* MTE tag inclusion mask */
 # define PR_MTE_TAG_SHIFT		3
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
+/* Unused; kept only for source compatibility */
+# define PR_MTE_TCF_SHIFT		1
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57

From d2e0d8f9746d3e09bcaf15e46c792e40819c9186 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 27 Jul 2021 13:52:57 -0700
Subject: [PATCH 04/56] arm64: move preemption disablement to prctl handlers

In the next patch, we will start reading sctlr_user from
mte_update_sctlr_user and subsequently writing a new value based on the
task's TCF setting and potentially the per-CPU TCF preference. This
means that we need to be careful to disable preemption around any
code sequences that read from sctlr_user and subsequently write to
sctlr_user and/or SCTLR_EL1, so that we don't end up writing a stale
value (based on the previous CPU's TCF preference) to either of them.

We currently have four such sequences, in the prctl handlers for
PR_SET_TAGGED_ADDR_CTRL and PR_PAC_SET_ENABLED_KEYS, as well as in
the task initialization code that resets the prctl settings. Change
the prctl handlers to disable preemption in the handlers themselves
rather than the functions that they call, and change the task
initialization code to call the respective prctl handlers instead of
setting sctlr_user directly.

As a result of this change, we no longer need the helper function
set_task_sctlr_el1, nor does its behavior make sense any more, so
remove it.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ic0e8a0c00bb47d786c1e8011df0b7fe99bee4bb5
Link: https://lore.kernel.org/r/20210727205300.2554659-4-pcc@google.com
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pointer_auth.h | 12 ++++++------
 arch/arm64/include/asm/processor.h    |  2 +-
 arch/arm64/kernel/mte.c               |  8 ++++----
 arch/arm64/kernel/pointer_auth.c      | 10 ++++++----
 arch/arm64/kernel/process.c           | 21 +++++++--------------
 5 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 28a78b67d9b4..efb098de3a84 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -10,6 +10,9 @@
 #include <asm/memory.h>
 #include <asm/sysreg.h>
 
+#define PR_PAC_ENABLED_KEYS_MASK                                               \
+	(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
 #ifdef CONFIG_ARM64_PTR_AUTH
 /*
  * Each key is a 128-bit quantity which is split across a pair of 64-bit
@@ -117,9 +120,9 @@ static __always_inline void ptrauth_enable(void)
 									       \
 		/* enable all keys */                                          \
 		if (system_supports_address_auth())                            \
-			set_task_sctlr_el1(current->thread.sctlr_user |        \
-					   SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |   \
-					   SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);   \
+			ptrauth_set_enabled_keys(current,                      \
+						 PR_PAC_ENABLED_KEYS_MASK,     \
+						 PR_PAC_ENABLED_KEYS_MASK);    \
 	} while (0)
 
 #define ptrauth_thread_switch_user(tsk)                                        \
@@ -146,7 +149,4 @@ static __always_inline void ptrauth_enable(void)
 #define ptrauth_thread_switch_kernel(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
 
-#define PR_PAC_ENABLED_KEYS_MASK                                               \
-	(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
-
 #endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ee82ebbb5e5a..ee2bdc1b9f5b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -259,7 +259,7 @@ extern void release_thread(struct task_struct *);
 
 unsigned long get_wchan(struct task_struct *p);
 
-void set_task_sctlr_el1(u64 sctlr);
+void update_sctlr_el1(u64 sctlr);
 
 /* Thread switching */
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 3b6b68518003..737503a148bb 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -218,9 +218,7 @@ void mte_thread_init_user(void)
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking and reset tag generation mask */
-	current->thread.mte_ctrl = MTE_CTRL_GCR_USER_EXCL_MASK;
-	mte_update_sctlr_user(current);
-	set_task_sctlr_el1(current->thread.sctlr_user);
+	set_mte_ctrl(current, 0);
 }
 
 void mte_thread_switch(struct task_struct *next)
@@ -278,8 +276,10 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
 	task->thread.mte_ctrl = mte_ctrl;
 	if (task == current) {
+		preempt_disable();
 		mte_update_sctlr_user(task);
-		set_task_sctlr_el1(task->thread.sctlr_user);
+		update_sctlr_el1(task->thread.sctlr_user);
+		preempt_enable();
 	}
 
 	return 0;
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index 60901ab0a7fe..2708b620b4ae 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -67,7 +67,7 @@ static u64 arg_to_enxx_mask(unsigned long arg)
 int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
 			     unsigned long enabled)
 {
-	u64 sctlr = tsk->thread.sctlr_user;
+	u64 sctlr;
 
 	if (!system_supports_address_auth())
 		return -EINVAL;
@@ -78,12 +78,14 @@ int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
 	if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
 		return -EINVAL;
 
+	preempt_disable();
+	sctlr = tsk->thread.sctlr_user;
 	sctlr &= ~arg_to_enxx_mask(keys);
 	sctlr |= arg_to_enxx_mask(enabled);
+	tsk->thread.sctlr_user = sctlr;
 	if (tsk == current)
-		set_task_sctlr_el1(sctlr);
-	else
-		tsk->thread.sctlr_user = sctlr;
+		update_sctlr_el1(sctlr);
+	preempt_enable();
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c8989b999250..8811c303608b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -477,7 +477,13 @@ static void compat_thread_switch(struct task_struct *next)
 		set_tsk_thread_flag(next, TIF_NOTIFY_RESUME);
 }
 
-static void update_sctlr_el1(u64 sctlr)
+/*
+ * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore
+ * this function must be called with preemption disabled and the update to
+ * sctlr_user must be made in the same preemption disabled block so that
+ * __switch_to() does not see the variable update before the SCTLR_EL1 one.
+ */
+void update_sctlr_el1(u64 sctlr)
 {
 	/*
 	 * EnIA must not be cleared while in the kernel as this is necessary for
@@ -489,19 +495,6 @@ static void update_sctlr_el1(u64 sctlr)
 	isb();
 }
 
-void set_task_sctlr_el1(u64 sctlr)
-{
-	/*
-	 * __switch_to() checks current->thread.sctlr as an
-	 * optimisation. Disable preemption so that it does not see
-	 * the variable update before the SCTLR_EL1 one.
-	 */
-	preempt_disable();
-	current->thread.sctlr_user = sctlr;
-	update_sctlr_el1(sctlr);
-	preempt_enable();
-}
-
 /*
  * Thread switching.
  */

From dd061616edcf7deea53d878c690c90d44785d81a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 27 Jul 2021 13:52:58 -0700
Subject: [PATCH 05/56] arm64: mte: introduce a per-CPU tag checking mode
 preference

Add a per-CPU sysfs node, mte_tcf_preferred, that allows the preferred
tag checking mode to be configured. The current possible values are
async and sync.

Link: https://linux-review.googlesource.com/id/I7493dcd533a2785a1437b16c3f6b50919f840854
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210727205300.2554659-5-pcc@google.com
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/mte.c | 65 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 737503a148bb..20a40e5a14fa 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/prctl.h>
@@ -26,6 +27,8 @@ u64 gcr_kernel_excl __ro_after_init;
 
 static bool report_fault_once = true;
 
+static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
+
 #ifdef CONFIG_KASAN_HW_TAGS
 /* Whether the MTE asynchronous mode is enabled. */
 DEFINE_STATIC_KEY_FALSE(mte_async_mode);
@@ -195,11 +198,18 @@ void mte_check_tfsr_el1(void)
 
 static void mte_update_sctlr_user(struct task_struct *task)
 {
+	/*
+	 * This must be called with preemption disabled and can only be called
+	 * on the current or next task since the CPU must match where the thread
+	 * is going to run. The caller is responsible for calling
+	 * update_sctlr_el1() later in the same preemption disabled block.
+	 */
 	unsigned long sctlr = task->thread.sctlr_user;
-	unsigned long pref = MTE_CTRL_TCF_ASYNC;
 	unsigned long mte_ctrl = task->thread.mte_ctrl;
-	unsigned long resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
+	unsigned long pref, resolved_mte_tcf;
 
+	pref = __this_cpu_read(mte_tcf_preferred);
+	resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 	sctlr &= ~SCTLR_EL1_TCF0_MASK;
 	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 		sctlr |= SCTLR_EL1_TCF0_ASYNC;
@@ -438,3 +448,54 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
 
 	return ret;
 }
+
+static ssize_t mte_tcf_preferred_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	switch (per_cpu(mte_tcf_preferred, dev->id)) {
+	case MTE_CTRL_TCF_ASYNC:
+		return sysfs_emit(buf, "async\n");
+	case MTE_CTRL_TCF_SYNC:
+		return sysfs_emit(buf, "sync\n");
+	default:
+		return sysfs_emit(buf, "???\n");
+	}
+}
+
+static ssize_t mte_tcf_preferred_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	u64 tcf;
+
+	if (sysfs_streq(buf, "async"))
+		tcf = MTE_CTRL_TCF_ASYNC;
+	else if (sysfs_streq(buf, "sync"))
+		tcf = MTE_CTRL_TCF_SYNC;
+	else
+		return -EINVAL;
+
+	device_lock(dev);
+	per_cpu(mte_tcf_preferred, dev->id) = tcf;
+	device_unlock(dev);
+
+	return count;
+}
+static DEVICE_ATTR_RW(mte_tcf_preferred);
+
+static int register_mte_tcf_preferred_sysctl(void)
+{
+	unsigned int cpu;
+
+	if (!system_supports_mte())
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
+		device_create_file(get_cpu_device(cpu),
+				   &dev_attr_mte_tcf_preferred);
+	}
+
+	return 0;
+}
+subsys_initcall(register_mte_tcf_preferred_sysctl);

From 80c7c36fb3ddea8e06f75822bfb7634f64d0edcb Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 27 Jul 2021 13:52:59 -0700
Subject: [PATCH 06/56] Documentation: document the preferred tag checking mode
 feature

Document the functionality added in the previous patches.

Link: https://linux-review.googlesource.com/id/I48217cc3e8b8da33abc08cbaddc11cf4360a1b86
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210727205300.2554659-6-pcc@google.com
Acked-by: Will Deacon <will@kernel.org>
[catalin.marinas@arm.com: clarify that the change happens on task scheduling]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../ABI/testing/sysfs-devices-system-cpu      | 17 +++++++
 .../arm64/memory-tagging-extension.rst        | 48 ++++++++++++++++---
 2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 160b10c029c0..edb19b31d710 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -640,3 +640,20 @@ Description:	SPURR ticks for cpuX when it was idle.
 
 		This sysfs interface exposes the number of SPURR ticks
 		for cpuX when it was idle.
+
+What: 		/sys/devices/system/cpu/cpuX/mte_tcf_preferred
+Date:		July 2021
+Contact:	Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
+Description:	Preferred MTE tag checking mode
+
+		When a user program specifies more than one MTE tag checking
+		mode, this sysfs node is used to specify which mode should
+		be preferred when scheduling a task on that CPU. Possible
+		values:
+
+		================  ==============================================
+		"sync"	  	  Prefer synchronous mode
+		"async"	  	  Prefer asynchronous mode
+		================  ==============================================
+
+		See also: Documentation/arm64/memory-tagging-extension.rst
diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index b540178a93f8..7b99c8f428eb 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -77,14 +77,20 @@ configurable behaviours:
   address is unknown).
 
 The user can select the above modes, per thread, using the
-``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where
-``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK``
+``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
+contains any number of the following values in the ``PR_MTE_TCF_MASK``
 bit-field:
 
-- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
+- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
+                         (ignored if combined with other options)
 - ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
 - ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
 
+If no modes are specified, tag check faults are ignored. If a single
+mode is specified, the program will run in that mode. If multiple
+modes are specified, the mode is selected as described in the "Per-CPU
+preferred tag checking modes" section below.
+
 The current tag check fault mode can be read using the
 ``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
 
@@ -120,13 +126,39 @@ in the ``PR_MTE_TAG_MASK`` bit-field.
 interface provides an include mask. An include mask of ``0`` (exclusion
 mask ``0xffff``) results in the CPU always generating tag ``0``.
 
+Per-CPU preferred tag checking mode
+-----------------------------------
+
+On some CPUs the performance of MTE in stricter tag checking modes
+is similar to that of less strict tag checking modes. This makes it
+worthwhile to enable stricter checks on those CPUs when a less strict
+checking mode is requested, in order to gain the error detection
+benefits of the stricter checks without the performance downsides. To
+support this scenario, a privileged user may configure a stricter
+tag checking mode as the CPU's preferred tag checking mode.
+
+The preferred tag checking mode for each CPU is controlled by
+``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
+privileged user may write the value ``async`` or ``sync``.  The default
+preferred mode for each CPU is ``async``.
+
+To allow a program to potentially run in the CPU's preferred tag
+checking mode, the user program may set multiple tag check fault mode
+bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
+flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
+mode is in the task's set of provided tag checking modes (this will
+always be the case at present because the kernel only supports two
+tag checking modes, but future kernels may support more modes), that
+mode will be selected. Otherwise, one of the modes in the task's mode
+set will be selected in a currently unspecified manner.
+
 Initial process state
 ---------------------
 
 On ``execve()``, the new process has the following configuration:
 
 - ``PR_TAGGED_ADDR_ENABLE`` set to 0 (disabled)
-- Tag checking mode set to ``PR_MTE_TCF_NONE``
+- No tag checking modes are selected (tag check faults ignored)
 - ``PR_MTE_TAG_MASK`` set to 0 (all tags excluded)
 - ``PSTATE.TCO`` set to 0
 - ``PROT_MTE`` not set on any of the initial memory maps
@@ -251,11 +283,13 @@ Example of correct usage
                     return EXIT_FAILURE;
 
             /*
-             * Enable the tagged address ABI, synchronous MTE tag check faults and
-             * allow all non-zero tags in the randomly generated set.
+             * Enable the tagged address ABI, synchronous or asynchronous MTE
+             * tag check faults (based on per-CPU preference) and allow all
+             * non-zero tags in the randomly generated set.
              */
             if (prctl(PR_SET_TAGGED_ADDR_CTRL,
-                      PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | (0xfffe << PR_MTE_TAG_SHIFT),
+                      PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
+                      (0xfffe << PR_MTE_TAG_SHIFT),
                       0, 0, 0)) {
                     perror("prctl() failed");
                     return EXIT_FAILURE;

From afdfd93a53aea68837b34da81d442358ff7552f3 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 13 Jul 2021 18:36:38 -0700
Subject: [PATCH 07/56] arm64: mte: optimize GCR_EL1 modification on kernel
 entry/exit

Accessing GCR_EL1 and issuing an ISB can be expensive on some
microarchitectures. Although we must write to GCR_EL1, we can
restructure the code to avoid reading from it because the new value
can be derived entirely from the exclusion mask, which is already in
a GPR. Do so.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
Link: https://lore.kernel.org/r/20210714013638.3995315-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4b779f198486..8c8581e86a1a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -168,15 +168,11 @@ alternative_else_nop_endif
 #endif
 	.endm
 
-	.macro mte_set_gcr, tmp, tmp2
+	.macro mte_set_gcr, mte_ctrl, tmp
 #ifdef CONFIG_ARM64_MTE
-	/*
-	 * Calculate and set the exclude mask preserving
-	 * the RRND (bit[16]) setting.
-	 */
-	mrs_s	\tmp2, SYS_GCR_EL1
-	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
-	msr_s	SYS_GCR_EL1, \tmp2
+	ubfx	\tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
+	orr	\tmp, \tmp, #SYS_GCR_EL1_RRND
+	msr_s	SYS_GCR_EL1, \tmp
 #endif
 	.endm
 

From d914b80a8f567d052d621197974ae08f729c963d Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Tue, 27 Jul 2021 13:54:39 -0700
Subject: [PATCH 08/56] arm64: avoid double ISB on kernel entry

Although an ISB is required in order to make the MTE-related system
register update to GCR_EL1 effective, and the same is true for
PAC-related updates to SCTLR_EL1 or APIAKey{Hi,Lo}_EL1, we issue two
ISBs on machines that support both features while we only need to
issue one. To avoid the unnecessary additional ISB, remove the ISBs
from the PAC and MTE-specific alternative blocks and add a couple
of additional blocks that cause us to only execute one ISB if both
features are supported.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Idee7e8114d5ae5a0b171d06220a0eb4bb015a51c
Link: https://lore.kernel.org/r/20210727205439.2557419-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 8c8581e86a1a..468fae024bec 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -184,7 +184,6 @@ alternative_else_nop_endif
 	ldr_l	\tmp, gcr_kernel_excl
 
 	mte_set_gcr \tmp, \tmp2
-	isb
 1:
 #endif
 	.endm
@@ -257,7 +256,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 	orr	x0, x0, SCTLR_ELx_ENIA
 	msr	sctlr_el1, x0
 2:
-	isb
 alternative_else_nop_endif
 #endif
 
@@ -265,6 +263,19 @@ alternative_else_nop_endif
 
 	mte_set_kernel_gcr x22, x23
 
+	/*
+	 * Any non-self-synchronizing system register updates required for
+	 * kernel entry should be placed before this point.
+	 */
+alternative_if ARM64_MTE
+	isb
+	b	1f
+alternative_else_nop_endif
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	isb
+alternative_else_nop_endif
+1:
+
 	scs_load tsk
 	.else
 	add	x21, sp, #PT_REGS_SIZE

From 79d82cbcbb3d2a56c009ad6a6df92c5dee061dad Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 14 Jul 2021 10:16:15 +0530
Subject: [PATCH 09/56] arm64/kexec: Test page size support with new TGRAN
 range values

The commit 26f55386f964 ("arm64/mm: Fix __enable_mmu() for new TGRAN range
values") had already switched into testing ID_AA64MMFR0_TGRAN range values.
This just changes system_supports_[4|16|64]kb_granule() helpers to perform
similar range tests as well. While here, it standardizes page size specific
supported min and max TGRAN values.

Cc: Will Deacon <will@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/1626237975-1909-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h |  9 ++++++---
 arch/arm64/include/asm/sysreg.h     | 28 ++++++++++++++++------------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9bb9d11750d7..2395527a1bba 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -657,7 +657,8 @@ static inline bool system_supports_4kb_granule(void)
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
 						ID_AA64MMFR0_TGRAN4_SHIFT);
 
-	return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
+	return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) &&
+	       (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX);
 }
 
 static inline bool system_supports_64kb_granule(void)
@@ -669,7 +670,8 @@ static inline bool system_supports_64kb_granule(void)
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
 						ID_AA64MMFR0_TGRAN64_SHIFT);
 
-	return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
+	return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) &&
+	       (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX);
 }
 
 static inline bool system_supports_16kb_granule(void)
@@ -681,7 +683,8 @@ static inline bool system_supports_16kb_granule(void)
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
 						ID_AA64MMFR0_TGRAN16_SHIFT);
 
-	return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
+	return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) &&
+	       (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX);
 }
 
 static inline bool system_supports_mixed_endian_el0(void)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7b9c3acba684..aa53954c2f6b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -847,12 +847,16 @@
 #define ID_AA64MMFR0_ASID_SHIFT		4
 #define ID_AA64MMFR0_PARANGE_SHIFT	0
 
-#define ID_AA64MMFR0_TGRAN4_NI		0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED	0x0
-#define ID_AA64MMFR0_TGRAN64_NI		0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
-#define ID_AA64MMFR0_TGRAN16_NI		0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+#define ID_AA64MMFR0_TGRAN4_NI			0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN	0x0
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_TGRAN64_NI			0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN	0x0
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_TGRAN16_NI			0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN	0x1
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX	0xf
+
 #define ID_AA64MMFR0_PARANGE_48		0x5
 #define ID_AA64MMFR0_PARANGE_52		0x6
 
@@ -1028,16 +1032,16 @@
 
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN4_SUPPORTED
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
 #elif defined(CONFIG_ARM64_16K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN16_SUPPORTED
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	0xF
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
 #elif defined(CONFIG_ARM64_64K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN64_SUPPORTED
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
 #endif
 
 #define MVFR2_FPMISC_SHIFT		4

From ff85f10ba8e4d3b4c6f8f5c9f6b47f3e80a067f1 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Fri, 16 Jul 2021 13:58:09 +0800
Subject: [PATCH 10/56] arm64: cpufeature: Use defined macro instead of magic
 numbers

Use defined macro to simplify the code and make it more readable.

Cc: Marc Zyngier <maz@kernel.org>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1626415089-57584-1-git-send-email-zhangshaokun@hisilicon.com
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0ead8bfedf20..2e4ec12667b3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1734,7 +1734,7 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
 	u64 val = read_sysreg_s(SYS_CLIDR_EL1);
 
 	/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
-	WARN_ON(val & (7 << 27 | 7 << 21));
+	WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val));
 }
 
 #ifdef CONFIG_ARM64_PAN

From ec63e300fa8be5b7bbb32cd231a211aed07c85ce Mon Sep 17 00:00:00 2001
From: Lingyan Huang <huanglingyan2@huawei.com>
Date: Thu, 22 Jul 2021 10:20:36 +0800
Subject: [PATCH 11/56] arm64: SSBS/DIT: print SSBS and DIT bit when printing
 PSTATE

The current code to print PSTATE when generating backtraces does not
include SSBS bit and DIT bit, so add this information.

Cc: Vladimir Murzin <vladimir.murzin@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Lingyan Huang <huanglingyan2@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/1626920436-54816-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/process.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c8989b999250..5e13a74b11b3 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -163,7 +163,7 @@ static void print_pstate(struct pt_regs *regs)
 	u64 pstate = regs->pstate;
 
 	if (compat_user_mode(regs)) {
-		printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n",
+		printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c %cDIT %cSSBS)\n",
 			pstate,
 			pstate & PSR_AA32_N_BIT ? 'N' : 'n',
 			pstate & PSR_AA32_Z_BIT ? 'Z' : 'z',
@@ -174,12 +174,14 @@ static void print_pstate(struct pt_regs *regs)
 			pstate & PSR_AA32_E_BIT ? "BE" : "LE",
 			pstate & PSR_AA32_A_BIT ? 'A' : 'a',
 			pstate & PSR_AA32_I_BIT ? 'I' : 'i',
-			pstate & PSR_AA32_F_BIT ? 'F' : 'f');
+			pstate & PSR_AA32_F_BIT ? 'F' : 'f',
+			pstate & PSR_AA32_DIT_BIT ? '+' : '-',
+			pstate & PSR_AA32_SSBS_BIT ? '+' : '-');
 	} else {
 		const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
 					       PSR_BTYPE_SHIFT];
 
-		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n",
+		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO %cDIT %cSSBS BTYPE=%s)\n",
 			pstate,
 			pstate & PSR_N_BIT ? 'N' : 'n',
 			pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -192,6 +194,8 @@ static void print_pstate(struct pt_regs *regs)
 			pstate & PSR_PAN_BIT ? '+' : '-',
 			pstate & PSR_UAO_BIT ? '+' : '-',
 			pstate & PSR_TCO_BIT ? '+' : '-',
+			pstate & PSR_DIT_BIT ? '+' : '-',
+			pstate & PSR_SSBS_BIT ? '+' : '-',
 			btype_str);
 	}
 }

From 2806556c5e1abf06e37e33a449a5801b02d98939 Mon Sep 17 00:00:00 2001
From: Jason Wang <wangborong@cdjrlc.com>
Date: Mon, 26 Jul 2021 20:29:07 +0800
Subject: [PATCH 12/56] arm64: use __func__ to get function name in pr_err

Prefer using '"%s...", __func__' to get current function's name in
a debug message.

Signed-off-by: Jason Wang <wangborong@cdjrlc.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210726122907.51529-1-wangborong@cdjrlc.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/lib/insn.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index b506a4b1e38c..fccfe363e567 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -185,7 +185,7 @@ u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
 		break;
 	default:
 		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
-			pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
+			pr_err("%s: unknown immediate encoding %d\n", __func__,
 			       type);
 			return 0;
 		}
@@ -215,7 +215,7 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 		break;
 	default:
 		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
-			pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+			pr_err("%s: unknown immediate encoding %d\n", __func__,
 			       type);
 			return AARCH64_BREAK_FAULT;
 		}

From a8caaa239c60ad735b92fb2e8147c8c095181afb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 20 Jul 2021 21:42:20 +0100
Subject: [PATCH 13/56] arm64/sme: Document boot requirements for SME

Document our requirements for initialisation of the Scalable Matrix
Extension (SME) at kernel start. While we do have the ability to handle
mismatched vector lengths we will reject any late CPUs that can't support
the minimum set we determine at boot so for clarity we document a
requirement that all CPUs make the same vector length available.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210720204220.22951-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/booting.rst | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index a9192e7a231b..5822d6da9fa6 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -311,6 +311,28 @@ Before jumping into the kernel, the following conditions must be met:
     - ZCR_EL2.LEN must be initialised to the same value for all CPUs the
       kernel will execute on.
 
+  For CPUs with the Scalable Matrix Extension (FEAT_SME):
+
+  - If EL3 is present:
+
+    - CPTR_EL3.ESM (bit 12) must be initialised to 0b1.
+
+    - SCR_EL3.EnTP2 (bit 41) must be initialised to 0b1.
+
+    - SMCR_EL3.LEN must be initialised to the same value for all CPUs the
+      kernel will execute on.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+    - CPTR_EL2.TSM (bit 12) must be initialised to 0b0.
+
+    - CPTR_EL2.SMEN (bits 25:24) must be initialised to 0b11.
+
+    - SCTLR_EL2.EnTP2 (bit 60) must be initialised to 0b1.
+
+    - SMCR_EL2.LEN must be initialised to the same value for all CPUs the
+      kernel will execute on.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented

From 8f1fbc975b86cb6520da70fa8d8f69ee6f9306fe Mon Sep 17 00:00:00 2001
From: Jason Wang <wangborong@cdjrlc.com>
Date: Mon, 26 Jul 2021 20:39:40 +0800
Subject: [PATCH 14/56] arm64: unnecessary end 'return;' in void functions

The end 'return;' in a void function is useless and verbose. It can
be removed safely.

Signed-off-by: Jason Wang <wangborong@cdjrlc.com>
Link: https://lore.kernel.org/r/20210726123940.63232-1-wangborong@cdjrlc.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2e4ec12667b3..9035c367d08b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1561,8 +1561,6 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 
 	if (!cpu)
 		arm64_use_ng_mappings = true;
-
-	return;
 }
 #else
 static void

From dac3ce63bffe0ef5c0a3fa9b5f6140b633cbc830 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 28 Jul 2021 18:35:39 +0100
Subject: [PATCH 15/56] kselftest/arm64: Ignore check_gcr_el1_cswitch binary

We added check_gcr_el1_cswitch but did not ignore the generated binary,
add it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210728173539.6231-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index bc3ac63f3314..d1fe4ddf1669 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1,4 +1,5 @@
 check_buffer_fill
+check_gcr_el1_cswitch
 check_tags_inclusion
 check_child_memory
 check_mmap_options

From b24b5205099a342ee38f7db7c18e39da2f2ae8e8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 30 Jul 2021 17:58:46 +0100
Subject: [PATCH 16/56] arm64/sve: Make fpsimd_bind_task_to_cpu() static

This function is not referenced outside fpsimd.c so can be static, making
it that little bit easier to follow what is called from where.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210730165846.18558-1-broonie@kernel.org
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/fpsimd.h | 1 -
 arch/arm64/kernel/fpsimd.c      | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c072161d5c65..9a62884183e5 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -45,7 +45,6 @@ extern void fpsimd_preserve_current_state(void);
 extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
 
-extern void fpsimd_bind_task_to_cpu(void);
 extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
 				     void *sve_state, unsigned int sve_vl);
 
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e57b23f95284..eb8d972ad3d2 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -162,6 +162,8 @@ extern void __percpu *efi_sve_state;
 DEFINE_PER_CPU(bool, fpsimd_context_busy);
 EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
 
+static void fpsimd_bind_task_to_cpu(void);
+
 static void __get_cpu_fpsimd_context(void)
 {
 	bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
@@ -1112,7 +1114,7 @@ void fpsimd_signal_preserve_current_state(void)
  * The caller must have ownership of the cpu FPSIMD context before calling
  * this function.
  */
-void fpsimd_bind_task_to_cpu(void)
+static void fpsimd_bind_task_to_cpu(void)
 {
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);

From 82868247897bea2d69a83dca9a6a557e2c96dac4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 14 Jul 2021 15:38:42 +0100
Subject: [PATCH 17/56] arm64: kasan: mte: use a constant kernel GCR_EL1 value

When KASAN_HW_TAGS is selected, KASAN is enabled at boot time, and the
hardware supports MTE, we'll initialize `kernel_gcr_excl` with a value
dependent on KASAN_TAG_MAX. While the resulting value is a constant
which depends on KASAN_TAG_MAX, we have to perform some runtime work to
generate the value, and have to read the value from memory during the
exception entry path. It would be better if we could generate this as a
constant at compile-time, and use it as such directly.

Early in boot within __cpu_setup(), we initialize GCR_EL1 to a safe
value, and later override this with the value required by KASAN. If
CONFIG_KASAN_HW_TAGS is not selected, or if KASAN is disabeld at boot
time, the kernel will not use IRG instructions, and so the initial value
of GCR_EL1 is does not matter to the kernel. Thus, we can instead have
__cpu_setup() initialize GCR_EL1 to a value consistent with
KASAN_TAG_MAX, and avoid the need to re-initialize it during hotplug and
resume form suspend.

This patch makes arem64 use a compile-time constant KERNEL_GCR_EL1
value, which is compatible with KASAN_HW_TAGS when this is selected.
This removes the need to re-initialize GCR_EL1 dynamically, and acts as
an optimization to the entry assembly, which no longer needs to load
this value from memory. The redundant initialization hooks are removed.

In order to do this, KASAN_TAG_MAX needs to be visible outside of the
core KASAN code. To do this, I've moved the KASAN_TAG_* values into
<linux/kasan-tags.h>.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Tested-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/r/20210714143843.56537-3-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/memory.h    |  1 -
 arch/arm64/include/asm/mte-kasan.h |  5 -----
 arch/arm64/include/asm/mte.h       |  6 ------
 arch/arm64/include/asm/sysreg.h    | 16 +++++++++++++++
 arch/arm64/kernel/entry.S          |  5 ++---
 arch/arm64/kernel/mte.c            | 31 ------------------------------
 arch/arm64/kernel/suspend.c        |  1 -
 arch/arm64/mm/proc.S               |  3 +--
 include/linux/kasan-tags.h         | 15 +++++++++++++++
 mm/kasan/hw_tags.c                 |  2 --
 mm/kasan/kasan.h                   | 15 +--------------
 11 files changed, 35 insertions(+), 65 deletions(-)
 create mode 100644 include/linux/kasan-tags.h

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 824a3655dd93..7f4e6a923aa6 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -245,7 +245,6 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 #define arch_enable_tagging_async()		mte_enable_kernel_async()
 #define arch_set_tagging_report_once(state)	mte_set_report_once(state)
 #define arch_force_async_tag_fault()		mte_check_tfsr_exit()
-#define arch_init_tags(max_tag)			mte_init_tags(max_tag)
 #define arch_get_random_tag()			mte_get_random_tag()
 #define arch_get_mem_tag(addr)			mte_get_mem_tag(addr)
 #define arch_set_mem_tag_range(addr, size, tag, init)	\
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index d952352bd008..82fa4ac4ad4e 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -130,7 +130,6 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
 
 void mte_enable_kernel_sync(void);
 void mte_enable_kernel_async(void);
-void mte_init_tags(u64 max_tag);
 
 void mte_set_report_once(bool state);
 bool mte_report_once(void);
@@ -165,10 +164,6 @@ static inline void mte_enable_kernel_async(void)
 {
 }
 
-static inline void mte_init_tags(u64 max_tag)
-{
-}
-
 static inline void mte_set_report_once(bool state)
 {
 }
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 58c7f80f5596..3f93b9e0b339 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -16,8 +16,6 @@
 
 #include <asm/pgtable-types.h>
 
-extern u64 gcr_kernel_excl;
-
 void mte_clear_page_tags(void *addr);
 unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
 				      unsigned long n);
@@ -43,7 +41,6 @@ void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
 void mte_suspend_enter(void);
-void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
@@ -72,9 +69,6 @@ static inline void mte_thread_switch(struct task_struct *next)
 static inline void mte_suspend_enter(void)
 {
 }
-static inline void mte_suspend_exit(void)
-{
-}
 static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	return 0;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7b9c3acba684..f6687f6f536b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -11,6 +11,7 @@
 
 #include <linux/bits.h>
 #include <linux/stringify.h>
+#include <linux/kasan-tags.h>
 
 /*
  * ARMv8 ARM reserves the following encoding for system registers:
@@ -1067,6 +1068,21 @@
 #define SYS_GCR_EL1_RRND	(BIT(16))
 #define SYS_GCR_EL1_EXCL_MASK	0xffffUL
 
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
+ * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
+ */
+#define __MTE_TAG_MIN		(KASAN_TAG_MIN & 0xf)
+#define __MTE_TAG_MAX		(KASAN_TAG_MAX & 0xf)
+#define __MTE_TAG_INCL		GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
+#define KERNEL_GCR_EL1_EXCL	(SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
+#else
+#define KERNEL_GCR_EL1_EXCL	SYS_GCR_EL1_EXCL_MASK
+#endif
+
+#define KERNEL_GCR_EL1		(SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
+
 /* RGSR_EL1 Definitions */
 #define SYS_RGSR_EL1_TAG_MASK	0xfUL
 #define SYS_RGSR_EL1_SEED_SHIFT	8
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 468fae024bec..923ee2ac85fd 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -181,9 +181,8 @@ alternative_else_nop_endif
 alternative_if_not ARM64_MTE
 	b	1f
 alternative_else_nop_endif
-	ldr_l	\tmp, gcr_kernel_excl
-
-	mte_set_gcr \tmp, \tmp2
+	mov	\tmp, KERNEL_GCR_EL1
+	msr_s	SYS_GCR_EL1, \tmp
 1:
 #endif
 	.endm
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 20a40e5a14fa..b538ff27a912 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -23,8 +23,6 @@
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
-u64 gcr_kernel_excl __ro_after_init;
-
 static bool report_fault_once = true;
 
 static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
@@ -104,26 +102,6 @@ int memcmp_pages(struct page *page1, struct page *page2)
 	return ret;
 }
 
-void mte_init_tags(u64 max_tag)
-{
-	static bool gcr_kernel_excl_initialized;
-
-	if (!gcr_kernel_excl_initialized) {
-		/*
-		 * The format of the tags in KASAN is 0xFF and in MTE is 0xF.
-		 * This conversion extracts an MTE tag from a KASAN tag.
-		 */
-		u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT,
-					     max_tag), 0);
-
-		gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
-		gcr_kernel_excl_initialized = true;
-	}
-
-	/* Enable the kernel exclude mask for random tags generation. */
-	write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
-}
-
 static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
 {
 	/* Enable MTE Sync Mode for EL1. */
@@ -262,15 +240,6 @@ void mte_suspend_enter(void)
 	mte_check_tfsr_el1();
 }
 
-void mte_suspend_exit(void)
-{
-	if (!system_supports_mte())
-		return;
-
-	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, gcr_kernel_excl);
-	isb();
-}
-
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 938ce6fbee8a..19ee7c33769d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -76,7 +76,6 @@ void notrace __cpu_suspend_exit(void)
 	spectre_v4_enable_mitigation(NULL);
 
 	/* Restore additional feature-specific configuration */
-	mte_suspend_exit();
 	ptrauth_suspend_exit();
 }
 
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 35936c5ae1ce..d35c90d2e47a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -437,8 +437,7 @@ SYM_FUNC_START(__cpu_setup)
 	mov	x10, #MAIR_ATTR_NORMAL_TAGGED
 	bfi	mair, x10, #(8 *  MT_NORMAL_TAGGED), #8
 
-	/* initialize GCR_EL1: all non-zero tags excluded by default */
-	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
+	mov	x10, #KERNEL_GCR_EL1
 	msr_s	SYS_GCR_EL1, x10
 
 	/*
diff --git a/include/linux/kasan-tags.h b/include/linux/kasan-tags.h
new file mode 100644
index 000000000000..4f85f562512c
--- /dev/null
+++ b/include/linux/kasan-tags.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_TAGS_H
+#define _LINUX_KASAN_TAGS_H
+
+#define KASAN_TAG_KERNEL	0xFF /* native kernel pointers tag */
+#define KASAN_TAG_INVALID	0xFE /* inaccessible memory tag */
+#define KASAN_TAG_MAX		0xFD /* maximum value for random tags */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define KASAN_TAG_MIN		0xF0 /* minimum value for random tags */
+#else
+#define KASAN_TAG_MIN		0x00 /* minimum value for random tags */
+#endif
+
+#endif /* LINUX_KASAN_TAGS_H */
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 4ea8c368b5b8..2c6c6c6ddfa2 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -142,8 +142,6 @@ void kasan_init_hw_tags_cpu(void)
 	if (kasan_arg == KASAN_ARG_OFF)
 		return;
 
-	hw_init_tags(KASAN_TAG_MAX);
-
 	/*
 	 * Enable async mode only when explicitly requested through
 	 * the command line.
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index d739cdd1621a..28a16b80bbef 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -3,6 +3,7 @@
 #define __MM_KASAN_KASAN_H
 
 #include <linux/kasan.h>
+#include <linux/kasan-tags.h>
 #include <linux/kfence.h>
 #include <linux/stackdepot.h>
 
@@ -51,16 +52,6 @@ extern bool kasan_flag_async __ro_after_init;
 
 #define KASAN_MEMORY_PER_SHADOW_PAGE	(KASAN_GRANULE_SIZE << PAGE_SHIFT)
 
-#define KASAN_TAG_KERNEL	0xFF /* native kernel pointers tag */
-#define KASAN_TAG_INVALID	0xFE /* inaccessible memory tag */
-#define KASAN_TAG_MAX		0xFD /* maximum value for random tags */
-
-#ifdef CONFIG_KASAN_HW_TAGS
-#define KASAN_TAG_MIN		0xF0 /* minimum value for random tags */
-#else
-#define KASAN_TAG_MIN		0x00 /* minimum value for random tags */
-#endif
-
 #ifdef CONFIG_KASAN_GENERIC
 #define KASAN_FREE_PAGE         0xFF  /* page was freed */
 #define KASAN_PAGE_REDZONE      0xFE  /* redzone for kmalloc_large allocations */
@@ -299,9 +290,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #ifndef arch_enable_tagging_async
 #define arch_enable_tagging_async()
 #endif
-#ifndef arch_init_tags
-#define arch_init_tags(max_tag)
-#endif
 #ifndef arch_set_tagging_report_once
 #define arch_set_tagging_report_once(state)
 #endif
@@ -320,7 +308,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 
 #define hw_enable_tagging_sync()		arch_enable_tagging_sync()
 #define hw_enable_tagging_async()		arch_enable_tagging_async()
-#define hw_init_tags(max_tag)			arch_init_tags(max_tag)
 #define hw_set_tagging_report_once(state)	arch_set_tagging_report_once(state)
 #define hw_force_async_tag_fault()		arch_force_async_tag_fault()
 #define hw_get_random_tag()			arch_get_random_tag()

From 767215030150d9d01ff65fbc1c5dff515ffdcfe3 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 14 Jul 2021 15:38:43 +0100
Subject: [PATCH 18/56] arm64: kasan: mte: remove redundant mte_report_once
 logic

We have special logic to suppress MTE tag check fault reporting, based
on a global `mte_report_once` and `reported` variables. These can be
used to suppress calling kasan_report() when taking a tag check fault,
but do not prevent taking the fault in the first place, nor does they
affect the way we disable tag checks upon taking a fault.

The core KASAN code already defaults to reporting a single fault, and
has a `multi_shot` control to permit reporting multiple faults. The only
place we transiently alter `mte_report_once` is in lib/test_kasan.c,
where we also the `multi_shot` state as the same time. Thus
`mte_report_once` and `reported` are redundant, and can be removed.

When a tag check fault is taken, tag checking will be disabled by
`do_tag_recovery` and must be explicitly re-enabled if desired. The test
code does this by calling kasan_enable_tagging_sync().

This patch removes the redundant mte_report_once() logic and associated
variables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Tested-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/r/20210714143843.56537-4-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/memory.h    |  1 -
 arch/arm64/include/asm/mte-kasan.h | 12 ------------
 arch/arm64/kernel/mte.c            | 12 ------------
 arch/arm64/mm/fault.c              | 15 +--------------
 lib/test_kasan.c                   |  2 --
 mm/kasan/hw_tags.c                 |  6 ------
 mm/kasan/kasan.h                   |  7 -------
 7 files changed, 1 insertion(+), 54 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 7f4e6a923aa6..f1745a843414 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -243,7 +243,6 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 #ifdef CONFIG_KASAN_HW_TAGS
 #define arch_enable_tagging_sync()		mte_enable_kernel_sync()
 #define arch_enable_tagging_async()		mte_enable_kernel_async()
-#define arch_set_tagging_report_once(state)	mte_set_report_once(state)
 #define arch_force_async_tag_fault()		mte_check_tfsr_exit()
 #define arch_get_random_tag()			mte_get_random_tag()
 #define arch_get_mem_tag(addr)			mte_get_mem_tag(addr)
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 82fa4ac4ad4e..22420e1f8c03 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -131,9 +131,6 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
 void mte_enable_kernel_sync(void);
 void mte_enable_kernel_async(void);
 
-void mte_set_report_once(bool state);
-bool mte_report_once(void);
-
 #else /* CONFIG_ARM64_MTE */
 
 static inline u8 mte_get_ptr_tag(void *ptr)
@@ -164,15 +161,6 @@ static inline void mte_enable_kernel_async(void)
 {
 }
 
-static inline void mte_set_report_once(bool state)
-{
-}
-
-static inline bool mte_report_once(void)
-{
-	return false;
-}
-
 #endif /* CONFIG_ARM64_MTE */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index b538ff27a912..9d314a3bad3b 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -23,8 +23,6 @@
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
-static bool report_fault_once = true;
-
 static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
 
 #ifdef CONFIG_KASAN_HW_TAGS
@@ -141,16 +139,6 @@ void mte_enable_kernel_async(void)
 }
 #endif
 
-void mte_set_report_once(bool state)
-{
-	WRITE_ONCE(report_fault_once, state);
-}
-
-bool mte_report_once(void)
-{
-	return READ_ONCE(report_fault_once);
-}
-
 #ifdef CONFIG_KASAN_HW_TAGS
 void mte_check_tfsr_el1(void)
 {
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 349c488765ca..9ae24e3b72be 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -309,24 +309,11 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
 static void report_tag_fault(unsigned long addr, unsigned int esr,
 			     struct pt_regs *regs)
 {
-	static bool reported;
-	bool is_write;
-
-	if (READ_ONCE(reported))
-		return;
-
-	/*
-	 * This is used for KASAN tests and assumes that no MTE faults
-	 * happened before running the tests.
-	 */
-	if (mte_report_once())
-		WRITE_ONCE(reported, true);
-
 	/*
 	 * SAS bits aren't set for all faults reported in EL1, so we can't
 	 * find out access size.
 	 */
-	is_write = !!(esr & ESR_ELx_WNR);
+	bool is_write = !!(esr & ESR_ELx_WNR);
 	kasan_report(addr, 0, is_write, regs->pc);
 }
 #else
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 8f7b0b2f6e11..8be9d4b3b259 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -53,7 +53,6 @@ static int kasan_test_init(struct kunit *test)
 	}
 
 	multishot = kasan_save_enable_multi_shot();
-	kasan_set_tagging_report_once(false);
 	fail_data.report_found = false;
 	kunit_add_named_resource(test, NULL, NULL, &resource,
 					"kasan_data", &fail_data);
@@ -62,7 +61,6 @@ static int kasan_test_init(struct kunit *test)
 
 static void kasan_test_exit(struct kunit *test)
 {
-	kasan_set_tagging_report_once(true);
 	kasan_restore_multi_shot(multishot);
 	KUNIT_EXPECT_FALSE(test, fail_data.report_found);
 }
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 2c6c6c6ddfa2..e4c16f6b6680 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -248,12 +248,6 @@ void kasan_free_pages(struct page *page, unsigned int order)
 
 #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
 
-void kasan_set_tagging_report_once(bool state)
-{
-	hw_set_tagging_report_once(state);
-}
-EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once);
-
 void kasan_enable_tagging_sync(void)
 {
 	hw_enable_tagging_sync();
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 28a16b80bbef..fff93b0bcb08 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -290,9 +290,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #ifndef arch_enable_tagging_async
 #define arch_enable_tagging_async()
 #endif
-#ifndef arch_set_tagging_report_once
-#define arch_set_tagging_report_once(state)
-#endif
 #ifndef arch_force_async_tag_fault
 #define arch_force_async_tag_fault()
 #endif
@@ -308,7 +305,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 
 #define hw_enable_tagging_sync()		arch_enable_tagging_sync()
 #define hw_enable_tagging_async()		arch_enable_tagging_async()
-#define hw_set_tagging_report_once(state)	arch_set_tagging_report_once(state)
 #define hw_force_async_tag_fault()		arch_force_async_tag_fault()
 #define hw_get_random_tag()			arch_get_random_tag()
 #define hw_get_mem_tag(addr)			arch_get_mem_tag(addr)
@@ -319,19 +315,16 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 
 #define hw_enable_tagging_sync()
 #define hw_enable_tagging_async()
-#define hw_set_tagging_report_once(state)
 
 #endif /* CONFIG_KASAN_HW_TAGS */
 
 #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
 
-void kasan_set_tagging_report_once(bool state);
 void kasan_enable_tagging_sync(void);
 void kasan_force_async_fault(void);
 
 #else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
 
-static inline void kasan_set_tagging_report_once(bool state) { }
 static inline void kasan_enable_tagging_sync(void) { }
 static inline void kasan_force_async_fault(void) { }
 

From 7a062ce31807eb402c38edbec50c1b848b4298f3 Mon Sep 17 00:00:00 2001
From: Yee Lee <yee.lee@mediatek.com>
Date: Tue, 3 Aug 2021 15:08:22 +0800
Subject: [PATCH 19/56] arm64/cpufeature: Optionally disable MTE via
 command-line

MTE support needs to be optionally disabled in runtime
for HW issue workaround, FW development and some
evaluation works on system resource and performance.

This patch makes two changes:
(1) moves init of tag-allocation bits(ATA/ATA0) to
cpu_enable_mte() as not cached in TLB.

(2) allows ID_AA64PFR1_EL1.MTE to be overridden on
its shadow value by giving "arm64.nomte" on cmdline.

When the feature value is off, ATA and TCF will not set
and the related functionalities are accordingly suppressed.

Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Suggested-by: Marc Zyngier <maz@kernel.org>
Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Yee Lee <yee.lee@mediatek.com>
Link: https://lore.kernel.org/r/20210803070824.7586-2-yee.lee@mediatek.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +++
 arch/arm64/include/asm/sysreg.h                 | 3 +--
 arch/arm64/kernel/cpufeature.c                  | 3 +++
 arch/arm64/kernel/idreg-override.c              | 2 ++
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..6f257e39d89e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -380,6 +380,9 @@
 	arm64.nopauth	[ARM64] Unconditionally disable Pointer Authentication
 			support
 
+	arm64.nomte	[ARM64] Unconditionally disable Memory Tagging Extension
+			support
+
 	ataflop=	[HW,M68k]
 
 	atarimouse=	[HW,MOUSE] Atari Mouse
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f6687f6f536b..fc887399c68c 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -699,8 +699,7 @@
 	(SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   | SCTLR_EL1_SA0   | \
 	 SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
 	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
-	 SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
-	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
+	 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
 
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0ead8bfedf20..51e6bf4bb7b5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1843,6 +1843,9 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
 #ifdef CONFIG_ARM64_MTE
 static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 {
+	sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
+	isb();
+
 	/*
 	 * Clear the tags in the zero page. This needs to be done via the
 	 * linear map which has the Tagged attribute.
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 53a381a7f65d..d8e606fe3c21 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -54,6 +54,7 @@ static const struct ftr_set_desc pfr1 __initconst = {
 	.override	= &id_aa64pfr1_override,
 	.fields		= {
 	        { "bt", ID_AA64PFR1_BT_SHIFT },
+		{ "mte", ID_AA64PFR1_MTE_SHIFT},
 		{}
 	},
 };
@@ -100,6 +101,7 @@ static const struct {
 	{ "arm64.nopauth",
 	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
 	  "id_aa64isar1.api=0 id_aa64isar1.apa=0"	   },
+	{ "arm64.nomte",		"id_aa64pfr1.mte=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
 

From 70a4039bd4d723c9b86b365bf1cd2395beed2d07 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 29 Jul 2021 23:05:27 +0900
Subject: [PATCH 20/56] arm64: move the (z)install rules to arch/arm64/Makefile

Currently, the (z)install targets in arch/arm64/Makefile descend into
arch/arm64/boot/Makefile to invoke the shell script, but there is no
good reason to do so.

arch/arm64/Makefile can run the shell script directly.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210729140527.443116-1-masahiroy@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile      | 7 +++++--
 arch/arm64/boot/Makefile | 8 --------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7bc37d0a1b68..ded13230f901 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -165,8 +165,11 @@ Image: vmlinux
 Image.%: Image
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-zinstall install:
-	$(Q)$(MAKE) $(build)=$(boot) $@
+install: install-image := Image
+zinstall: install-image := Image.gz
+install zinstall:
+	$(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
+	$(boot)/$(install-image) System.map "$(INSTALL_PATH)"
 
 PHONY += vdso_install
 vdso_install:
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index cd3414898d10..ebe80faab883 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -35,11 +35,3 @@ $(obj)/Image.lzma: $(obj)/Image FORCE
 
 $(obj)/Image.lzo: $(obj)/Image FORCE
 	$(call if_changed,lzo)
-
-install:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-	$(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-	$(obj)/Image.gz System.map "$(INSTALL_PATH)"

From 312b7104f39bcb4f7fb4dceeab1e9ae82c7083c8 Mon Sep 17 00:00:00 2001
From: Jason Wang <wangborong@cdjrlc.com>
Date: Tue, 3 Aug 2021 22:20:20 +0800
Subject: [PATCH 21/56] arm64: fix typo in a comment

The double 'the' after 'If' in this comment "If the the TLB range ops
are supported..." is repeated. Consequently, one 'the' should be
removed from the comment.

Signed-off-by: Jason Wang <wangborong@cdjrlc.com>
Link: https://lore.kernel.org/r/20210803142020.124230-1-wangborong@cdjrlc.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index cc3f5a33ff9c..118dabbda553 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -308,7 +308,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 
 	/*
 	 * When the CPU does not support TLB range operations, flush the TLB
-	 * entries one by one at the granularity of 'stride'. If the the TLB
+	 * entries one by one at the granularity of 'stride'. If the TLB
 	 * range ops are supported, then:
 	 *
 	 * 1. If 'pages' is odd, flush the first page through non-range

From 7710861017ac25b9b459f3a7f8f1a3332db9ccbe Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 3 Aug 2021 15:04:47 +0100
Subject: [PATCH 22/56] kselftest/arm64: Provide a helper binary and "library"
 for SVE RDVL

SVE provides an instruction RDVL which reports the currently configured
vector length. In order to validate that our vector length configuration
interfaces are working correctly without having to build the C code for
our test programs with SVE enabled provide a trivial assembly library
with a C callable function that executes RDVL. Since these interfaces
also control behaviour on exec*() provide a trivial wrapper program which
reports the currently configured vector length on stdout, tests can use
this to verify that behaviour on exec*() is as expected.

In preparation for providing similar helper functionality for SME, the
Scalable Matrix Extension, which allows separately configured vector
lengths to be read back both the assembler function and wrapper binary
have SVE included in their name.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20210803140450.46624-2-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore |  1 +
 tools/testing/selftests/arm64/fp/Makefile   |  6 +++++-
 tools/testing/selftests/arm64/fp/rdvl-sve.c | 14 ++++++++++++++
 tools/testing/selftests/arm64/fp/rdvl.S     | 10 ++++++++++
 tools/testing/selftests/arm64/fp/rdvl.h     |  8 ++++++++
 5 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/fp/rdvl-sve.c
 create mode 100644 tools/testing/selftests/arm64/fp/rdvl.S
 create mode 100644 tools/testing/selftests/arm64/fp/rdvl.h

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index d66f76d2a650..6b53a7b60fee 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,4 +1,5 @@
 fpsimd-test
+rdvl-sve
 sve-probe-vls
 sve-ptrace
 sve-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a57009d3a0dc..ed62e7003b96 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -2,12 +2,16 @@
 
 CFLAGS += -I../../../../../usr/include/
 TEST_GEN_PROGS := sve-ptrace sve-probe-vls
-TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
+	rdvl-sve \
+	sve-test sve-stress \
+	vlset
 
 all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
 
 fpsimd-test: fpsimd-test.o
 	$(CC) -nostdlib $^ -o $@
+rdvl-sve: rdvl-sve.o rdvl.o
 sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
 sve-probe-vls: sve-probe-vls.o
 sve-test: sve-test.o
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sve.c b/tools/testing/selftests/arm64/fp/rdvl-sve.c
new file mode 100644
index 000000000000..7f8a13a18f5d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sve.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+	int vl = rdvl_sve();
+
+	printf("%d\n", vl);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
new file mode 100644
index 000000000000..c916c1c9defd
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+.arch_extension sve
+
+.globl rdvl_sve
+rdvl_sve:
+	hint	34			// BTI C
+	rdvl	x0, #1
+	ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
new file mode 100644
index 000000000000..7c9d953fc9e7
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef RDVL_H
+#define RDVL_H
+
+int rdvl_sve(void);
+
+#endif

From b43ab36a6d86f6c693f5919ee2cf25c543446213 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 3 Aug 2021 15:04:48 +0100
Subject: [PATCH 23/56] kselftest/arm64: Validate vector lengths are set in
 sve-probe-vls

Currently sve-probe-vls does not verify that the vector lengths reported
by the prctl() interface are actually what is reported by the architecture,
use the rdvl_sve() helper to validate this.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20210803140450.46624-3-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/Makefile        | 2 +-
 tools/testing/selftests/arm64/fp/sve-probe-vls.c | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index ed62e7003b96..fa3a0167db2d 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -13,7 +13,7 @@ fpsimd-test: fpsimd-test.o
 	$(CC) -nostdlib $^ -o $@
 rdvl-sve: rdvl-sve.o rdvl.o
 sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
-sve-probe-vls: sve-probe-vls.o
+sve-probe-vls: sve-probe-vls.o rdvl.o
 sve-test: sve-test.o
 	$(CC) -nostdlib $^ -o $@
 vlset: vlset.o
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index 76e138525d55..a24eca7a4ecb 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -13,6 +13,7 @@
 #include <asm/sigcontext.h>
 
 #include "../../kselftest.h"
+#include "rdvl.h"
 
 int main(int argc, char **argv)
 {
@@ -38,6 +39,10 @@ int main(int argc, char **argv)
 
 		vl &= PR_SVE_VL_LEN_MASK;
 
+		if (rdvl_sve() != vl)
+			ksft_exit_fail_msg("PR_SVE_SET_VL reports %d, RDVL %d\n",
+					   vl, rdvl_sve());
+
 		if (!sve_vl_valid(vl))
 			ksft_exit_fail_msg("VL %d invalid\n", vl);
 		vq = sve_vq_from_vl(vl);

From 95cf3f23877b682c56899aa34f9cdd2f910b8924 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 3 Aug 2021 15:04:49 +0100
Subject: [PATCH 24/56] kselftest/arm64: Add tests for SVE vector configuration

We provide interfaces for configuring the SVE vector length seen by
processes using prctl and also via /proc for configuring the default
values. Provide tests that exercise all these interfaces and verify that
they take effect as expected, though at present no test fully enumerates
all the possible vector lengths.

A subset of this is already tested via sve-probe-vls but the /proc
interfaces are not currently covered at all.

In preparation for the forthcoming support for SME, the Scalable Matrix
Extension, which has separately but similarly configured vector lengths
which we expect to offer similar userspace interfaces for, all the actual
files and prctls used are parameterised and we don't validate that the
architectural minimum vector length is the minimum we see.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20210803140450.46624-4-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/.gitignore   |   1 +
 tools/testing/selftests/arm64/fp/Makefile     |   3 +-
 tools/testing/selftests/arm64/fp/vec-syscfg.c | 593 ++++++++++++++++++
 3 files changed, 596 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/fp/vec-syscfg.c

diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index 6b53a7b60fee..b67395903b9b 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -3,4 +3,5 @@ rdvl-sve
 sve-probe-vls
 sve-ptrace
 sve-test
+vec-syscfg
 vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index fa3a0167db2d..f2abdd6ba12e 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
 TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
 	rdvl-sve \
 	sve-test sve-stress \
@@ -16,6 +16,7 @@ sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
 sve-probe-vls: sve-probe-vls.o rdvl.o
 sve-test: sve-test.o
 	$(CC) -nostdlib $^ -o $@
+vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
new file mode 100644
index 000000000000..c02071dcb563
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/hwcap.h>
+
+#include "../../kselftest.h"
+#include "rdvl.h"
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+#define ARCH_MIN_VL SVE_VL_MIN
+
+struct vec_data {
+	const char *name;
+	unsigned long hwcap_type;
+	unsigned long hwcap;
+	const char *rdvl_binary;
+	int (*rdvl)(void);
+
+	int prctl_get;
+	int prctl_set;
+	const char *default_vl_file;
+
+	int default_vl;
+	int min_vl;
+	int max_vl;
+};
+
+
+static struct vec_data vec_data[] = {
+	{
+		.name = "SVE",
+		.hwcap_type = AT_HWCAP,
+		.hwcap = HWCAP_SVE,
+		.rdvl = rdvl_sve,
+		.rdvl_binary = "./rdvl-sve",
+		.prctl_get = PR_SVE_GET_VL,
+		.prctl_set = PR_SVE_SET_VL,
+		.default_vl_file = "/proc/sys/abi/sve_default_vector_length",
+	},
+};
+
+static int stdio_read_integer(FILE *f, const char *what, int *val)
+{
+	int n = 0;
+	int ret;
+
+	ret = fscanf(f, "%d%*1[\n]%n", val, &n);
+	if (ret < 1 || n < 1) {
+		ksft_print_msg("failed to parse integer from %s\n", what);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Start a new process and return the vector length it sees */
+static int get_child_rdvl(struct vec_data *data)
+{
+	FILE *out;
+	int pipefd[2];
+	pid_t pid, child;
+	int read_vl, ret;
+
+	ret = pipe(pipefd);
+	if (ret == -1) {
+		ksft_print_msg("pipe() failed: %d (%s)\n",
+			       errno, strerror(errno));
+		return -1;
+	}
+
+	fflush(stdout);
+
+	child = fork();
+	if (child == -1) {
+		ksft_print_msg("fork() failed: %d (%s)\n",
+			       errno, strerror(errno));
+		close(pipefd[0]);
+		close(pipefd[1]);
+		return -1;
+	}
+
+	/* Child: put vector length on the pipe */
+	if (child == 0) {
+		/*
+		 * Replace stdout with the pipe, errors to stderr from
+		 * here as kselftest prints to stdout.
+		 */
+		ret = dup2(pipefd[1], 1);
+		if (ret == -1) {
+			fprintf(stderr, "dup2() %d\n", errno);
+			exit(EXIT_FAILURE);
+		}
+
+		/* exec() a new binary which puts the VL on stdout */
+		ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
+		fprintf(stderr, "execl(%s) failed: %d\n",
+			data->rdvl_binary, errno, strerror(errno));
+
+		exit(EXIT_FAILURE);
+	}
+
+	close(pipefd[1]);
+
+	/* Parent; wait for the exit status from the child & verify it */
+	do {
+		pid = wait(&ret);
+		if (pid == -1) {
+			ksft_print_msg("wait() failed: %d (%s)\n",
+				       errno, strerror(errno));
+			close(pipefd[0]);
+			return -1;
+		}
+	} while (pid != child);
+
+	assert(pid == child);
+
+	if (!WIFEXITED(ret)) {
+		ksft_print_msg("child exited abnormally\n");
+		close(pipefd[0]);
+		return -1;
+	}
+
+	if (WEXITSTATUS(ret) != 0) {
+		ksft_print_msg("child returned error %d\n",
+			       WEXITSTATUS(ret));
+		close(pipefd[0]);
+		return -1;
+	}
+
+	out = fdopen(pipefd[0], "r");
+	if (!out) {
+		ksft_print_msg("failed to open child stdout\n");
+		close(pipefd[0]);
+		return -1;
+	}
+
+	ret = stdio_read_integer(out, "child", &read_vl);
+	fclose(out);
+	if (ret != 0)
+		return ret;
+
+	return read_vl;
+}
+
+static int file_read_integer(const char *name, int *val)
+{
+	FILE *f;
+	int ret;
+
+	f = fopen(name, "r");
+	if (!f) {
+		ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+				      name, errno,
+				      strerror(errno));
+		return -1;
+	}
+
+	ret = stdio_read_integer(f, name, val);
+	fclose(f);
+
+	return ret;
+}
+
+static int file_write_integer(const char *name, int val)
+{
+	FILE *f;
+	int ret;
+
+	f = fopen(name, "w");
+	if (!f) {
+		ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+				      name, errno,
+				      strerror(errno));
+		return -1;
+	}
+
+	fprintf(f, "%d", val);
+	fclose(f);
+	if (ret < 0) {
+		ksft_test_result_fail("Error writing %d to %s\n",
+				      val, name);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Verify that we can read the default VL via proc, checking that it
+ * is set in a freshly spawned child.
+ */
+static void proc_read_default(struct vec_data *data)
+{
+	int default_vl, child_vl, ret;
+
+	ret = file_read_integer(data->default_vl_file, &default_vl);
+	if (ret != 0)
+		return;
+
+	/* Is this the actual default seen by new processes? */
+	child_vl = get_child_rdvl(data);
+	if (child_vl != default_vl) {
+		ksft_test_result_fail("%s is %d but child VL is %d\n",
+				      data->default_vl_file,
+				      default_vl, child_vl);
+		return;
+	}
+
+	ksft_test_result_pass("%s default vector length %d\n", data->name,
+			      default_vl);
+	data->default_vl = default_vl;
+}
+
+/* Verify that we can write a minimum value and have it take effect */
+static void proc_write_min(struct vec_data *data)
+{
+	int ret, new_default, child_vl;
+
+	if (geteuid() != 0) {
+		ksft_test_result_skip("Need to be root to write to /proc\n");
+		return;
+	}
+
+	ret = file_write_integer(data->default_vl_file, ARCH_MIN_VL);
+	if (ret != 0)
+		return;
+
+	/* What was the new value? */
+	ret = file_read_integer(data->default_vl_file, &new_default);
+	if (ret != 0)
+		return;
+
+	/* Did it take effect in a new process? */
+	child_vl = get_child_rdvl(data);
+	if (child_vl != new_default) {
+		ksft_test_result_fail("%s is %d but child VL is %d\n",
+				      data->default_vl_file,
+				      new_default, child_vl);
+		return;
+	}
+
+	ksft_test_result_pass("%s minimum vector length %d\n", data->name,
+			      new_default);
+	data->min_vl = new_default;
+
+	file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Verify that we can write a maximum value and have it take effect */
+static void proc_write_max(struct vec_data *data)
+{
+	int ret, new_default, child_vl;
+
+	if (geteuid() != 0) {
+		ksft_test_result_skip("Need to be root to write to /proc\n");
+		return;
+	}
+
+	/* -1 is accepted by the /proc interface as the maximum VL */
+	ret = file_write_integer(data->default_vl_file, -1);
+	if (ret != 0)
+		return;
+
+	/* What was the new value? */
+	ret = file_read_integer(data->default_vl_file, &new_default);
+	if (ret != 0)
+		return;
+
+	/* Did it take effect in a new process? */
+	child_vl = get_child_rdvl(data);
+	if (child_vl != new_default) {
+		ksft_test_result_fail("%s is %d but child VL is %d\n",
+				      data->default_vl_file,
+				      new_default, child_vl);
+		return;
+	}
+
+	ksft_test_result_pass("%s maximum vector length %d\n", data->name,
+			      new_default);
+	data->max_vl = new_default;
+
+	file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Can we read back a VL from prctl? */
+static void prctl_get(struct vec_data *data)
+{
+	int ret;
+
+	ret = prctl(data->prctl_get);
+	if (ret == -1) {
+		ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+				      data->name, errno, strerror(errno));
+		return;
+	}
+
+	/* Mask out any flags */
+	ret &= PR_SVE_VL_LEN_MASK;
+
+	/* Is that what we can read back directly? */
+	if (ret == data->rdvl())
+		ksft_test_result_pass("%s current VL is %d\n",
+				      data->name, ret);
+	else
+		ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+				      data->name, ret, data->rdvl());
+}
+
+/* Does the prctl let us set the VL we already have? */
+static void prctl_set_same(struct vec_data *data)
+{
+	int cur_vl = data->rdvl();
+	int ret;
+
+	ret = prctl(data->prctl_set, cur_vl);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed: %d (%s)\n",
+				      data->name, errno, strerror(errno));
+		return;
+	}
+
+	if (cur_vl != data->rdvl())
+		ksft_test_result_pass("%s current VL is %d\n",
+				      data->name, ret);
+	else
+		ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+				      data->name, ret, data->rdvl());
+}
+
+/* Can we set a new VL for this process? */
+static void prctl_set(struct vec_data *data)
+{
+	int ret;
+
+	if (data->min_vl == data->max_vl) {
+		ksft_test_result_skip("%s only one VL supported\n",
+				      data->name);
+		return;
+	}
+
+	/* Try to set the minimum VL */
+	ret = prctl(data->prctl_set, data->min_vl);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+				      data->name, data->min_vl,
+				      errno, strerror(errno));
+		return;
+	}
+
+	if ((ret & PR_SVE_VL_LEN_MASK) != data->min_vl) {
+		ksft_test_result_fail("%s prctl set %d but return value is %d\n",
+				      data->name, data->min_vl, data->rdvl());
+		return;
+	}
+
+	if (data->rdvl() != data->min_vl) {
+		ksft_test_result_fail("%s set %d but RDVL is %d\n",
+				      data->name, data->min_vl, data->rdvl());
+		return;
+	}
+
+	/* Try to set the maximum VL */
+	ret = prctl(data->prctl_set, data->max_vl);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+				      data->name, data->max_vl,
+				      errno, strerror(errno));
+		return;
+	}
+
+	if ((ret & PR_SVE_VL_LEN_MASK) != data->max_vl) {
+		ksft_test_result_fail("%s prctl() set %d but return value is %d\n",
+				      data->name, data->max_vl, data->rdvl());
+		return;
+	}
+
+	/* The _INHERIT flag should not be present when we read the VL */
+	ret = prctl(data->prctl_get);
+	if (ret == -1) {
+		ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+				      data->name, errno, strerror(errno));
+		return;
+	}
+
+	if (ret & PR_SVE_VL_INHERIT) {
+		ksft_test_result_fail("%s prctl() reports _INHERIT\n",
+				      data->name);
+		return;
+	}
+
+	ksft_test_result_pass("%s prctl() set min/max\n", data->name);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_no_child(struct vec_data *data)
+{
+	int ret, child_vl;
+
+	if (data->min_vl == data->max_vl) {
+		ksft_test_result_skip("%s only one VL supported\n",
+				      data->name);
+		return;
+	}
+
+	ret = prctl(data->prctl_set, data->min_vl);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+				      data->name, data->min_vl,
+				      errno, strerror(errno));
+		return;
+	}
+
+	/* Ensure the default VL is different */
+	ret = file_write_integer(data->default_vl_file, data->max_vl);
+	if (ret != 0)
+		return;
+
+	/* Check that the child has the default we just set */
+	child_vl = get_child_rdvl(data);
+	if (child_vl != data->max_vl) {
+		ksft_test_result_fail("%s is %d but child VL is %d\n",
+				      data->default_vl_file,
+				      data->max_vl, child_vl);
+		return;
+	}
+
+	ksft_test_result_pass("%s vector length used default\n", data->name);
+
+	file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_for_child(struct vec_data *data)
+{
+	int ret, child_vl;
+
+	if (data->min_vl == data->max_vl) {
+		ksft_test_result_skip("%s only one VL supported\n",
+				      data->name);
+		return;
+	}
+
+	ret = prctl(data->prctl_set, data->min_vl | PR_SVE_VL_INHERIT);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+				      data->name, data->min_vl,
+				      errno, strerror(errno));
+		return;
+	}
+
+	/* The _INHERIT flag should be present when we read the VL */
+	ret = prctl(data->prctl_get);
+	if (ret == -1) {
+		ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+				      data->name, errno, strerror(errno));
+		return;
+	}
+	if (!(ret & PR_SVE_VL_INHERIT)) {
+		ksft_test_result_fail("%s prctl() does not report _INHERIT\n",
+				      data->name);
+		return;
+	}
+
+	/* Ensure the default VL is different */
+	ret = file_write_integer(data->default_vl_file, data->max_vl);
+	if (ret != 0)
+		return;
+
+	/* Check that the child inherited our VL */
+	child_vl = get_child_rdvl(data);
+	if (child_vl != data->min_vl) {
+		ksft_test_result_fail("%s is %d but child VL is %d\n",
+				      data->default_vl_file,
+				      data->min_vl, child_vl);
+		return;
+	}
+
+	ksft_test_result_pass("%s vector length was inherited\n", data->name);
+
+	file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* _ONEXEC takes effect only in the child process */
+static void prctl_set_onexec(struct vec_data *data)
+{
+	int ret, child_vl;
+
+	if (data->min_vl == data->max_vl) {
+		ksft_test_result_skip("%s only one VL supported\n",
+				      data->name);
+		return;
+	}
+
+	/* Set a known value for the default and our current VL */
+	ret = file_write_integer(data->default_vl_file, data->max_vl);
+	if (ret != 0)
+		return;
+
+	ret = prctl(data->prctl_set, data->max_vl);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+				      data->name, data->min_vl,
+				      errno, strerror(errno));
+		return;
+	}
+
+	/* Set a different value for the child to have on exec */
+	ret = prctl(data->prctl_set, data->min_vl | PR_SVE_SET_VL_ONEXEC);
+	if (ret < 0) {
+		ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+				      data->name, data->min_vl,
+				      errno, strerror(errno));
+		return;
+	}
+
+	/* Our current VL should stay the same */
+	if (data->rdvl() != data->max_vl) {
+		ksft_test_result_fail("%s VL changed by _ONEXEC prctl()\n",
+				      data->name);
+		return;
+	}
+
+	/* Check that the child inherited our VL */
+	child_vl = get_child_rdvl(data);
+	if (child_vl != data->min_vl) {
+		ksft_test_result_fail("Set %d _ONEXEC but child VL is %d\n",
+				      data->min_vl, child_vl);
+		return;
+	}
+
+	ksft_test_result_pass("%s vector length set on exec\n", data->name);
+
+	file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+typedef void (*test_type)(struct vec_data *);
+
+static const test_type tests[] = {
+	/*
+	 * The default/min/max tests must be first and in this order
+	 * to provide data for other tests.
+	 */
+	proc_read_default,
+	proc_write_min,
+	proc_write_max,
+
+	prctl_get,
+	prctl_set,
+	prctl_set_no_child,
+	prctl_set_for_child,
+	prctl_set_onexec,
+};
+
+int main(void)
+{
+	int i, j;
+
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data));
+
+	for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+		struct vec_data *data = &vec_data[i];
+		unsigned long supported;
+
+		supported = getauxval(data->hwcap_type) & data->hwcap;
+
+		for (j = 0; j < ARRAY_SIZE(tests); j++) {
+			if (supported)
+				tests[j](data);
+			else
+				ksft_test_result_skip("%s not supported\n",
+						      data->name);
+		}
+	}
+
+	ksft_exit_pass();
+}

From e96595c55d2397dde599779d75112d21989b37b9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 3 Aug 2021 15:04:50 +0100
Subject: [PATCH 25/56] kselftest/arm64: Add a TODO list for floating point
 tests

Write down some ideas for additional coverage for floating point in case
someone feels inspired to look into them.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20210803140450.46624-5-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/fp/TODO | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/TODO

diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
new file mode 100644
index 000000000000..b6b7ebfcf362
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -0,0 +1,4 @@
+- Test unsupported values in the ABIs.
+- More coverage for ptrace (eg, vector length conversions).
+- Coverage for signals.
+- Test PR_SVE_VL_INHERITY after a double fork.

From 46a2b02d232ece64af43e9ab0b4c64a7a756e58e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 15:07:30 +0100
Subject: [PATCH 26/56] arm64: entry: consolidate entry/exit helpers

To make the various entry/exit helpers easier to understand and easier
to compare, this patch moves all the entry/exit helpers to be adjacent
at the top of entry-common.c, rather than being spread out throughout
the file.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Link: https://lore.kernel.org/r/20210802140733.52716-2-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry-common.c | 84 ++++++++++++++++----------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index db8b2e2d02c2..6f7a98d8d60f 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -75,6 +75,24 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
 	}
 }
 
+asmlinkage void noinstr enter_from_user_mode(void)
+{
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	CT_WARN_ON(ct_state() != CONTEXT_USER);
+	user_exit_irqoff();
+	trace_hardirqs_off_finish();
+}
+
+asmlinkage void noinstr exit_to_user_mode(void)
+{
+	mte_check_tfsr_exit();
+
+	trace_hardirqs_on_prepare();
+	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	user_enter_irqoff();
+	lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
 static void noinstr arm64_enter_nmi(struct pt_regs *regs)
 {
 	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -105,6 +123,30 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
 	__nmi_exit();
 }
 
+static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+{
+	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	rcu_nmi_enter();
+
+	trace_hardirqs_off_finish();
+}
+
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+{
+	bool restore = regs->lockdep_hardirqs;
+
+	if (restore) {
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	}
+
+	rcu_nmi_exit();
+	if (restore)
+		lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
 static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
@@ -265,30 +307,6 @@ static void noinstr el1_undef(struct pt_regs *regs)
 	exit_to_kernel_mode(regs);
 }
 
-static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
-{
-	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
-
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	rcu_nmi_enter();
-
-	trace_hardirqs_off_finish();
-}
-
-static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
-{
-	bool restore = regs->lockdep_hardirqs;
-
-	if (restore) {
-		trace_hardirqs_on_prepare();
-		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-	}
-
-	rcu_nmi_exit();
-	if (restore)
-		lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
 static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
@@ -382,24 +400,6 @@ asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
 	arm64_exit_nmi(regs);
 }
 
-asmlinkage void noinstr enter_from_user_mode(void)
-{
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	CT_WARN_ON(ct_state() != CONTEXT_USER);
-	user_exit_irqoff();
-	trace_hardirqs_off_finish();
-}
-
-asmlinkage void noinstr exit_to_user_mode(void)
-{
-	mte_check_tfsr_exit();
-
-	trace_hardirqs_on_prepare();
-	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-	user_enter_irqoff();
-	lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
 static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);

From bc29b71f53b13c9e90a53a42d2431228a869b459 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 15:07:31 +0100
Subject: [PATCH 27/56] arm64: entry: clarify entry/exit helpers

When entering an exception, we must perform irq/context state management
before we can use instrumentable C code. Similarly, when exiting an
exception we cannot use instrumentable C code after we perform
irq/context state management.

Originally, we'd intended that the enter_from_*() and exit_to_*()
helpers would enforce this by virtue of being the first and last
functions called, respectively, in an exception handler. However, as
they now call instrumentable code themselves, this is not as clearly
true.

To make this more robust, this patch splits the irq/context state
management into separate helpers, with all the helpers commented to make
their intended purpose more obvious.

In exit_to_kernel_mode() we'll now check TFSR_EL1 before we assert that
IRQs are disabled, but this ordering is not important, and other than
this there should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Link: https://lore.kernel.org/r/20210802140733.52716-3-mark.rutland@arm.com
[catalin.marinas@arm.com: comment typos fix-up]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry-common.c | 70 ++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 6f7a98d8d60f..c4923e3ba065 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -26,10 +26,14 @@
 #include <asm/system_misc.h>
 
 /*
+ * Handle IRQ/context state management when entering from kernel mode.
+ * Before this function is called it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ *
  * This is intended to match the logic in irqentry_enter(), handling the kernel
  * mode transitions only.
  */
-static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
 {
 	regs->exit_rcu = false;
 
@@ -45,20 +49,26 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
 	lockdep_hardirqs_off(CALLER_ADDR0);
 	rcu_irq_enter_check_tick();
 	trace_hardirqs_off_finish();
+}
 
+static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+{
+	__enter_from_kernel_mode(regs);
 	mte_check_tfsr_entry();
 }
 
 /*
+ * Handle IRQ/context state management when exiting to kernel mode.
+ * After this function returns it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ *
  * This is intended to match the logic in irqentry_exit(), handling the kernel
  * mode transitions only, and with preemption handled elsewhere.
  */
-static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
 {
 	lockdep_assert_irqs_disabled();
 
-	mte_check_tfsr_exit();
-
 	if (interrupts_enabled(regs)) {
 		if (regs->exit_rcu) {
 			trace_hardirqs_on_prepare();
@@ -75,7 +85,18 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
 	}
 }
 
-asmlinkage void noinstr enter_from_user_mode(void)
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+{
+	mte_check_tfsr_exit();
+	__exit_to_kernel_mode(regs);
+}
+
+/*
+ * Handle IRQ/context state management when entering from user mode.
+ * Before this function is called it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ */
+static __always_inline void __enter_from_user_mode(void)
 {
 	lockdep_hardirqs_off(CALLER_ADDR0);
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
@@ -83,9 +104,18 @@ asmlinkage void noinstr enter_from_user_mode(void)
 	trace_hardirqs_off_finish();
 }
 
-asmlinkage void noinstr exit_to_user_mode(void)
+asmlinkage void noinstr enter_from_user_mode(void)
+{
+	__enter_from_user_mode();
+}
+
+/*
+ * Handle IRQ/context state management when exiting to user mode.
+ * After this function returns it is not safe to call regular kernel code,
+ * intrumentable code, or any code which may trigger an exception.
+ */
+static __always_inline void __exit_to_user_mode(void)
 {
-	mte_check_tfsr_exit();
 
 	trace_hardirqs_on_prepare();
 	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
@@ -93,6 +123,17 @@ asmlinkage void noinstr exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
+asmlinkage void noinstr exit_to_user_mode(void)
+{
+	mte_check_tfsr_exit();
+	__exit_to_user_mode();
+}
+
+/*
+ * Handle IRQ/context state management when entering an NMI from user/kernel
+ * mode. Before this function is called it is not safe to call regular kernel
+ * code, intrumentable code, or any code which may trigger an exception.
+ */
 static void noinstr arm64_enter_nmi(struct pt_regs *regs)
 {
 	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -106,6 +147,11 @@ static void noinstr arm64_enter_nmi(struct pt_regs *regs)
 	ftrace_nmi_enter();
 }
 
+/*
+ * Handle IRQ/context state management when exiting an NMI from user/kernel
+ * mode. After this function returns it is not safe to call regular kernel
+ * code, intrumentable code, or any code which may trigger an exception.
+ */
 static void noinstr arm64_exit_nmi(struct pt_regs *regs)
 {
 	bool restore = regs->lockdep_hardirqs;
@@ -123,6 +169,11 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs)
 	__nmi_exit();
 }
 
+/*
+ * Handle IRQ/context state management when entering a debug exception from
+ * kernel mode. Before this function is called it is not safe to call regular
+ * kernel code, intrumentable code, or any code which may trigger an exception.
+ */
 static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
 {
 	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -133,6 +184,11 @@ static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
 	trace_hardirqs_off_finish();
 }
 
+/*
+ * Handle IRQ/context state management when exiting a debug exception from
+ * kernel mode. After this function returns it is not safe to call regular
+ * kernel code, intrumentable code, or any code which may trigger an exception.
+ */
 static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
 {
 	bool restore = regs->lockdep_hardirqs;

From 4d1c2ee2709fd6e1655206cafcdb22737f5d7379 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 15:07:32 +0100
Subject: [PATCH 28/56] arm64: entry: move bulk of ret_to_user to C

In `ret_to_user` we perform some conditional work depending on the
thread flags, then perform some IRQ/context tracking which is intended
to balance with the IRQ/context tracking performed in the entry C code.

For simplicity and consistency, it would be preferable to move this all
to C. As a step towards that, this patch moves the conditional work and
IRQ/context tracking into a C helper function. To aid bisectability,
this is called from the `ret_to_user` assembly, and a subsequent patch
will move the call to C code.

As local_daif_mask() handles all necessary tracing and PMR manipulation,
we no longer need to handle this explicitly. As we call
exit_to_user_mode() directly, the `user_enter_irqoff` macro is no longer
used, and can be removed. As enter_from_user_mode() and
exit_to_user_mode() are no longer called from assembly, these can be
made static, and as these are typically very small, they are marked
__always_inline to avoid the overhead of a function call.

For now, enablement of single-step is left in entry.S, and for this we
still need to read the flags in ret_to_user(). It is safe to read this
separately as TIF_SINGLESTEP is not part of _TIF_WORK_MASK.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Link: https://lore.kernel.org/r/20210802140733.52716-4-mark.rutland@arm.com
[catalin.marinas@arm.com: removed unused gic_prio_kentry_setup macro]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/exception.h |  5 ++--
 arch/arm64/kernel/entry-common.c   | 21 +++++++++++--
 arch/arm64/kernel/entry.S          | 48 ++----------------------------
 arch/arm64/kernel/signal.c         |  3 +-
 4 files changed, 26 insertions(+), 51 deletions(-)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 4afbc45b8bb0..339477dca551 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -55,8 +55,8 @@ asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
 
 asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 				  void (*func)(struct pt_regs *));
-asmlinkage void enter_from_user_mode(void);
-asmlinkage void exit_to_user_mode(void);
+asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
+
 void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
@@ -73,6 +73,7 @@ void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
 void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
 void do_serror(struct pt_regs *regs, unsigned int esr);
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
 
 void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
 #endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index c4923e3ba065..3da4859c43a1 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -104,7 +104,7 @@ static __always_inline void __enter_from_user_mode(void)
 	trace_hardirqs_off_finish();
 }
 
-asmlinkage void noinstr enter_from_user_mode(void)
+static __always_inline void enter_from_user_mode(void)
 {
 	__enter_from_user_mode();
 }
@@ -123,12 +123,29 @@ static __always_inline void __exit_to_user_mode(void)
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
-asmlinkage void noinstr exit_to_user_mode(void)
+static __always_inline void exit_to_user_mode(void)
 {
 	mte_check_tfsr_exit();
 	__exit_to_user_mode();
 }
 
+static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	local_daif_mask();
+
+	flags = READ_ONCE(current_thread_info()->flags);
+	if (unlikely(flags & _TIF_WORK_MASK))
+		do_notify_resume(regs, flags);
+}
+
+asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
+{
+	prepare_exit_to_user_mode(regs);
+	exit_to_user_mode();
+}
+
 /*
  * Handle IRQ/context state management when entering an NMI from user/kernel
  * mode. Before this function is called it is not safe to call regular kernel
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 863d44f73028..b59eee28cc1b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,16 +29,6 @@
 #include <asm/asm-uaccess.h>
 #include <asm/unistd.h>
 
-/*
- * Context tracking and irqflag tracing need to instrument transitions between
- * user and kernel mode.
- */
-	.macro user_enter_irqoff
-#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
-	bl	exit_to_user_mode
-#endif
-	.endm
-
 	.macro	clear_gp_regs
 	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
 	mov	x\n, xzr
@@ -474,18 +464,6 @@ SYM_CODE_END(__swpan_exit_el0)
 /* GPRs used by entry code */
 tsk	.req	x28		// current thread_info
 
-/*
- * Interrupt handling.
- */
-	.macro	gic_prio_kentry_setup, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
-	mov	\tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
-	msr_s	SYS_ICC_PMR_EL1, \tmp
-	alternative_else_nop_endif
-#endif
-	.endm
-
 	.text
 
 /*
@@ -585,37 +563,17 @@ SYM_CODE_START_LOCAL(ret_to_kernel)
 	kernel_exit 1
 SYM_CODE_END(ret_to_kernel)
 
-/*
- * "slow" syscall return path.
- */
 SYM_CODE_START_LOCAL(ret_to_user)
-	disable_daif
-	gic_prio_kentry_setup tmp=x3
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
-	ldr	x19, [tsk, #TSK_TI_FLAGS]
-	and	x2, x19, #_TIF_WORK_MASK
-	cbnz	x2, work_pending
-finish_ret_to_user:
-	user_enter_irqoff
+	mov	x0, sp
+	bl	asm_exit_to_user_mode
 	/* Ignore asynchronous tag check faults in the uaccess routines */
 	clear_mte_async_tcf
+	ldr	x19, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
 	enable_step_tsk x19, x2
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	bl	stackleak_erase
 #endif
 	kernel_exit 0
-
-/*
- * Ok, we need to do extra processing, enter the slow path.
- */
-work_pending:
-	mov	x0, sp				// 'regs'
-	mov	x1, x19
-	bl	do_notify_resume
-	ldr	x19, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
-	b	finish_ret_to_user
 SYM_CODE_END(ret_to_user)
 
 	.popsection				// .entry.text
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f8192f4ae0b8..53c2c85efb34 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -924,8 +924,7 @@ static bool cpu_affinity_invalid(struct pt_regs *regs)
 				 system_32bit_el0_cpumask());
 }
 
-asmlinkage void do_notify_resume(struct pt_regs *regs,
-				 unsigned long thread_flags)
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
 {
 	do {
 		if (thread_flags & _TIF_NEED_RESCHED) {

From e130338eed5de0f5b6b913e8619d096153cbccb0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 2 Aug 2021 15:07:33 +0100
Subject: [PATCH 29/56] arm64: entry: call exit_to_user_mode() from C

When handling an exception from EL0, we perform the entry work in that
exception's C handler, and once the C handler has finished, we return
back to the entry assembly. Subsequently in the common `ret_to_user`
assembly we perform the exit work that balances with the entry work.
This can be somewhat difficult to follow, and makes it hard to rework
the return paths (e.g. to pass additional context to the exit code, or
to have exception return logic for specific exceptions).

This patch reworks the entry code such that each EL0 C exception handler
is responsible for both the entry and exit work. This clearly balances
the two (and will permit additional variation in future), and avoids an
unnecessary bounce between assembly and C in the common case, leaving
`ret_from_fork` as the only place assembly has to call the exit code.
This means that the exit work is now inlined into the C handler, which
is already the case for the entry work, and allows the compiler to
generate better code (e.g. by immediately returning when there is no
exit work to perform).

To align with other exception entry/exit helpers, enter_from_user_mode()
is updated to take the EL0 pt_regs as a parameter, though this is
currently unused.

There should be no functional change as a result of this patch. However,
this should lead to slightly better backtraces when an error is
encountered within do_notify_resume(), as the C handler should appear in
the backtrace, indicating the specific exception that the kernel was
entered with.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Link: https://lore.kernel.org/r/20210802140733.52716-5-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry-common.c | 74 ++++++++++++++++++++------------
 arch/arm64/kernel/entry.S        |  4 +-
 2 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 3da4859c43a1..32f9796c4ffe 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -104,7 +104,7 @@ static __always_inline void __enter_from_user_mode(void)
 	trace_hardirqs_off_finish();
 }
 
-static __always_inline void enter_from_user_mode(void)
+static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 {
 	__enter_from_user_mode();
 }
@@ -116,19 +116,12 @@ static __always_inline void enter_from_user_mode(void)
  */
 static __always_inline void __exit_to_user_mode(void)
 {
-
 	trace_hardirqs_on_prepare();
 	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
 	user_enter_irqoff();
 	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
-static __always_inline void exit_to_user_mode(void)
-{
-	mte_check_tfsr_exit();
-	__exit_to_user_mode();
-}
-
 static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
 {
 	unsigned long flags;
@@ -140,10 +133,16 @@ static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
 		do_notify_resume(regs, flags);
 }
 
-asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
+static __always_inline void exit_to_user_mode(struct pt_regs *regs)
 {
 	prepare_exit_to_user_mode(regs);
-	exit_to_user_mode();
+	mte_check_tfsr_exit();
+	__exit_to_user_mode();
+}
+
+asmlinkage void noinstr asm_exit_to_user_mode(struct pt_regs *regs)
+{
+	exit_to_user_mode(regs);
 }
 
 /*
@@ -477,9 +476,10 @@ static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_mem_abort(far, esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
@@ -494,37 +494,42 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
 	if (!is_ttbr0_addr(far))
 		arm64_apply_bp_hardening();
 
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_mem_abort(far, esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_acc(esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sve_acc(esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_exc(esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sysinstr(esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
@@ -534,37 +539,42 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
 	if (!is_ttbr0_addr(instruction_pointer(regs)))
 		arm64_apply_bp_hardening();
 
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(far, esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(regs->sp, esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_undef(struct pt_regs *regs)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_undefinstr(regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_bti(struct pt_regs *regs)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_bti(regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	bad_el0_sync(regs, 0, esr);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
@@ -572,23 +582,26 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
 	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
 	unsigned long far = read_sysreg(far_el1);
 
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	do_debug_exception(far, esr, regs);
 	local_daif_restore(DAIF_PROCCTX);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_svc(struct pt_regs *regs)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
 	do_el0_svc(regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_ptrauth_fault(regs, esr);
+	exit_to_user_mode(regs);
 }
 
 asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
@@ -647,7 +660,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
 static void noinstr el0_interrupt(struct pt_regs *regs,
 				  void (*handler)(struct pt_regs *))
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 
 	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
 
@@ -655,6 +668,8 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
 		arm64_apply_bp_hardening();
 
 	do_interrupt_handler(regs, handler);
+
+	exit_to_user_mode(regs);
 }
 
 static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
@@ -681,12 +696,13 @@ static void noinstr __el0_error_handler_common(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_ERRCTX);
 	arm64_enter_nmi(regs);
 	do_serror(regs, esr);
 	arm64_exit_nmi(regs);
 	local_daif_restore(DAIF_PROCCTX);
+	exit_to_user_mode(regs);
 }
 
 asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
@@ -697,16 +713,18 @@ asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
 #ifdef CONFIG_COMPAT
 static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
 	do_cp15instr(esr, regs);
+	exit_to_user_mode(regs);
 }
 
 static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
-	enter_from_user_mode();
+	enter_from_user_mode(regs);
 	cortex_a76_erratum_1463225_svc_handler();
 	do_el0_svc_compat(regs);
+	exit_to_user_mode(regs);
 }
 
 asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b59eee28cc1b..1b45065a22d3 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -564,8 +564,6 @@ SYM_CODE_START_LOCAL(ret_to_kernel)
 SYM_CODE_END(ret_to_kernel)
 
 SYM_CODE_START_LOCAL(ret_to_user)
-	mov	x0, sp
-	bl	asm_exit_to_user_mode
 	/* Ignore asynchronous tag check faults in the uaccess routines */
 	clear_mte_async_tcf
 	ldr	x19, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
@@ -739,6 +737,8 @@ SYM_CODE_START(ret_from_fork)
 	mov	x0, x20
 	blr	x19
 1:	get_current_task tsk
+	mov	x0, sp
+	bl	asm_exit_to_user_mode
 	b	ret_to_user
 SYM_CODE_END(ret_from_fork)
 NOKPROBE(ret_from_fork)

From ede3241a5f235811b1e66f56cf8fbdfd01266efb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 4 Aug 2021 19:17:10 +0100
Subject: [PATCH 30/56] arm64: entry: Add SYM_CODE annotation for __bad_stack

When converting arm64 to modern assembler annotations __bad_stack was left
as a raw local label without annotations. While this will have little if
any practical impact at present it may cause issues in the future if we
start using the annotations for things like reliable stack trace. Add
SYM_CODE annotations to fix this.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210804181710.19059-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 863d44f73028..fea118e68d51 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -517,12 +517,13 @@ SYM_CODE_START(vectors)
 SYM_CODE_END(vectors)
 
 #ifdef CONFIG_VMAP_STACK
+SYM_CODE_START_LOCAL(__bad_stack)
 	/*
 	 * We detected an overflow in kernel_ventry, which switched to the
 	 * overflow stack. Stash the exception regs, and head to our overflow
 	 * handler.
 	 */
-__bad_stack:
+
 	/* Restore the original x0 value */
 	mrs	x0, tpidrro_el0
 
@@ -542,6 +543,7 @@ __bad_stack:
 	/* Time to die */
 	bl	handle_bad_stack
 	ASM_BUG()
+SYM_CODE_END(__bad_stack)
 #endif /* CONFIG_VMAP_STACK */
 
 

From 5e10f9887ed85d4f59266d5c60dd09be96b5dbd4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 6 Aug 2021 12:31:04 +0100
Subject: [PATCH 31/56] arm64: mm: Fix TLBI vs ASID rollover

When switching to an 'mm_struct' for the first time following an ASID
rollover, a new ASID may be allocated and assigned to 'mm->context.id'.
This reassignment can happen concurrently with other operations on the
mm, such as unmapping pages and subsequently issuing TLB invalidation.

Consequently, we need to ensure that (a) accesses to 'mm->context.id'
are atomic and (b) all page-table updates made prior to a TLBI using the
old ASID are guaranteed to be visible to CPUs running with the new ASID.

This was found by inspection after reviewing the VMID changes from
Shameer but it looks like a real (yet hard to hit) bug.

Cc: <stable@vger.kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Jade Alglave <jade.alglave@arm.com>
Cc: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210806113109.2475-2-will@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/mmu.h      | 29 +++++++++++++++++++++++++----
 arch/arm64/include/asm/tlbflush.h | 11 ++++++-----
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 75beffe2ee8a..e9c30859f80c 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -27,11 +27,32 @@ typedef struct {
 } mm_context_t;
 
 /*
- * This macro is only used by the TLBI and low-level switch_mm() code,
- * neither of which can race with an ASID change. We therefore don't
- * need to reload the counter using atomic64_read().
+ * We use atomic64_read() here because the ASID for an 'mm_struct' can
+ * be reallocated when scheduling one of its threads following a
+ * rollover event (see new_context() and flush_context()). In this case,
+ * a concurrent TLBI (e.g. via try_to_unmap_one() and ptep_clear_flush())
+ * may use a stale ASID. This is fine in principle as the new ASID is
+ * guaranteed to be clean in the TLB, but the TLBI routines have to take
+ * care to handle the following race:
+ *
+ *    CPU 0                    CPU 1                          CPU 2
+ *
+ *    // ptep_clear_flush(mm)
+ *    xchg_relaxed(pte, 0)
+ *    DSB ISHST
+ *    old = ASID(mm)
+ *         |                                                  <rollover>
+ *         |                   new = new_context(mm)
+ *         \-----------------> atomic_set(mm->context.id, new)
+ *                             cpu_switch_mm(mm)
+ *                             // Hardware walk of pte using new ASID
+ *    TLBI(old)
+ *
+ * In this scenario, the barrier on CPU 0 and the dependency on CPU 1
+ * ensure that the page-table walker on CPU 1 *must* see the invalid PTE
+ * written by CPU 0.
  */
-#define ASID(mm)	((mm)->context.id.counter & 0xffff)
+#define ASID(mm)	(atomic64_read(&(mm)->context.id) & 0xffff)
 
 static inline bool arm64_kernel_unmapped_at_el0(void)
 {
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 118dabbda553..412a3b9a3c25 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -245,9 +245,10 @@ static inline void flush_tlb_all(void)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-	unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+	unsigned long asid;
 
 	dsb(ishst);
+	asid = __TLBI_VADDR(0, ASID(mm));
 	__tlbi(aside1is, asid);
 	__tlbi_user(aside1is, asid);
 	dsb(ish);
@@ -256,9 +257,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
 					 unsigned long uaddr)
 {
-	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+	unsigned long addr;
 
 	dsb(ishst);
+	addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
 	__tlbi(vale1is, addr);
 	__tlbi_user(vale1is, addr);
 }
@@ -283,9 +285,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 {
 	int num = 0;
 	int scale = 0;
-	unsigned long asid = ASID(vma->vm_mm);
-	unsigned long addr;
-	unsigned long pages;
+	unsigned long asid, addr, pages;
 
 	start = round_down(start, stride);
 	end = round_up(end, stride);
@@ -305,6 +305,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	}
 
 	dsb(ishst);
+	asid = ASID(vma->vm_mm);
 
 	/*
 	 * When the CPU does not support TLB range operations, flush the TLB

From fd264b310579efc14436b2ec86defa5d4c31332e Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 11 Aug 2021 08:57:07 +0530
Subject: [PATCH 32/56] arm64/perf: Replace '0xf' instances with
 ID_AA64DFR0_PMUVER_IMP_DEF

ID_AA64DFR0_PMUVER_IMP_DEF, indicating an "implementation defined" PMU,
never actually gets used although there are '0xf' instances scattered
all around. Use the symbolic name instead of the raw hex constant.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/1628652427-24695-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpufeature.h | 2 +-
 arch/arm64/kernel/perf_event.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9bb9d11750d7..54474e76ad86 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -552,7 +552,7 @@ cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
 	u64 mask = GENMASK_ULL(field + 3, field);
 
 	/* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
-	if (val == 0xf)
+	if (val == ID_AA64DFR0_PMUVER_IMP_DEF)
 		val = 0;
 
 	if (val > cap) {
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index d07788dad388..b4044469527e 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1055,7 +1055,7 @@ static void __armv8pmu_probe_pmu(void *info)
 	dfr0 = read_sysreg(id_aa64dfr0_el1);
 	pmuver = cpuid_feature_extract_unsigned_field(dfr0,
 			ID_AA64DFR0_PMUVER_SHIFT);
-	if (pmuver == 0xf || pmuver == 0)
+	if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0)
 		return;
 
 	cpu_pmu->pmuver = pmuver;

From 83e5dcbece4ea67ec3ad94b897e2844184802fd7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 18:29:02 +0100
Subject: [PATCH 33/56] kselftest/arm64: mte: Fix misleading output when
 skipping tests

When skipping the tests due to a lack of system support for MTE we
currently print a message saying FAIL which makes it look like the test
failed even though the test did actually report KSFT_SKIP, creating some
confusion. Change the error message to say SKIP instead so things are
clearer.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819172902.56211-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/mte/mte_common_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index f50ac31920d1..0328a1e08f65 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -298,7 +298,7 @@ int mte_default_setup(void)
 	int ret;
 
 	if (!(hwcaps2 & HWCAP2_MTE)) {
-		ksft_print_msg("FAIL: MTE features unavailable\n");
+		ksft_print_msg("SKIP: MTE features unavailable\n");
 		return KSFT_SKIP;
 	}
 	/* Get current mte mode */

From d82158fa6df4237c9859b27d719c53b4fe09e69e Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:37 +0100
Subject: [PATCH 34/56] arm64: Implement task_cpu_possible_mask()

Provide an implementation of task_cpu_possible_mask() so that we can
prevent 64-bit-only cores being added to the 'cpus_mask' for compat
tasks on systems with mismatched 32-bit support at EL0,

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210730112443.23245-11-will@kernel.org
---
 arch/arm64/include/asm/mmu_context.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index eeb210997149..f4ba93d4ffeb 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -231,6 +231,19 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	update_saved_ttbr0(tsk, next);
 }
 
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+	if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+		return cpu_possible_mask;
+
+	if (!is_compat_thread(task_thread_info(p)))
+		return cpu_possible_mask;
+
+	return system_32bit_el0_cpumask();
+}
+#define task_cpu_possible_mask	task_cpu_possible_mask
+
 void verify_cpu_asid_bits(void);
 void post_ttbr_update_workaround(void);
 

From 08cd8f4112dbd33bbfe1112dd6e9f0a228a8e132 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:38 +0100
Subject: [PATCH 35/56] arm64: exec: Adjust affinity for compat tasks with
 mismatched 32-bit EL0

When exec'ing a 32-bit task on a system with mismatched support for
32-bit EL0, try to ensure that it starts life on a CPU that can actually
run it.

Similarly, when exec'ing a 64-bit task on such a system, try to restore
the old affinity mask if it was previously restricted.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Reviewed-by: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20210730112443.23245-12-will@kernel.org
---
 arch/arm64/include/asm/elf.h |  6 ++----
 arch/arm64/kernel/process.c  | 39 +++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 8d1c8dcb87fd..97932fbf973d 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -213,10 +213,8 @@ typedef compat_elf_greg_t		compat_elf_gregset_t[COMPAT_ELF_NGREG];
 
 /* AArch32 EABI. */
 #define EF_ARM_EABI_MASK		0xff000000
-#define compat_elf_check_arch(x)	(system_supports_32bit_el0() && \
-					 ((x)->e_machine == EM_ARM) && \
-					 ((x)->e_flags & EF_ARM_EABI_MASK))
-
+int compat_elf_check_arch(const struct elf32_hdr *);
+#define compat_elf_check_arch		compat_elf_check_arch
 #define compat_start_thread		compat_start_thread
 /*
  * Unlike the native SET_PERSONALITY macro, the compat version maintains
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c8989b999250..583ee58f8c9c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -21,6 +21,7 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/nospec.h>
+#include <linux/sched.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/unistd.h>
@@ -579,6 +580,28 @@ unsigned long arch_align_stack(unsigned long sp)
 	return sp & ~0xf;
 }
 
+#ifdef CONFIG_COMPAT
+int compat_elf_check_arch(const struct elf32_hdr *hdr)
+{
+	if (!system_supports_32bit_el0())
+		return false;
+
+	if ((hdr)->e_machine != EM_ARM)
+		return false;
+
+	if (!((hdr)->e_flags & EF_ARM_EABI_MASK))
+		return false;
+
+	/*
+	 * Prevent execve() of a 32-bit program from a deadline task
+	 * if the restricted affinity mask would be inadmissible on an
+	 * asymmetric system.
+	 */
+	return !static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+	       !dl_task_check_affinity(current, system_32bit_el0_cpumask());
+}
+#endif
+
 /*
  * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
  */
@@ -588,8 +611,22 @@ void arch_setup_new_exec(void)
 
 	if (is_compat_task()) {
 		mmflags = MMCF_AARCH32;
-		if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+
+		/*
+		 * Restrict the CPU affinity mask for a 32-bit task so that
+		 * it contains only 32-bit-capable CPUs.
+		 *
+		 * From the perspective of the task, this looks similar to
+		 * what would happen if the 64-bit-only CPUs were hot-unplugged
+		 * at the point of execve(), although we try a bit harder to
+		 * honour the cpuset hierarchy.
+		 */
+		if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
+			force_compatible_cpus_allowed_ptr(current);
 			set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+		}
+	} else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
+		relax_compatible_cpus_allowed_ptr(current);
 	}
 
 	current->mm->context.flags = mmflags;

From df950811f4a884d08f05e22a8e0089d54bb4ba70 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:39 +0100
Subject: [PATCH 36/56] arm64: Prevent offlining first CPU with 32-bit EL0 on
 mismatched system

If we want to support 32-bit applications, then when we identify a CPU
with mismatched 32-bit EL0 support we must ensure that we will always
have an active 32-bit CPU available to us from then on. This is important
for the scheduler, because is_cpu_allowed() will be constrained to 32-bit
CPUs for compat tasks and forced migration due to a hotplug event will
hang if no 32-bit CPUs are available.

On detecting a mismatch, prevent offlining of either the mismatching CPU
if it is 32-bit capable, or find the first active 32-bit capable CPU
otherwise.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210730112443.23245-13-will@kernel.org
---
 arch/arm64/kernel/cpufeature.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0ead8bfedf20..0af584549499 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2901,15 +2901,38 @@ void __init setup_cpu_features(void)
 
 static int enable_mismatched_32bit_el0(unsigned int cpu)
 {
+	/*
+	 * The first 32-bit-capable CPU we detected and so can no longer
+	 * be offlined by userspace. -1 indicates we haven't yet onlined
+	 * a 32-bit-capable CPU.
+	 */
+	static int lucky_winner = -1;
+
 	struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
 	bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
 
 	if (cpu_32bit) {
 		cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
 		static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
-		setup_elf_hwcaps(compat_elf_hwcaps);
 	}
 
+	if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit)
+		return 0;
+
+	if (lucky_winner >= 0)
+		return 0;
+
+	/*
+	 * We've detected a mismatch. We need to keep one of our CPUs with
+	 * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting
+	 * every CPU in the system for a 32-bit task.
+	 */
+	lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask,
+							 cpu_active_mask);
+	get_cpu_device(lucky_winner)->offline_disabled = true;
+	setup_elf_hwcaps(compat_elf_hwcaps);
+	pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
+		cpu, lucky_winner);
 	return 0;
 }
 

From 7af33504d1c8077b40121294b5eb6e680a859468 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:40 +0100
Subject: [PATCH 37/56] arm64: Advertise CPUs capable of running 32-bit
 applications in sysfs

Since 32-bit applications will be killed if they are caught trying to
execute on a 64-bit-only CPU in a mismatched system, advertise the set
of 32-bit capable CPUs to userspace in sysfs.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210730112443.23245-14-will@kernel.org
---
 .../ABI/testing/sysfs-devices-system-cpu      |  9 +++++++++
 arch/arm64/kernel/cpufeature.c                | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 160b10c029c0..69edbd99e0b7 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -494,6 +494,15 @@ Description:	AArch64 CPU registers
 		'identification' directory exposes the CPU ID registers for
 		identifying model and revision of the CPU.
 
+What:		/sys/devices/system/cpu/aarch32_el0
+Date:		May 2021
+Contact:	Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
+Description:	Identifies the subset of CPUs in the system that can execute
+		AArch32 (32-bit ARM) applications. If present, the same format as
+		/sys/devices/system/cpu/{offline,online,possible,present} is used.
+		If absent, then all or none of the CPUs can execute AArch32
+		applications and execve() will behave accordingly.
+
 What:		/sys/devices/system/cpu/cpu#/cpu_capacity
 Date:		December 2016
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0af584549499..0a9bc7eff26e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -67,6 +67,7 @@
 #include <linux/crash_dump.h>
 #include <linux/sort.h>
 #include <linux/stop_machine.h>
+#include <linux/sysfs.h>
 #include <linux/types.h>
 #include <linux/minmax.h>
 #include <linux/mm.h>
@@ -1321,6 +1322,24 @@ const struct cpumask *system_32bit_el0_cpumask(void)
 	return cpu_possible_mask;
 }
 
+static ssize_t aarch32_el0_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	const struct cpumask *mask = system_32bit_el0_cpumask();
+
+	return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask));
+}
+static const DEVICE_ATTR_RO(aarch32_el0);
+
+static int __init aarch32_el0_sysfs_init(void)
+{
+	if (!allow_mismatched_32bit_el0)
+		return 0;
+
+	return device_create_file(cpu_subsys.dev_root, &dev_attr_aarch32_el0);
+}
+device_initcall(aarch32_el0_sysfs_init);
+
 static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
 {
 	if (!has_cpuid_feature(entry, scope))

From ead7de462ae56b2d2e56fb5a414f6e916dddc4c9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:41 +0100
Subject: [PATCH 38/56] arm64: Hook up cmdline parameter to allow mismatched
 32-bit EL0

Allow systems with mismatched 32-bit support at EL0 to run 32-bit
applications based on a new kernel parameter.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210730112443.23245-15-will@kernel.org
---
 Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
 arch/arm64/kernel/cpufeature.c                  | 7 +++++++
 2 files changed, 15 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..6ab625dea8c0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -287,6 +287,14 @@
 			do not want to use tracing_snapshot_alloc() as it needs
 			to be done where GFP_KERNEL allocations are allowed.
 
+	allow_mismatched_32bit_el0 [ARM64]
+			Allow execve() of 32-bit applications and setting of the
+			PER_LINUX32 personality on systems where only a strict
+			subset of the CPUs support 32-bit EL0. When this
+			parameter is present, the set of CPUs supporting 32-bit
+			EL0 is indicated by /sys/devices/system/cpu/aarch32_el0
+			and hot-unplug operations may be restricted.
+
 	amd_iommu=	[HW,X86-64]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0a9bc7eff26e..77b2149c26b8 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1322,6 +1322,13 @@ const struct cpumask *system_32bit_el0_cpumask(void)
 	return cpu_possible_mask;
 }
 
+static int __init parse_32bit_el0_param(char *str)
+{
+	allow_mismatched_32bit_el0 = true;
+	return 0;
+}
+early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param);
+
 static ssize_t aarch32_el0_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {

From 94f9c00f6460c0b175226c8fe6fd137547b239bd Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:42 +0100
Subject: [PATCH 39/56] arm64: Remove logic to kill 32-bit tasks on 64-bit-only
 cores

The scheduler now knows enough about these braindead systems to place
32-bit tasks accordingly, so throw out the safety checks and allow the
ret-to-user path to avoid do_notify_resume() if there is nothing to do.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210730112443.23245-16-will@kernel.org
---
 arch/arm64/kernel/process.c | 14 +-------------
 arch/arm64/kernel/signal.c  | 26 --------------------------
 2 files changed, 1 insertion(+), 39 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 583ee58f8c9c..e0e7f4e9b607 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -469,15 +469,6 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 	write_sysreg(val, cntkctl_el1);
 }
 
-static void compat_thread_switch(struct task_struct *next)
-{
-	if (!is_compat_thread(task_thread_info(next)))
-		return;
-
-	if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
-		set_tsk_thread_flag(next, TIF_NOTIFY_RESUME);
-}
-
 static void update_sctlr_el1(u64 sctlr)
 {
 	/*
@@ -519,7 +510,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	ssbs_thread_switch(next);
 	erratum_1418040_thread_switch(prev, next);
 	ptrauth_thread_switch_user(next);
-	compat_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -621,10 +611,8 @@ void arch_setup_new_exec(void)
 		 * at the point of execve(), although we try a bit harder to
 		 * honour the cpuset hierarchy.
 		 */
-		if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
+		if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
 			force_compatible_cpus_allowed_ptr(current);
-			set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
-		}
 	} else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
 		relax_compatible_cpus_allowed_ptr(current);
 	}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 23036334f4dc..22899c86711a 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -912,19 +912,6 @@ static void do_signal(struct pt_regs *regs)
 	restore_saved_sigmask();
 }
 
-static bool cpu_affinity_invalid(struct pt_regs *regs)
-{
-	if (!compat_user_mode(regs))
-		return false;
-
-	/*
-	 * We're preemptible, but a reschedule will cause us to check the
-	 * affinity again.
-	 */
-	return !cpumask_test_cpu(raw_smp_processor_id(),
-				 system_32bit_el0_cpumask());
-}
-
 asmlinkage void do_notify_resume(struct pt_regs *regs,
 				 unsigned long thread_flags)
 {
@@ -952,19 +939,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 			if (thread_flags & _TIF_NOTIFY_RESUME) {
 				tracehook_notify_resume(regs);
 				rseq_handle_notify_resume(NULL, regs);
-
-				/*
-				 * If we reschedule after checking the affinity
-				 * then we must ensure that TIF_NOTIFY_RESUME
-				 * is set so that we check the affinity again.
-				 * Since tracehook_notify_resume() clears the
-				 * flag, ensure that the compiler doesn't move
-				 * it after the affinity check.
-				 */
-				barrier();
-
-				if (cpu_affinity_invalid(regs))
-					force_sig(SIGKILL);
 			}
 
 			if (thread_flags & _TIF_FOREIGN_FPSTATE)

From 702f43872665e3b1cc6fdb77d238533274fc9d18 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 30 Jul 2021 12:24:43 +0100
Subject: [PATCH 40/56] Documentation: arm64: describe asymmetric 32-bit
 support

Document support for running 32-bit tasks on asymmetric 32-bit systems
and its impact on the user ABI when enabled.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210730112443.23245-17-will@kernel.org
---
 .../admin-guide/kernel-parameters.txt         |   3 +
 Documentation/arm64/asymmetric-32bit.rst      | 155 ++++++++++++++++++
 Documentation/arm64/index.rst                 |   1 +
 3 files changed, 159 insertions(+)
 create mode 100644 Documentation/arm64/asymmetric-32bit.rst

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6ab625dea8c0..b2f5dd4ea805 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -295,6 +295,9 @@
 			EL0 is indicated by /sys/devices/system/cpu/aarch32_el0
 			and hot-unplug operations may be restricted.
 
+			See Documentation/arm64/asymmetric-32bit.rst for more
+			information.
+
 	amd_iommu=	[HW,X86-64]
 			Pass parameters to the AMD IOMMU driver in the system.
 			Possible values are:
diff --git a/Documentation/arm64/asymmetric-32bit.rst b/Documentation/arm64/asymmetric-32bit.rst
new file mode 100644
index 000000000000..64a0b505da7d
--- /dev/null
+++ b/Documentation/arm64/asymmetric-32bit.rst
@@ -0,0 +1,155 @@
+======================
+Asymmetric 32-bit SoCs
+======================
+
+Author: Will Deacon <will@kernel.org>
+
+This document describes the impact of asymmetric 32-bit SoCs on the
+execution of 32-bit (``AArch32``) applications.
+
+Date: 2021-05-17
+
+Introduction
+============
+
+Some Armv9 SoCs suffer from a big.LITTLE misfeature where only a subset
+of the CPUs are capable of executing 32-bit user applications. On such
+a system, Linux by default treats the asymmetry as a "mismatch" and
+disables support for both the ``PER_LINUX32`` personality and
+``execve(2)`` of 32-bit ELF binaries, with the latter returning
+``-ENOEXEC``. If the mismatch is detected during late onlining of a
+64-bit-only CPU, then the onlining operation fails and the new CPU is
+unavailable for scheduling.
+
+Surprisingly, these SoCs have been produced with the intention of
+running legacy 32-bit binaries. Unsurprisingly, that doesn't work very
+well with the default behaviour of Linux.
+
+It seems inevitable that future SoCs will drop 32-bit support
+altogether, so if you're stuck in the unenviable position of needing to
+run 32-bit code on one of these transitionary platforms then you would
+be wise to consider alternatives such as recompilation, emulation or
+retirement. If neither of those options are practical, then read on.
+
+Enabling kernel support
+=======================
+
+Since the kernel support is not completely transparent to userspace,
+allowing 32-bit tasks to run on an asymmetric 32-bit system requires an
+explicit "opt-in" and can be enabled by passing the
+``allow_mismatched_32bit_el0`` parameter on the kernel command-line.
+
+For the remainder of this document we will refer to an *asymmetric
+system* to mean an asymmetric 32-bit SoC running Linux with this kernel
+command-line option enabled.
+
+Userspace impact
+================
+
+32-bit tasks running on an asymmetric system behave in mostly the same
+way as on a homogeneous system, with a few key differences relating to
+CPU affinity.
+
+sysfs
+-----
+
+The subset of CPUs capable of running 32-bit tasks is described in
+``/sys/devices/system/cpu/aarch32_el0`` and is documented further in
+``Documentation/ABI/testing/sysfs-devices-system-cpu``.
+
+**Note:** CPUs are advertised by this file as they are detected and so
+late-onlining of 32-bit-capable CPUs can result in the file contents
+being modified by the kernel at runtime. Once advertised, CPUs are never
+removed from the file.
+
+``execve(2)``
+-------------
+
+On a homogeneous system, the CPU affinity of a task is preserved across
+``execve(2)``. This is not always possible on an asymmetric system,
+specifically when the new program being executed is 32-bit yet the
+affinity mask contains 64-bit-only CPUs. In this situation, the kernel
+determines the new affinity mask as follows:
+
+  1. If the 32-bit-capable subset of the affinity mask is not empty,
+     then the affinity is restricted to that subset and the old affinity
+     mask is saved. This saved mask is inherited over ``fork(2)`` and
+     preserved across ``execve(2)`` of 32-bit programs.
+
+     **Note:** This step does not apply to ``SCHED_DEADLINE`` tasks.
+     See `SCHED_DEADLINE`_.
+
+  2. Otherwise, the cpuset hierarchy of the task is walked until an
+     ancestor is found containing at least one 32-bit-capable CPU. The
+     affinity of the task is then changed to match the 32-bit-capable
+     subset of the cpuset determined by the walk.
+
+  3. On failure (i.e. out of memory), the affinity is changed to the set
+     of all 32-bit-capable CPUs of which the kernel is aware.
+
+A subsequent ``execve(2)`` of a 64-bit program by the 32-bit task will
+invalidate the affinity mask saved in (1) and attempt to restore the CPU
+affinity of the task using the saved mask if it was previously valid.
+This restoration may fail due to intervening changes to the deadline
+policy or cpuset hierarchy, in which case the ``execve(2)`` continues
+with the affinity unchanged.
+
+Calls to ``sched_setaffinity(2)`` for a 32-bit task will consider only
+the 32-bit-capable CPUs of the requested affinity mask. On success, the
+affinity for the task is updated and any saved mask from a prior
+``execve(2)`` is invalidated.
+
+``SCHED_DEADLINE``
+------------------
+
+Explicit admission of a 32-bit deadline task to the default root domain
+(e.g. by calling ``sched_setattr(2)``) is rejected on an asymmetric
+32-bit system unless admission control is disabled by writing -1 to
+``/proc/sys/kernel/sched_rt_runtime_us``.
+
+``execve(2)`` of a 32-bit program from a 64-bit deadline task will
+return ``-ENOEXEC`` if the root domain for the task contains any
+64-bit-only CPUs and admission control is enabled. Concurrent offlining
+of 32-bit-capable CPUs may still necessitate the procedure described in
+`execve(2)`_, in which case step (1) is skipped and a warning is
+emitted on the console.
+
+**Note:** It is recommended that a set of 32-bit-capable CPUs are placed
+into a separate root domain if ``SCHED_DEADLINE`` is to be used with
+32-bit tasks on an asymmetric system. Failure to do so is likely to
+result in missed deadlines.
+
+Cpusets
+-------
+
+The affinity of a 32-bit task on an asymmetric system may include CPUs
+that are not explicitly allowed by the cpuset to which it is attached.
+This can occur as a result of the following two situations:
+
+  - A 64-bit task attached to a cpuset which allows only 64-bit CPUs
+    executes a 32-bit program.
+
+  - All of the 32-bit-capable CPUs allowed by a cpuset containing a
+    32-bit task are offlined.
+
+In both of these cases, the new affinity is calculated according to step
+(2) of the process described in `execve(2)`_ and the cpuset hierarchy is
+unchanged irrespective of the cgroup version.
+
+CPU hotplug
+-----------
+
+On an asymmetric system, the first detected 32-bit-capable CPU is
+prevented from being offlined by userspace and any such attempt will
+return ``-EPERM``. Note that suspend is still permitted even if the
+primary CPU (i.e. CPU 0) is 64-bit-only.
+
+KVM
+---
+
+Although KVM will not advertise 32-bit EL0 support to any vCPUs on an
+asymmetric system, a broken guest at EL1 could still attempt to execute
+32-bit code at EL0. In this case, an exit from a vCPU thread in 32-bit
+mode will return to host userspace with an ``exit_reason`` of
+``KVM_EXIT_FAIL_ENTRY`` and will remain non-runnable until successfully
+re-initialised by a subsequent ``KVM_ARM_VCPU_INIT`` operation.
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index 97d65ba12a35..4f840bac083e 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -10,6 +10,7 @@ ARM64 Architecture
     acpi_object_usage
     amu
     arm-acpi
+    asymmetric-32bit
     booting
     cpu-feature-registers
     elf_hwcaps

From 0c69bd2ca6ee20064dde7853cd749284e053a874 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 17:57:23 +0100
Subject: [PATCH 41/56] kselftest/arm64: pac: Fix skipping of tests on systems
 without PAC

The PAC tests check to see if the system supports the relevant PAC features
but instead of skipping the tests if they can't be executed they fail the
tests which makes things look like they're not working when they are.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819165723.43903-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/pauth/pac.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 592fe538506e..b743daa772f5 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -25,13 +25,15 @@
 do { \
 	unsigned long hwcaps = getauxval(AT_HWCAP); \
 	/* data key instructions are not in NOP space. This prevents a SIGILL */ \
-	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+	if (!(hwcaps & HWCAP_PACA))					\
+		SKIP(return, "PAUTH not enabled"); \
 } while (0)
 #define ASSERT_GENERIC_PAUTH_ENABLED() \
 do { \
 	unsigned long hwcaps = getauxval(AT_HWCAP); \
 	/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
-	ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+	if (!(hwcaps & HWCAP_PACG)) \
+		SKIP(return, "Generic PAUTH not enabled");	\
 } while (0)
 
 void sign_specific(struct signatures *sign, size_t val)
@@ -256,7 +258,7 @@ TEST(single_thread_different_keys)
 	unsigned long hwcaps = getauxval(AT_HWCAP);
 
 	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
-	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	ASSERT_PAUTH_ENABLED();
 	if (!(hwcaps & HWCAP_PACG)) {
 		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
 		nkeys = NKEYS - 1;
@@ -299,7 +301,7 @@ TEST(exec_changed_keys)
 	unsigned long hwcaps = getauxval(AT_HWCAP);
 
 	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
-	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	ASSERT_PAUTH_ENABLED();
 	if (!(hwcaps & HWCAP_PACG)) {
 		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
 		nkeys = NKEYS - 1;

From 090bf6f84b4d23457a2116891e5de93bc995da90 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Sat, 14 Aug 2021 08:54:05 +0800
Subject: [PATCH 42/56] arm64: replace in_irq() with in_hardirq()

Replace the obsolete and ambiguos macro in_irq() with new
macro in_hardirq().

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Link: https://lore.kernel.org/r/20210814005405.2658-1-changbin.du@gmail.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/simd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 89cba2622b79..6a75d7ecdcaa 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -37,7 +37,7 @@ static __must_check inline bool may_use_simd(void)
 	 */
 	return !WARN_ON(!system_capabilities_finalized()) &&
 	       system_supports_fpsimd() &&
-	       !in_irq() && !irqs_disabled() && !in_nmi() &&
+	       !in_hardirq() && !irqs_disabled() && !in_nmi() &&
 	       !this_cpu_read(fpsimd_context_busy);
 }
 

From d4e4dc4fab686c5f3f185272a19b83930664bef5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 14:42:40 +0100
Subject: [PATCH 43/56] kselftest/arm64: signal: Add SVE to the set of features
 we can check for

Allow testcases for SVE signal handling to flag the dependency and be
skipped on systems without SVE support.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819134245.13935-2-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/signal/test_signals.h       | 2 ++
 tools/testing/selftests/arm64/signal/test_signals_utils.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f96baf1cef1a..ebe8694dbef0 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -33,10 +33,12 @@
  */
 enum {
 	FSSBS_BIT,
+	FSVE_BIT,
 	FMAX_END
 };
 
 #define FEAT_SSBS		(1UL << FSSBS_BIT)
+#define FEAT_SVE		(1UL << FSVE_BIT)
 
 /*
  * A descriptor used to describe and configure a test case.
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2de6e5ed5e25..6836510a522f 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -26,6 +26,7 @@ static int sig_copyctx = SIGTRAP;
 
 static char const *const feats_names[FMAX_END] = {
 	" SSBS ",
+	" SVE ",
 };
 
 #define MAX_FEATS_SZ	128
@@ -263,6 +264,8 @@ int test_init(struct tdescr *td)
 		 */
 		if (getauxval(AT_HWCAP) & HWCAP_SSBS)
 			td->feats_supported |= FEAT_SSBS;
+		if (getauxval(AT_HWCAP) & HWCAP_SVE)
+			td->feats_supported |= FEAT_SVE;
 		if (feats_ok(td))
 			fprintf(stderr,
 				"Required Features: [%s] supported\n",

From ace19b1845a5c2906d59f6358b80ed687b52b2cd Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 14:42:41 +0100
Subject: [PATCH 44/56] kselftest/arm64: signal: Support signal frames with SVE
 register data

A signal frame with SVE may validly either be a bare struct sve_context or
a struct sve_context followed by vector length dependent register data.
Support either in the generic helpers for the signal tests, and while we're
at it validate the SVE vector length reported.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819134245.13935-3-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../arm64/signal/testcases/testcases.c        | 34 +++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 61ebcdf63831..8c2a57fc2f9c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -50,12 +50,38 @@ bool validate_extra_context(struct extra_context *extra, char **err)
 	return true;
 }
 
+bool validate_sve_context(struct sve_context *sve, char **err)
+{
+	/* Size will be rounded up to a multiple of 16 bytes */
+	size_t regs_size
+		= ((SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl)) + 15) / 16) * 16;
+
+	if (!sve || !err)
+		return false;
+
+	/* Either a bare sve_context or a sve_context followed by regs data */
+	if ((sve->head.size != sizeof(struct sve_context)) &&
+	    (sve->head.size != regs_size)) {
+		*err = "bad size for SVE context";
+		return false;
+	}
+
+	if (!sve_vl_valid(sve->vl)) {
+		*err = "SVE VL invalid";
+
+		return false;
+	}
+
+	return true;
+}
+
 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 {
 	bool terminated = false;
 	size_t offs = 0;
 	int flags = 0;
 	struct extra_context *extra = NULL;
+	struct sve_context *sve = NULL;
 	struct _aarch64_ctx *head =
 		(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
 
@@ -90,9 +116,8 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 		case SVE_MAGIC:
 			if (flags & SVE_CTX)
 				*err = "Multiple SVE_MAGIC";
-			else if (head->size !=
-				 sizeof(struct sve_context))
-				*err = "Bad size for sve_context";
+			/* Size is validated in validate_sve_context() */
+			sve = (struct sve_context *)head;
 			flags |= SVE_CTX;
 			break;
 		case EXTRA_MAGIC:
@@ -137,6 +162,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 		if (flags & EXTRA_CTX)
 			if (!validate_extra_context(extra, err))
 				return false;
+		if (flags & SVE_CTX)
+			if (!validate_sve_context(sve, err))
+				return false;
 
 		head = GET_RESV_NEXT_HEAD(head);
 	}

From c1f67a19c12eefeb67e6dc98dd09253623a213d1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 14:42:42 +0100
Subject: [PATCH 45/56] kselftest/arm64: signal: Check SVE signal frame shows
 expected vector length

As a basic check that the SVE signal frame is being set up correctly
verify that the vector length in the signal frame is the vector length
that the process has.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819134245.13935-4-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../testing/selftests/arm64/signal/.gitignore |  1 +
 .../selftests/arm64/signal/testcases/sve_vl.c | 68 +++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/sve_vl.c

diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 78c902045ca7..c1742755abb9 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 mangle_*
 fake_sigreturn_*
+sve_*
 !*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
new file mode 100644
index 000000000000..92904653add1
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SVE vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sve_vl(struct tdescr *td)
+{
+	int ret = prctl(PR_SVE_GET_VL);
+	if (ret == -1)
+		return false;
+
+	vl = ret;
+
+	return true;
+}
+
+static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct sve_context *sve;
+
+	/* Get a signal context which should have a SVE frame in it */
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No SVE context\n");
+		return 1;
+	}
+	sve = (struct sve_context *)head;
+
+	if (sve->vl != vl) {
+		fprintf(stderr, "sigframe VL %u, expected %u\n",
+			sve->vl, vl);
+		return 1;
+	} else {
+		fprintf(stderr, "got expected VL %u\n", vl);
+	}
+
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "SVE VL",
+	.descr = "Check that we get the right SVE VL reported",
+	.feats_required = FEAT_SVE,
+	.timeout = 3,
+	.init = get_sve_vl,
+	.run = sve_vl,
+};

From d25ac50ce8f742e61fad6175cdbb172532ebfd10 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 14:42:43 +0100
Subject: [PATCH 46/56] kselftest/arm64: signal: Verify that signals can't
 change the SVE vector length

We do not support changing the SVE vector length as part of signal return,
verify that this is the case if the system supports multiple vector lengths.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819134245.13935-5-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../testcases/fake_sigreturn_sve_change_vl.c  | 92 +++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c

diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
new file mode 100644
index 000000000000..bb50b5adbf10
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the SVE vector length in a signal hander, this is not
+ * supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+	int vq, vl;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			return false;
+
+		vl &= PR_SVE_VL_LEN_MASK;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	/* We need at least two VLs */
+	if (nvls < 2) {
+		fprintf(stderr, "Only %d VL supported\n", nvls);
+		return false;
+	}
+
+	return true;
+}
+
+static int fake_sigreturn_sve_change_vl(struct tdescr *td,
+					siginfo_t *si, ucontext_t *uc)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct sve_context *sve;
+
+	/* Get a signal context with a SVE frame in it */
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No SVE context\n");
+		return 1;
+	}
+
+	if (head->size != sizeof(struct sve_context)) {
+		fprintf(stderr, "SVE register state active, skipping\n");
+		return 1;
+	}
+
+	sve = (struct sve_context *)head;
+
+	/* No changes are supported; init left us at minimum VL so go to max */
+	fprintf(stderr, "Attempting to change VL from %d to %d\n",
+		sve->vl, vls[0]);
+	sve->vl = vls[0];
+
+	fake_sigreturn(&sf, sizeof(sf), 0);
+
+	return 1;
+}
+
+struct tdescr tde = {
+	.name = "FAKE_SIGRETURN_SVE_CHANGE",
+	.descr = "Attempt to change SVE VL",
+	.feats_required = FEAT_SVE,
+	.sig_ok = SIGSEGV,
+	.timeout = 3,
+	.init = sve_get_vls,
+	.run = fake_sigreturn_sve_change_vl,
+};

From 5262b216f4a97f4e2f517dba296a3cad4bc42bab Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 14:42:44 +0100
Subject: [PATCH 47/56] kselftest/arm64: signal: Add test case for SVE register
 state in signals

Currently this doesn't actually verify that the register contents do the
right thing, it just verifes that a SVE context with appropriate size
appears.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819134245.13935-6-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .../arm64/signal/testcases/sve_regs.c         | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/sve_regs.c

diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
new file mode 100644
index 000000000000..4b2418aa08a9
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the SVE register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+	int vq, vl;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			return false;
+
+		vl &= PR_SVE_VL_LEN_MASK;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	/* We need at least one VL */
+	if (nvls < 1) {
+		fprintf(stderr, "Only %d VL supported\n", nvls);
+		return false;
+	}
+
+	return true;
+}
+
+static void setup_sve_regs(void)
+{
+	/* RDVL x16, #1 so we should have SVE regs; real data is TODO */
+	asm volatile(".inst 0x04bf5030" : : : "x16" );
+}
+
+static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+			 unsigned int vl)
+{
+	size_t resv_sz, offset;
+	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	struct sve_context *sve;
+
+	fprintf(stderr, "Testing VL %d\n", vl);
+
+	if (prctl(PR_SVE_SET_VL, vl) == -1) {
+		fprintf(stderr, "Failed to set VL\n");
+		return 1;
+	}
+
+	/*
+	 * Get a signal context which should have a SVE frame and registers
+	 * in it.
+	 */
+	setup_sve_regs();
+	if (!get_current_context(td, &sf.uc))
+		return 1;
+
+	resv_sz = GET_SF_RESV_SIZE(sf);
+	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	if (!head) {
+		fprintf(stderr, "No SVE context\n");
+		return 1;
+	}
+
+	sve = (struct sve_context *)head;
+	if (sve->vl != vl) {
+		fprintf(stderr, "Got VL %d, expected %d\n", sve->vl, vl);
+		return 1;
+	}
+
+	/* The actual size validation is done in get_current_context() */
+	fprintf(stderr, "Got expected size %u and VL %d\n",
+		head->size, sve->vl);
+
+	return 0;
+}
+
+static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	int i;
+
+	for (i = 0; i < nvls; i++) {
+		/*
+		 * TODO: the signal test helpers can't currently cope
+		 * with signal frames bigger than struct sigcontext,
+		 * skip VLs that will trigger that.
+		 */
+		if (vls[i] > 64)
+			continue;
+
+		if (do_one_sve_vl(td, si, uc, vls[i]))
+			return 1;
+	}
+
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "SVE registers",
+	.descr = "Check that we get the right SVE registers reported",
+	.feats_required = FEAT_SVE,
+	.timeout = 3,
+	.init = sve_get_vls,
+	.run = sve_regs,
+};

From fa5ca80db89e8ee288eaafb935df77acba3534ec Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 19 Aug 2021 14:42:45 +0100
Subject: [PATCH 48/56] kselftest/arm64: signal: Add a TODO list for signal
 handling tests

Note down a few gaps in our coverage.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210819134245.13935-7-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/signal/testcases/TODO | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/signal/testcases/TODO

diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO
new file mode 100644
index 000000000000..110ff9fd195d
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/TODO
@@ -0,0 +1,2 @@
+- Validate that register contents are saved and restored as expected.
+- Support and validate extra_context.

From 38ee3c5e36a134050df93ab911d4713e30a6cde5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 12 Aug 2021 21:11:43 +0100
Subject: [PATCH 49/56] arm64/sve: Add some comments for sve_save/load_state()

The use of macros for the actual function bodies means legibility is always
going to be a bit of a challenge, especially while we can't rely on SVE
support in the toolchain, but this helps a little.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210812201143.35578-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry-fpsimd.S | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 0a7a64753878..196e921f61de 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -33,11 +33,24 @@ SYM_FUNC_END(fpsimd_load_state)
 
 #ifdef CONFIG_ARM64_SVE
 
+/*
+ * Save the SVE state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - pointer to storage for FPSR
+ */
 SYM_FUNC_START(sve_save_state)
 	sve_save 0, x1, 2
 	ret
 SYM_FUNC_END(sve_save_state)
 
+/*
+ * Load the SVE state
+ *
+ * x0 - pointer to buffer for state
+ * x1 - pointer to storage for FPSR
+ * x2 - VQ-1
+ */
 SYM_FUNC_START(sve_load_state)
 	sve_load 0, x1, x2, 3, x4
 	ret

From 04fa17d1368c1f49801c852aac874d99ba5a6d20 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 16 Aug 2021 13:50:23 +0100
Subject: [PATCH 50/56] arm64/sve: Add a comment documenting the binutils
 needed for SVE asm

At some point it would be nice to avoid the need to manually encode SVE
instructions, add a note of the binutils version required to save looking
it up.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210816125024.8112-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/fpsimdmacros.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 059204477ce6..00a2c0b69c2b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -94,6 +94,7 @@
 .endm
 
 /* SVE instruction encodings for non-SVE-capable assemblers */
+/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
 
 /* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
 .macro _sve_str_v nz, nxbase, offset=0

From 90268574a3e8a6b883bd802d702a2738577e1006 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 23 Aug 2021 11:12:53 +0100
Subject: [PATCH 51/56] arm64: head: avoid over-mapping in map_memory

The `compute_indices` and `populate_entries` macros operate on inclusive
bounds, and thus the `map_memory` macro which uses them also operates
on inclusive bounds.

We pass `_end` and `_idmap_text_end` to `map_memory`, but these are
exclusive bounds, and if one of these is sufficiently aligned (as a
result of kernel configuration, physical placement, and KASLR), then:

* In `compute_indices`, the computed `iend` will be in the page/block *after*
  the final byte of the intended mapping.

* In `populate_entries`, an unnecessary entry will be created at the end
  of each level of table. At the leaf level, this entry will map up to
  SWAPPER_BLOCK_SIZE bytes of physical addresses that we did not intend
  to map.

As we may map up to SWAPPER_BLOCK_SIZE bytes more than intended, we may
violate the boot protocol and map physical address past the 2MiB-aligned
end address we are permitted to map. As we map these with Normal memory
attributes, this may result in further problems depending on what these
physical addresses correspond to.

The final entry at each level may require an additional table at that
level. As EARLY_ENTRIES() calculates an inclusive bound, we allocate
enough memory for this.

Avoid the extraneous mapping by having map_memory convert the exclusive
end address to an inclusive end address by subtracting one, and do
likewise in EARLY_ENTRIES() when calculating the number of required
tables. For clarity, comments are updated to more clearly document which
boundaries the macros operate on.  For consistency with the other
macros, the comments in map_memory are also updated to describe `vstart`
and `vend` as virtual addresses.

Fixes: 0370b31e4845 ("arm64: Extend early page table code to allow for larger kernels")
Cc: <stable@vger.kernel.org> # 4.16.x
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210823101253.55567-1-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/kernel-pgtable.h |  4 ++--
 arch/arm64/kernel/head.S                | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 3512184cfec1..96dc0f7da258 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -65,8 +65,8 @@
 #define EARLY_KASLR	(0)
 #endif
 
-#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \
-					- ((vstart) >> (shift)) + 1 + EARLY_KASLR)
+#define EARLY_ENTRIES(vstart, vend, shift) \
+	((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR)
 
 #define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT))
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index c5c994a73a64..17962452e31d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -177,7 +177,7 @@ SYM_CODE_END(preserve_boot_args)
  * to be composed of multiple pages. (This effectively scales the end index).
  *
  *	vstart:	virtual address of start of range
- *	vend:	virtual address of end of range
+ *	vend:	virtual address of end of range - we map [vstart, vend]
  *	shift:	shift used to transform virtual address into index
  *	ptrs:	number of entries in page table
  *	istart:	index in table corresponding to vstart
@@ -214,17 +214,18 @@ SYM_CODE_END(preserve_boot_args)
  *
  *	tbl:	location of page table
  *	rtbl:	address to be used for first level page table entry (typically tbl + PAGE_SIZE)
- *	vstart:	start address to map
- *	vend:	end address to map - we map [vstart, vend]
+ *	vstart:	virtual address of start of range
+ *	vend:	virtual address of end of range - we map [vstart, vend - 1]
  *	flags:	flags to use to map last level entries
  *	phys:	physical address corresponding to vstart - physical memory is contiguous
  *	pgds:	the number of pgd entries
  *
  * Temporaries:	istart, iend, tmp, count, sv - these need to be different registers
- * Preserves:	vstart, vend, flags
- * Corrupts:	tbl, rtbl, istart, iend, tmp, count, sv
+ * Preserves:	vstart, flags
+ * Corrupts:	tbl, rtbl, vend, istart, iend, tmp, count, sv
  */
 	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+	sub \vend, \vend, #1
 	add \rtbl, \tbl, #PAGE_SIZE
 	mov \sv, \rtbl
 	mov \count, #0

From e3849765037b85e61b2432ded488ee9fb3ff126d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 12 Aug 2021 20:02:13 +0100
Subject: [PATCH 52/56] arm64: Document the requirement for SCR_EL3.HCE

It is amazing that we never documented this absolutely basic
requirement: if you boot the kernel at EL2, you'd better
enable the HVC instruction from EL3.

Really, just do it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210812190213.2601506-6-maz@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/booting.rst | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 5822d6da9fa6..3f9d86557c5e 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -207,10 +207,17 @@ Before jumping into the kernel, the following conditions must be met:
   software at a higher exception level to prevent execution in an UNKNOWN
   state.
 
-  - SCR_EL3.FIQ must have the same value across all CPUs the kernel is
-    executing on.
-  - The value of SCR_EL3.FIQ must be the same as the one present at boot
-    time whenever the kernel is executing.
+  For all systems:
+  - If EL3 is present:
+
+    - SCR_EL3.FIQ must have the same value across all CPUs the kernel is
+      executing on.
+    - The value of SCR_EL3.FIQ must be the same as the one present at boot
+      time whenever the kernel is executing.
+
+  - If EL3 is present and the kernel is entered at EL2:
+
+    - SCR_EL3.HCE (bit 8) must be initialised to 0b1.
 
   For systems with a GICv3 interrupt controller to be used in v3 mode:
   - If EL3 is present:

From 7559b7d7d651d397debbcd838bd49ec4b9e0a4a4 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 24 Aug 2021 16:34:17 +0100
Subject: [PATCH 53/56] arm64/sve: Better handle failure to allocate SVE
 register storage

Currently we "handle" failure to allocate the SVE register storage by
doing a BUG_ON() and hoping for the best. This is obviously not great and
the memory allocation failure will already be loud enough without the
BUG_ON(). As the comment says it is a corner case but let's try to do a bit
better, remove the BUG_ON() and add code to handle the failure in the
callers.

For the ptrace and signal code we can return -ENOMEM gracefully however
we have no real error reporting path available to us for the SVE access
trap so instead generate a SIGKILL if the allocation fails there. This
at least means that we won't try to soldier on and end up trying to
access the nonexistant state and while it's obviously not ideal for
userspace SIGKILL doesn't allow any handling so minimises the ABI
impact, making it easier to improve the interface later if we come up
with a better idea.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210824153417.18371-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/fpsimd.c | 10 ++++------
 arch/arm64/kernel/ptrace.c |  5 +++++
 arch/arm64/kernel/signal.c |  5 +++++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index eb8d972ad3d2..5a294f20e9de 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -520,12 +520,6 @@ void sve_alloc(struct task_struct *task)
 	/* This is a small allocation (maximum ~8KB) and Should Not Fail. */
 	task->thread.sve_state =
 		kzalloc(sve_state_size(task), GFP_KERNEL);
-
-	/*
-	 * If future SVE revisions can have larger vectors though,
-	 * this may cease to be true:
-	 */
-	BUG_ON(!task->thread.sve_state);
 }
 
 
@@ -945,6 +939,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	}
 
 	sve_alloc(current);
+	if (!current->thread.sve_state) {
+		force_sig(SIGKILL);
+		return;
+	}
 
 	get_cpu_fpsimd_context();
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 499b6b2f9757..956e2df27180 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -845,6 +845,11 @@ static int sve_set(struct task_struct *target,
 	}
 
 	sve_alloc(target);
+	if (!target->thread.sve_state) {
+		ret = -ENOMEM;
+		clear_tsk_thread_flag(target, TIF_SVE);
+		goto out;
+	}
 
 	/*
 	 * Ensure target->thread.sve_state is up to date with target's
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f8192f4ae0b8..3b4c927d31ff 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -289,6 +289,11 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
 	/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
 
 	sve_alloc(current);
+	if (!current->thread.sve_state) {
+		clear_thread_flag(TIF_SVE);
+		return -ENOMEM;
+	}
+
 	err = __copy_from_user(current->thread.sve_state,
 			       (char __user const *)user->sve +
 					SVE_SIG_REGS_OFFSET,

From 24de5838db7044401c719042e95f646d72a78c49 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 25 Aug 2021 10:39:11 +0100
Subject: [PATCH 54/56] arm64: signal32: Drop pointless call to sigdelsetmask()

Commit 77097ae503b1 ("most of set_current_blocked() callers want
SIGKILL/SIGSTOP removed from set") extended set_current_blocked() to
remove SIGKILL and SIGSTOP from the new signal set and updated all
callers accordingly.

Unfortunately, this collided with the merge of the arm64 architecture,
which duly removes these signals when restoring the compat sigframe, as
this was what was previously done by arch/arm/.

Remove the redundant call to sigdelsetmask() from
compat_restore_sigframe().

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210825093911.24493-1-will@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/signal32.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 2f507f565c48..4850333669fb 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -46,8 +46,6 @@ struct compat_aux_sigframe {
 	unsigned long			end_magic;
 } __attribute__((__aligned__(8)));
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
 {
 	compat_sigset_t	cset;
@@ -190,10 +188,8 @@ static int compat_restore_sigframe(struct pt_regs *regs,
 	unsigned long psr;
 
 	err = get_sigset_t(&set, &sf->uc.uc_sigmask);
-	if (err == 0) {
-		sigdelsetmask(&set, ~_BLOCKABLE);
+	if (err == 0)
 		set_current_blocked(&set);
-	}
 
 	__get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err);
 	__get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err);

From 5845e703f9b5f949dc1db4122b7fc6d8563048a2 Mon Sep 17 00:00:00 2001
From: Xujun Leng <lengxujun2007@126.com>
Date: Wed, 25 Aug 2021 23:05:26 +0800
Subject: [PATCH 55/56] arm64: mm: fix comment typo of pud_offset_phys()

Fix a typo in the comment of macro pud_offset_phys().

Signed-off-by: Xujun Leng <lengxujun2007@126.com>
Link: https://lore.kernel.org/r/20210825150526.12582-1-lengxujun2007@126.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index f09bf5c02891..dfa76afa0ccf 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -715,7 +715,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
 	return (pud_t *)__va(p4d_page_paddr(p4d));
 }
 
-/* Find an entry in the frst-level page table. */
+/* Find an entry in the first-level page table. */
 #define pud_offset_phys(dir, addr)	(p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 
 #define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))

From 50cb99fa89aa2bec2cab2f9917010bbd7769bfa3 Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Tue, 24 Aug 2021 16:45:23 +0100
Subject: [PATCH 56/56] arm64: Do not trap PMSNEVFR_EL1

Commit 31c00d2aeaa2 ("arm64: Disable fine grained traps on boot") zeroed
the fine grained trap registers to prevent unwanted register traps from
occuring. However, for the PMSNEVFR_EL1 register, the corresponding
HDFG{R,W}TR_EL2.nPMSNEVFR_EL1 fields must be 1 to disable trapping. Set
both fields to 1 if FEAT_SPEv1p2 is detected to disable read and write
traps.

Fixes: 31c00d2aeaa2 ("arm64: Disable fine grained traps on boot")
Cc: <stable@vger.kernel.org> # 5.13.x
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210824154523.906270-1-alexandru.elisei@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/el2_setup.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 21fa330f498d..29bcd791f793 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -150,8 +150,17 @@
 	ubfx	x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4
 	cbz	x1, .Lskip_fgt_\@
 
-	msr_s	SYS_HDFGRTR_EL2, xzr
-	msr_s	SYS_HDFGWTR_EL2, xzr
+	mov	x0, xzr
+	mrs	x1, id_aa64dfr0_el1
+	ubfx	x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+	cmp	x1, #3
+	b.lt	.Lset_fgt_\@
+	/* Disable PMSNEVFR_EL1 read and write traps */
+	orr	x0, x0, #(1 << 62)
+
+.Lset_fgt_\@:
+	msr_s	SYS_HDFGRTR_EL2, x0
+	msr_s	SYS_HDFGWTR_EL2, x0
 	msr_s	SYS_HFGRTR_EL2, xzr
 	msr_s	SYS_HFGWTR_EL2, xzr
 	msr_s	SYS_HFGITR_EL2, xzr