Merge branch 'for-next/mops' into for-next/core

* for-next/mops:
  : More FEAT_MOPS (memcpy instructions) uses - in-kernel routines
  arm64: mops: Document requirements for hypervisors
  arm64: lib: Use MOPS for copy_page() and clear_page()
  arm64: lib: Use MOPS for memcpy() routines
  arm64: mops: Document booting requirement for HCR_EL2.MCE2
  arm64: mops: Handle MOPS exceptions from EL1
  arm64: probes: Disable kprobes/uprobes on MOPS instructions

# Conflicts:
#	arch/arm64/kernel/entry-common.c
commit 437330d90c
Author: Catalin Marinas
Date:   2024-11-14 12:07:28 +00:00

15 changed files with 146 additions and 4 deletions

View File

@@ -388,6 +388,9 @@ Before jumping into the kernel, the following conditions must be met:
- HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
- HCRX_EL2.MCE2 (bit 10) must be initialised to 0b1 and the hypervisor
must handle MOPS exceptions as described in :ref:`arm64_mops_hyp`.
For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
- If EL3 is present:
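
For illustration, a minimal sketch of how EL2 initialisation code might satisfy the HCRX_EL2 requirements quoted above. Only the bit positions (MSCEn bit 11, MCE2 bit 10) come from the text; the function name and the raw mrs/msr accessors are assumptions, and older assemblers may need the S3_4_C1_C2_2 encoding instead of the hcrx_el2 name:

/*
 * Illustrative sketch only: set HCRX_EL2.MSCEn and HCRX_EL2.MCE2 before
 * dropping to EL1, per the booting requirements above.
 */
#define HCRX_EL2_MCE2	(1UL << 10)	/* route EL1 MOPS exceptions to EL2 */
#define HCRX_EL2_MSCEn	(1UL << 11)	/* enable MOPS instructions at EL1/EL0 */

static inline void el2_enable_mops_for_el1(void)
{
	unsigned long hcrx;

	asm volatile("mrs %0, hcrx_el2" : "=r" (hcrx));
	hcrx |= HCRX_EL2_MSCEn;	/* required whenever FEAT_MOPS is implemented */
	hcrx |= HCRX_EL2_MCE2;	/* required when a hypervisor handles guest MOPS exceptions */
	asm volatile("msr hcrx_el2, %0" :: "r" (hcrx));
	asm volatile("isb" ::: "memory");
}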

View File

@@ -22,6 +22,7 @@ ARM64 Architecture
legacy_instructions
memory
memory-tagging-extension
mops
perf
pointer-authentication
ptdump

View File

@@ -0,0 +1,44 @@
.. SPDX-License-Identifier: GPL-2.0

===================================
Memory copy/set instructions (MOPS)
===================================

A MOPS memory copy/set operation consists of three consecutive CPY* or SET*
instructions: a prologue, main and epilogue (for example: CPYP, CPYM, CPYE).

A main or epilogue instruction can take a MOPS exception for various reasons,
for example when a task is migrated to a CPU with a different MOPS
implementation, or when the instruction's alignment and size requirements are
not met. The software exception handler is then expected to reset the registers
and restart execution from the prologue instruction. Normally this is handled
by the kernel.

For more details refer to "D1.3.5.7 Memory Copy and Memory Set exceptions" in
the Arm Architecture Reference Manual DDI 0487K.a (Arm ARM).
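
For illustration, a minimal sketch of such a three-instruction sequence, assuming a CPU with FEAT_MOPS and an assembler that accepts ".arch_extension mops"; this is not the kernel's implementation:

#include <stddef.h>

/*
 * Minimal sketch: copy n bytes with the CPYP/CPYM/CPYE sequence. All three
 * operands use post-indexed writeback, so dst, src and n are read and
 * written by the instructions ("+r" constraints).
 */
static inline void *mops_copy(void *dst, const void *src, size_t n)
{
	void *ret = dst;

	asm volatile(
		".arch_extension mops\n"
		"cpyp	[%0]!, [%1]!, %2!\n"	/* prologue: pick algorithm, initial bytes */
		"cpym	[%0]!, [%1]!, %2!\n"	/* main: bulk of the copy */
		"cpye	[%0]!, [%1]!, %2!\n"	/* epilogue: remaining tail bytes */
		: "+r" (dst), "+r" (src), "+r" (n)
		:
		: "memory");

	return ret;
}

If a MOPS exception is taken on the cpym or cpye above, the handler resets dst/src/n and points the PC back at the cpyp, as described in the following section.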
.. _arm64_mops_hyp:

Hypervisor requirements
-----------------------

A hypervisor running a Linux guest must handle all MOPS exceptions from the
guest kernel, as Linux may not be able to handle the exception at all times.
For example, a MOPS exception can be taken when the hypervisor migrates a vCPU
to another physical CPU with a different MOPS implementation.

To do this, the hypervisor must:

- Set HCRX_EL2.MCE2 to 1 so that the exception is taken to the hypervisor.

- Have an exception handler that implements the algorithm from the Arm ARM
  rules CNTMJ and MWFQH.

- Set the guest's PSTATE.SS to 0 in the exception handler, to handle a
  potential step of the current instruction.

  Note: Clearing PSTATE.SS is needed so that a single step exception is taken
  on the next instruction (the prologue instruction). Otherwise the prologue
  would get silently stepped over and the single step exception taken on the
  main instruction. Note that if the guest instruction is not being stepped
  then clearing PSTATE.SS has no effect.
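
Putting the three requirements together, a rough outline of a hypervisor-side handler might look like the sketch below. The structure type and the register-reset helper are hypothetical placeholders (the actual reset algorithm is the one given by the Arm ARM rules CNTMJ and MWFQH); only the PSTATE.SS handling is spelled out:

/* Hypothetical guest register layout for the sketch below. */
struct guest_regs {
	unsigned long x[31];	/* X0..X30 */
	unsigned long pc;
	unsigned long pstate;
};

#define PSTATE_SS	(1UL << 21)	/* software step bit in SPSR_ELx/PSTATE */

/* Hypothetical helper implementing the Arm ARM CNTMJ/MWFQH reset routine. */
void mops_reset_guest_regs(struct guest_regs *regs, unsigned long esr_el2);

static void handle_guest_mops_exception(struct guest_regs *regs, unsigned long esr_el2)
{
	/*
	 * Rewind the guest to the prologue instruction: restore the
	 * destination/source/size registers reported in ESR_EL2 to
	 * prologue format and move PC back to the CPYP/SETP.
	 */
	mops_reset_guest_regs(regs, esr_el2);

	/*
	 * Finish a potential single step of the faulting instruction so
	 * that the next step exception is taken on the prologue instead
	 * of silently skipping it.
	 */
	regs->pstate &= ~PSTATE_SS;
}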

View File

@@ -2160,6 +2160,9 @@ config ARM64_EPAN
if the cpu does not implement the feature.
endmenu # "ARMv8.7 architectural features"
config AS_HAS_MOPS
def_bool $(as-instr,.arch_extension mops)
menu "ARMv8.9 architectural features"
config ARM64_POE

View File

@@ -105,6 +105,7 @@ void kernel_enable_single_step(struct pt_regs *regs);
void kernel_disable_single_step(void);
int kernel_active_single_step(void);
void kernel_rewind_single_step(struct pt_regs *regs);
void kernel_fastforward_single_step(struct pt_regs *regs);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int reinstall_suspended_bps(struct pt_regs *regs);

View File

@@ -75,6 +75,7 @@ void do_el0_svc_compat(struct pt_regs *regs);
void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
void do_el0_mops(struct pt_regs *regs, unsigned long esr);
void do_el1_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
void do_signal(struct pt_regs *regs);

View File

@@ -353,6 +353,7 @@ __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
__AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)

View File

@@ -440,6 +440,11 @@ void kernel_rewind_single_step(struct pt_regs *regs)
set_regs_spsr_ss(regs);
}
void kernel_fastforward_single_step(struct pt_regs *regs)
{
clear_regs_spsr_ss(regs);
}
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
{

View File

@@ -472,6 +472,15 @@ static void noinstr el1_gcs(struct pt_regs *regs, unsigned long esr)
exit_to_kernel_mode(regs);
}
static void noinstr el1_mops(struct pt_regs *regs, unsigned long esr)
{
enter_from_kernel_mode(regs);
local_daif_inherit(regs);
do_el1_mops(regs, esr);
local_daif_mask();
exit_to_kernel_mode(regs);
}
static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
{
unsigned long far = read_sysreg(far_el1);
@@ -517,6 +526,9 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_GCS:
el1_gcs(regs, esr);
break;
case ESR_ELx_EC_MOPS:
el1_mops(regs, esr);
break;
case ESR_ELx_EC_BREAKPT_CUR:
case ESR_ELx_EC_SOFTSTP_CUR:
case ESR_ELx_EC_WATCHPT_CUR:

View File

@@ -58,10 +58,13 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* Instructions which load PC relative literals are not going to work
* when executed from an XOL slot. Instructions doing an exclusive
* load/store are not going to complete successfully when single-step
* exception handling happens in the middle of the sequence.
* exception handling happens in the middle of the sequence. Memory
* copy/set instructions require that all three instructions be placed
* consecutively in memory.
*/
if (aarch64_insn_uses_literal(insn) ||
aarch64_insn_is_exclusive(insn))
aarch64_insn_is_exclusive(insn) ||
aarch64_insn_is_mops(insn))
return false;
return true;
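
For reference, the aarch64_insn_is_mops() check used above is generated by the new __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) entry in insn.h; the macro boils down to a mask/value test roughly equivalent to the standalone sketch below:

#include <stdbool.h>
#include <stdint.h>

/*
 * Roughly what __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400) generates:
 * a single mask/value test intended to match the CPY and SET instruction
 * family, so kprobes/uprobes can refuse to single-step them out of line.
 */
static bool insn_is_mops(uint32_t insn)
{
	return (insn & 0x3B200C00) == 0x19000400;
}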

View File

@@ -541,6 +541,13 @@ void do_el0_mops(struct pt_regs *regs, unsigned long esr)
user_fastforward_single_step(current);
}
void do_el1_mops(struct pt_regs *regs, unsigned long esr)
{
arm64_mops_reset_regs(&regs->user_regs, esr);
kernel_fastforward_single_step(regs);
}
#define __user_cache_maint(insn, address, res) \
if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \

View File

@@ -15,6 +15,19 @@
* x0 - dest
*/
SYM_FUNC_START(__pi_clear_page)
#ifdef CONFIG_AS_HAS_MOPS
.arch_extension mops
alternative_if_not ARM64_HAS_MOPS
b .Lno_mops
alternative_else_nop_endif
mov x1, #PAGE_SIZE
setpn [x0]!, x1!, xzr
setmn [x0]!, x1!, xzr
seten [x0]!, x1!, xzr
ret
.Lno_mops:
#endif
mrs x1, dczid_el0
tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */
and w1, w1, #0xf

View File

@@ -18,6 +18,19 @@
* x1 - src
*/
SYM_FUNC_START(__pi_copy_page)
#ifdef CONFIG_AS_HAS_MOPS
.arch_extension mops
alternative_if_not ARM64_HAS_MOPS
b .Lno_mops
alternative_else_nop_endif
mov x2, #PAGE_SIZE
cpypwn [x0]!, [x1]!, x2!
cpymwn [x0]!, [x1]!, x2!
cpyewn [x0]!, [x1]!, x2!
ret
.Lno_mops:
#endif
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]

View File

@@ -57,7 +57,7 @@
The loop tail is handled by always copying 64 bytes from the end.
*/
SYM_FUNC_START(__pi_memcpy)
SYM_FUNC_START_LOCAL(__pi_memcpy_generic)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
@@ -238,7 +238,24 @@ L(copy64_from_start):
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstin]
ret
SYM_FUNC_END(__pi_memcpy_generic)
#ifdef CONFIG_AS_HAS_MOPS
.arch_extension mops
SYM_FUNC_START(__pi_memcpy)
alternative_if_not ARM64_HAS_MOPS
b __pi_memcpy_generic
alternative_else_nop_endif
mov dst, dstin
cpyp [dst]!, [src]!, count!
cpym [dst]!, [src]!, count!
cpye [dst]!, [src]!, count!
ret
SYM_FUNC_END(__pi_memcpy)
#else
SYM_FUNC_ALIAS(__pi_memcpy, __pi_memcpy_generic)
#endif
SYM_FUNC_ALIAS(__memcpy, __pi_memcpy)
EXPORT_SYMBOL(__memcpy)

View File

@@ -26,6 +26,7 @@
*/
dstin .req x0
val_x .req x1
val .req w1
count .req x2
tmp1 .req x3
@@ -42,7 +43,7 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
SYM_FUNC_START(__pi_memset)
SYM_FUNC_START_LOCAL(__pi_memset_generic)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
@@ -201,7 +202,24 @@ SYM_FUNC_START(__pi_memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
SYM_FUNC_END(__pi_memset_generic)
#ifdef CONFIG_AS_HAS_MOPS
.arch_extension mops
SYM_FUNC_START(__pi_memset)
alternative_if_not ARM64_HAS_MOPS
b __pi_memset_generic
alternative_else_nop_endif
mov dst, dstin
setp [dst]!, count!, val_x
setm [dst]!, count!, val_x
sete [dst]!, count!, val_x
ret
SYM_FUNC_END(__pi_memset)
#else
SYM_FUNC_ALIAS(__pi_memset, __pi_memset_generic)
#endif
SYM_FUNC_ALIAS(__memset, __pi_memset)
EXPORT_SYMBOL(__memset)