linux/arch/x86/lib/copy_mc_64.S

/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */

#include <linux/linkage.h>
#include <asm/asm.h>

#ifndef CONFIG_UML

#ifdef CONFIG_X86_MCE

/*
 * copy_mc_fragile - copy memory with indication if an exception / fault happened
 *
 * The 'fragile' version is opted into by platform quirks and takes
 * pains to avoid unrecoverable corner cases like 'fast-string'
 * instruction sequences, and consuming poison across a cacheline
 * boundary. The non-fragile version is equivalent to memcpy()
 * regardless of CPU machine-check-recovery capability.
 *
 * Registers on entry, as used by the code below:
 *   %rdi - destination pointer
 *   %rsi - source pointer
 *   %edx - number of bytes to copy (only the low 32 bits are examined)
 *
 * Returns 0 in %eax when every byte was copied. If a load or store
 * faults, the exception table entries registered after this function
 * redirect execution to the .E_* fixup labels, which work out how many
 * bytes were left uncopied.
 *
 * Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r8 and the flags.
 *
 * The copy runs in up to three phases: single bytes until the source is
 * 8-byte aligned, then 8-byte words, then the remaining single bytes.
 * In each loop the load and the store carry separate labels so that the
 * exception table can name them individually, and the loop counter in
 * %ecx is decremented only after both have completed.
 */
SYM_FUNC_START(copy_mc_fragile)
	cmpl $8, %edx
	/* Less than 8 bytes? Go to byte copy loop */
	jb .L_no_whole_words

	/* Check for bad alignment of source */
	testl $7, %esi
	/* Already aligned */
	jz .L_8byte_aligned

	/*
	 * Copy one byte at a time until source is 8-byte aligned.
	 * %ecx = 8 - (src & 7) is the number of leading bytes (1..7 on
	 * this path) and %edx becomes the count that remains after them.
	 * The subtraction cannot underflow: %edx is at least 8 here.
	 */
	movl %esi, %ecx
	andl $7, %ecx
	subl $8, %ecx
	negl %ecx
	subl %ecx, %edx
.L_read_leading_bytes:
	movb (%rsi), %al
.L_write_leading_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_leading_bytes

.L_8byte_aligned:
	/* Split the remaining count: %ecx = whole words, %edx = trailing bytes */
	movl %edx, %ecx
	andl $7, %edx
	shrl $3, %ecx
	jz .L_no_whole_words

	/* Copy %ecx 8-byte words through %r8 */
.L_read_words:
	movq (%rsi), %r8
.L_write_words:
	movq %r8, (%rdi)
	addq $8, %rsi
	addq $8, %rdi
	decl %ecx
	jnz .L_read_words

	/*
	 * Any trailing bytes? On every path that reaches this label %edx
	 * holds the number of single bytes still to copy. The andl leaves
	 * %edx unchanged and only sets ZF when it is zero.
	 */
.L_no_whole_words:
	andl %edx, %edx
	jz .L_done_memcpy_trap

	/* Copy trailing bytes */
	movl %edx, %ecx
.L_read_trailing_bytes:
	movb (%rsi), %al
.L_write_trailing_bytes:
	movb %al, (%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_read_trailing_bytes

	/* Copy successful. Return zero */
.L_done_memcpy_trap:
	xorl %eax, %eax
	/*
	 * Shared return point: the fixup code also jumps here, with the
	 * return value already placed in %eax.
	 */
.L_done:
	ret
SYM_FUNC_END(copy_mc_fragile)

	.section .fixup, "ax"
	/*
	 * Return number of bytes not copied for any failure. Note that
	 * there is no "tail" handling since the source buffer is 8-byte
	 * aligned and poison is cacheline aligned.
	 *
	 * The three labels below form a fall-through chain. The copy loops
	 * decrement %ecx only after the load and the store have both
	 * completed, so on entry here %ecx still counts the unit that
	 * faulted:
	 *   .E_read_words     - %ecx = words left, %edx = trailing bytes
	 *   .E_leading_bytes  - %ecx = leading bytes left, %edx = bytes
	 *                       that were to follow the leading bytes
	 *   .E_trailing_bytes - %ecx = trailing bytes left
	 */
.E_read_words:
	/* Convert the remaining word count to bytes */
	shll	$3, %ecx
.E_leading_bytes:
	/* Add the bytes belonging to the phases that never ran */
	addl	%edx, %ecx
.E_trailing_bytes:
	/* %eax is the return value; leave through the function's ret */
	mov	%ecx, %eax
	jmp	.L_done

	/*
	 * For write fault handling, given the destination is unaligned,
	 * we handle faults on multi-byte writes with a byte-by-byte
	 * copy up to the write-protected page.
	 *
	 * The remaining byte count is computed as in .E_read_words but
	 * left in %edx, and %rsi / %rdi still point at the word whose
	 * store faulted (they are advanced only after the store).
	 * NOTE(review): copy_mc_fragile_handle_tail is defined outside
	 * this file; presumably it takes (dst, src, len) in
	 * %rdi / %rsi / %edx and produces the final return value - confirm.
	 */
.E_write_words:
	shll	$3, %ecx
	addl	%edx, %ecx
	movl	%ecx, %edx
	jmp copy_mc_fragile_handle_tail

	.previous

	/*
	 * Exception table: each entry pairs an instruction that may fault
	 * with the fixup label to continue at. The loads are registered
	 * with _ASM_EXTABLE_FAULT and the stores with _ASM_EXTABLE; the
	 * macros are defined outside this file (presumably <asm/asm.h>),
	 * so how the two variants differ is not visible here.
	 */
	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
	_ASM_EXTABLE(.L_write_words, .E_write_words)
	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif /* CONFIG_X86_MCE */

/*
 * copy_mc_enhanced_fast_string - memory copy with exception handling
 *
 * Fast string copy + fault / exception handling. If the CPU does
 * support machine check exception recovery, but does not support
 * recovering from fast-string exceptions then this CPU needs to be
 * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
 * machine check recovery support this version should be no slower than
 * standard memcpy.
 *
 * Registers on entry, as used by the code below:
 *   %rdi - destination pointer
 *   %rsi - source pointer
 *   %rdx - number of bytes to copy
 *
 * Returns 0 in %rax when the whole range was copied. A fault taken by
 * the rep movsb at .L_copy is redirected through the exception table to
 * the .E_copy fixup, which returns the byte count left in %rcx.
 *
 * Clobbers %rax, %rcx, %rsi, %rdi and the flags.
 */
SYM_FUNC_START(copy_mc_enhanced_fast_string)
	/*
	 * rep movsb takes its byte count in %rcx. %rax needs no setup:
	 * it is written on both exits (zeroed below on success, loaded
	 * from %rcx in .E_copy on a fault) and never read before that.
	 */
	movq %rdx, %rcx
.L_copy:
	rep movsb
	/* Copy successful. Return zero */
	xorl %eax, %eax
	ret
SYM_FUNC_END(copy_mc_enhanced_fast_string)

	.section .fixup, "ax"
	/* Fixup target for a fault taken by the rep movsb at .L_copy */
.E_copy:
	/*
	 * On fault %rcx is updated such that the copy instruction could
	 * optionally be restarted at the fault position, i.e. it
	 * contains 'bytes remaining'. A non-zero return indicates error
	 * to copy_mc_generic() users, or indicate short transfers to
	 * user-copy routines.
	 *
	 * The function pushes nothing before .L_copy, so the ret below
	 * returns directly to the caller of
	 * copy_mc_enhanced_fast_string().
	 */
	movq %rcx, %rax
	ret

	.previous

	/* Route faults on the string copy at .L_copy to .E_copy */
	_ASM_EXTABLE_FAULT(.L_copy, .E_copy)
#endif /* !CONFIG_UML */
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()

In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.

Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:

On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.

Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().

Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
One side-effect of this reorganization is that separating copy_mc_64.S to its own file means that perf no longer needs to track dependencies for its memcpy_64.S benchmarks. [ bp: Massage a bit. ] Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Tony Luck <tony.luck@intel.com> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Cc: <stable@vger.kernel.org> Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com 2020-10-06 03:40:16 +00:00			`/* SPDX-License-Identifier: GPL-2.0-only */`
			`/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */`

			`#include <linux/linkage.h>`
			`#include <asm/asm.h>`

			`#ifndef CONFIG_UML`

			`#ifdef CONFIG_X86_MCE`

			`/*`
			`* copy_mc_fragile - copy memory with indication if an exception / fault happened`
			`*`
			`* The 'fragile' version is opted into by platform quirks and takes`
			`* pains to avoid unrecoverable corner cases like 'fast-string'`
			`* instruction sequences, and consuming poison across a cacheline`
			`* boundary. The non-fragile version is equivalent to memcpy()`
			`* regardless of CPU machine-check-recovery capability.`
			`*/`
			`SYM_FUNC_START(copy_mc_fragile)`
			`cmpl $8, %edx`
			`/* Less than 8 bytes? Go to byte copy loop */`
			`jb .L_no_whole_words`

			`/* Check for bad alignment of source */`
			`testl $7, %esi`
			`/* Already aligned */`
			`jz .L_8byte_aligned`

			`/* Copy one byte at a time until source is 8-byte aligned */`
			`movl %esi, %ecx`
			`andl $7, %ecx`
			`subl $8, %ecx`
			`negl %ecx`
			`subl %ecx, %edx`
			`.L_read_leading_bytes:`
			`movb (%rsi), %al`
			`.L_write_leading_bytes:`
			`movb %al, (%rdi)`
			`incq %rsi`
			`incq %rdi`
			`decl %ecx`
			`jnz .L_read_leading_bytes`

			`.L_8byte_aligned:`
			`movl %edx, %ecx`
			`andl $7, %edx`
			`shrl $3, %ecx`
			`jz .L_no_whole_words`

			`.L_read_words:`
			`movq (%rsi), %r8`
			`.L_write_words:`
			`movq %r8, (%rdi)`
			`addq $8, %rsi`
			`addq $8, %rdi`
			`decl %ecx`
			`jnz .L_read_words`

			`/* Any trailing bytes? */`
			`.L_no_whole_words:`
			`andl %edx, %edx`
			`jz .L_done_memcpy_trap`

			`/* Copy trailing bytes */`
			`movl %edx, %ecx`
			`.L_read_trailing_bytes:`
			`movb (%rsi), %al`
			`.L_write_trailing_bytes:`
			`movb %al, (%rdi)`
			`incq %rsi`
			`incq %rdi`
			`decl %ecx`
			`jnz .L_read_trailing_bytes`

			`/* Copy successful. Return zero */`
			`.L_done_memcpy_trap:`
			`xorl %eax, %eax`
			`.L_done:`
			`ret`
			`SYM_FUNC_END(copy_mc_fragile)`

			`.section .fixup, "ax"`
			`/*`
			`* Return number of bytes not copied for any failure. Note that`
			`* there is no "tail" handling since the source buffer is 8-byte`
			`* aligned and poison is cacheline aligned.`
			`*/`
			`.E_read_words:`
			`shll $3, %ecx`
			`.E_leading_bytes:`
			`addl %edx, %ecx`
			`.E_trailing_bytes:`
			`mov %ecx, %eax`
			`jmp .L_done`

			`/*`
			`* For write fault handling, given the destination is unaligned,`
			`* we handle faults on multi-byte writes with a byte-by-byte`
			`* copy up to the write-protected page.`
			`*/`
			`.E_write_words:`
			`shll $3, %ecx`
			`addl %edx, %ecx`
			`movl %ecx, %edx`
			`jmp copy_mc_fragile_handle_tail`

			`.previous`

			`_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)`
			`_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)`
			`_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)`
			`_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)`
			`_ASM_EXTABLE(.L_write_words, .E_write_words)`
			`_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)`
			`#endif /* CONFIG_X86_MCE */`
x86/copy_mc: Introduce copy_mc_enhanced_fast_string()

The motivations to go rework memcpy_mcsafe() are that the benefit of
doing slow and careful copies is obviated on newer CPUs, and that the
current opt-in list of CPUs to instrument recovery is broken relative to
those CPUs. There is no need to keep an opt-in list up to date on an
ongoing basis if pmem/dax operations are instrumented for recovery by
default. With recovery enabled by default the old "mcsafe_key" opt-in to
careful copying can be made a "fragile" opt-out, where the "fragile"
list takes steps to not consume poison across cachelines.

The discussion with Linus made clear that the current "_mcsafe" suffix
was imprecise to a fault. The operations that are needed by pmem/dax are
to copy from a source address that might throw #MC to a destination that
may write-fault, if it is a user page. So copy_to_user_mcsafe() becomes
copy_mc_to_user() to indicate the separate precautions taken on source
and destination. copy_mc_to_kernel() is introduced as a non-SMAP version
that does not expect write-faults on the destination, but is still
prepared to abort with an error code upon taking #MC.

The original copy_mc_fragile() implementation had negative performance
implications since it did not use the fast-string instruction sequence
to perform copies. For this reason copy_mc_to_kernel() fell back to
plain memcpy() to preserve performance on platforms that did not
indicate the capability to recover from machine check exceptions.
However, that capability detection was not architectural and now that
some platforms can recover from fast-string consumption of memory errors
the memcpy() fallback now causes these more capable platforms to fail.

Introduce copy_mc_enhanced_fast_string() as the fast default
implementation of copy_mc_to_kernel() and finalize the transition of
copy_mc_fragile() to be a platform quirk to indicate 'copy-carefully'.
With this in place, copy_mc_to_kernel() is fast and recovery-ready by default regardless of hardware capability. Thanks to Vivek for identifying that copy_user_generic() is not suitable as the copy_mc_to_user() backend since the #MC handler explicitly checks ex_has_fault_handler(). Thanks to the 0day robot for catching a performance bug in the x86/copy_mc_to_user implementation. [ bp: Add the "why" for this change from the 0/2th message, massage. ] Fixes: 92b0729c34ca ("x86/mm, x86/mce: Add memcpy_mcsafe()") Reported-by: Erwin Tsaur <erwin.tsaur@intel.com> Reported-by: 0day robot <lkp@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Tony Luck <tony.luck@intel.com> Tested-by: Erwin Tsaur <erwin.tsaur@intel.com> Cc: <stable@vger.kernel.org> Link: https://lkml.kernel.org/r/160195562556.2163339.18063423034951948973.stgit@dwillia2-desk3.amr.corp.intel.com 2020-10-06 03:40:25 +00:00
			`/*`
			`* copy_mc_enhanced_fast_string - memory copy with exception handling`
			`*`
			`* Fast string copy + fault / exception handling. If the CPU does`
			`* support machine check exception recovery, but does not support`
			`* recovering from fast-string exceptions then this CPU needs to be`
			`* added to the copy_mc_fragile_key set of quirks. Otherwise, absent any`
			`* machine check recovery support this version should be no slower than`
			`* standard memcpy.`
			`*/`
			`SYM_FUNC_START(copy_mc_enhanced_fast_string)`
			`movq %rdi, %rax`
			`movq %rdx, %rcx`
			`.L_copy:`
			`rep movsb`
			`/* Copy successful. Return zero */`
			`xorl %eax, %eax`
			`ret`
			`SYM_FUNC_END(copy_mc_enhanced_fast_string)`

			`.section .fixup, "ax"`
			`.E_copy:`
			`/*`
			`* On fault %rcx is updated such that the copy instruction could`
			`* optionally be restarted at the fault position, i.e. it`
			`* contains 'bytes remaining'. A non-zero return indicates error`
			`* to copy_mc_generic() users, or indicate short transfers to`
			`* user-copy routines.`
			`*/`
			`movq %rcx, %rax`
			`ret`

			`.previous`

			`_ASM_EXTABLE_FAULT(.L_copy, .E_copy)`
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()

In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.

Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:

On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.

Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().

Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
One side-effect of this reorganization is that separating copy_mc_64.S to its own file means that perf no longer needs to track dependencies for its memcpy_64.S benchmarks. [ bp: Massage a bit. ] Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Tony Luck <tony.luck@intel.com> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Cc: <stable@vger.kernel.org> Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com 2020-10-06 03:40:16 +00:00			`#endif /* !CONFIG_UML */`