From da7bc9c57eb0e91e048d05f7dbe5014a8b81ccfa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 May 2018 17:06:11 -0700 Subject: [PATCH 01/10] x86/asm/memcpy_mcsafe: Remove loop unrolling In preparation for teaching memcpy_mcsafe() to return 'bytes remaining' rather than pass / fail, simplify the implementation to remove loop unrolling. The unrolling complicates the fault handling for negligible benefit given modern CPUs perform loop stream detection. Suggested-by: Linus Torvalds Signed-off-by: Dan Williams Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: hch@lst.de Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/152539237092.31796.9115692316555638048.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/string_64.h | 4 +-- arch/x86/lib/memcpy_64.S | 59 ++++++-------------------------- 2 files changed, 12 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c2..4752f8984923 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,7 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -138,7 +138,7 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE if (static_branch_unlikely(&mcsafe_key)) - return memcpy_mcsafe_unrolled(dst, src, cnt); + return __memcpy_mcsafe(dst, src, cnt); else #endif memcpy(dst, src, cnt); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..54c971892db5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -184,11 +184,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled) jnz .L_copy_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_copy_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_copy_words /* Any trailing bytes? */ .L_no_whole_words: @@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" /* Return -EFAULT for any failure */ @@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) .previous _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) #endif From bd131544aa7e318a5735cbcbad46c4a5ee6b9d42 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 May 2018 17:06:16 -0700 Subject: [PATCH 02/10] x86/asm/memcpy_mcsafe: Add labels for __memcpy_mcsafe() write fault handling The memcpy_mcsafe() implementation handles CPU exceptions when reading from the source address. Before it can be used for user copies it needs to grow support for handling write faults. In preparation for adding that exception handling update the labels for the read cache word X case (.L_cache_rX) and write cache word X case (.L_cache_wX). Reported-by: Tony Luck Signed-off-by: Dan Williams Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hch@lst.de Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/152539237606.31796.6719743548991782264.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 54c971892db5..5709f3ec22a4 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -204,13 +204,14 @@ ENTRY(__memcpy_mcsafe) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: movl %edx, %ecx @@ -218,13 +219,14 @@ ENTRY(__memcpy_mcsafe) shrl $3, %ecx jz .L_no_whole_words -.L_copy_words: +.L_read_words: movq (%rsi), %r8 +.L_write_words: movq %r8, (%rdi) addq $8, %rsi addq $8, %rdi decl %ecx - jnz .L_copy_words + jnz .L_read_words /* Any trailing bytes? */ .L_no_whole_words: @@ -233,13 +235,14 @@ ENTRY(__memcpy_mcsafe) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. Return zero */ .L_done_memcpy_trap: @@ -256,7 +259,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .L_memcpy_mcsafe_fail) #endif From 60622d68227d6d71fdfba5fb39f7f3d44cdd8815 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 May 2018 17:06:21 -0700 Subject: [PATCH 03/10] x86/asm/memcpy_mcsafe: Return bytes remaining Machine check safe memory copies are currently deployed in the pmem driver whenever reading from persistent memory media, so that -EIO is returned rather than triggering a kernel panic. While this protects most pmem accesses, it is not complete in the filesystem-dax case. When filesystem-dax is enabled reads may bypass the block layer and the driver via dax_iomap_actor() and its usage of copy_to_iter(). In preparation for creating a copy_to_iter() variant that can handle machine checks, teach memcpy_mcsafe() to return the number of bytes remaining rather than -EFAULT when an exception occurs. Co-developed-by: Tony Luck Signed-off-by: Dan Williams Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: hch@lst.de Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/152539238119.31796.14318473522414462886.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/string_64.h | 8 +++++--- arch/x86/lib/memcpy_64.S | 20 ++++++++++++++------ drivers/nvdimm/claim.c | 3 ++- drivers/nvdimm/pmem.c | 6 +++--- include/linux/string.h | 4 ++-- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 4752f8984923..d33f92b9fa22 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, + size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -131,9 +132,10 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * actually do machine check recovery. Everyone else can just * use memcpy(). * - * Return 0 for success, -EFAULT for fail + * Return 0 for success, or number of bytes not copied if there was an + * exception. */ -static __always_inline __must_check int +static __always_inline __must_check unsigned long memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 5709f3ec22a4..f01a88391c98 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -252,14 +252,22 @@ ENDPROC(__memcpy_mcsafe) EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* + * Return number of bytes not copied for any failure. Note that + * there is no "tail" handling since the source buffer is 8-byte + * aligned and poison is cacheline aligned. + */ +.E_read_words: + shll $3, %ecx +.E_leading_bytes: + addl %edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret .previous - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_read_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) #endif diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 30852270484f..2e96b34bc936 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -276,7 +276,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, if (rw == READ) { if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) return -EIO; - return memcpy_mcsafe(buf, nsio->addr + offset, size); + if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) + return -EIO; } if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 9d714926ecf5..e023d6aa22b5 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -101,15 +101,15 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { unsigned int chunk; - int rc; + unsigned long rem; void *mem; while (len) { mem = kmap_atomic(page); chunk = min_t(unsigned int, len, PAGE_SIZE); - rc = memcpy_mcsafe(mem + off, pmem_addr, chunk); + rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); - if (rc) + if (rem) return BLK_STS_IOERR; len -= chunk; off = 0; diff --git a/include/linux/string.h b/include/linux/string.h index dd39a690c841..4a5a0eb7df51 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -147,8 +147,8 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif #ifndef __HAVE_ARCH_MEMCPY_MCSAFE -static inline __must_check int memcpy_mcsafe(void *dst, const void *src, - size_t cnt) +static inline __must_check unsigned long memcpy_mcsafe(void *dst, + const void *src, size_t cnt) { memcpy(dst, src, cnt); return 0; From 12c89130a56ae8e8d85db753d70333c4ee0ea835 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 May 2018 17:06:26 -0700 Subject: [PATCH 04/10] x86/asm/memcpy_mcsafe: Add write-protection-fault handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for using memcpy_mcsafe() to handle user copies it needs to be to handle write-protection faults while writing user pages. Add MMU-fault handlers alongside the machine-check exception handlers. Note that the machine check fault exception handling makes assumptions about source buffer alignment and poison alignment. In the write fault case, given the destination buffer is arbitrarily aligned, it needs a separate / additional fault handling approach. The mcsafe_handle_tail() helper is reused. The @limit argument is set to @len since there is no safety concern about retriggering an MMU fault, and this simplifies the assembly. Co-developed-by: Tony Luck Reported-by: Mika Penttilä Signed-off-by: Dan Williams Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: hch@lst.de Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/152539238635.31796.14056325365122961778.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 3 +++ arch/x86/lib/memcpy_64.S | 14 ++++++++++++++ arch/x86/lib/usercopy_64.c | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62546b3a398e..c63efc07891f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -194,4 +194,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); +unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index f01a88391c98..c3b527a9f95d 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -265,9 +265,23 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) mov %ecx, %eax ret + /* + * For write fault handling, given the destination is unaligned, + * we handle faults on multi-byte writes with a byte-by-byte + * copy up to the write-protected page. + */ +.E_write_words: + shll $3, %ecx + addl %edx, %ecx + movl %ecx, %edx + jmp mcsafe_handle_tail + .previous _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 75d3776123cc..7ebc9901dd05 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -75,6 +75,27 @@ copy_user_handle_tail(char *to, char *from, unsigned len) return len; } +/* + * Similar to copy_user_handle_tail, probe for the write fault point, + * but reuse __memcpy_mcsafe in case a new read error is encountered. + * clac() is handled in _copy_to_iter_mcsafe(). + */ +__visible unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) { + /* + * Call the assembly routine back directly since + * memcpy_mcsafe() may silently fallback to memcpy. + */ + unsigned long rem = __memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB From 8780356ef630aa577fd4daa49e49b79674711fae Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 May 2018 17:06:31 -0700 Subject: [PATCH 05/10] x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe() Use the updated memcpy_mcsafe() implementation to define copy_user_mcsafe() and copy_to_iter_mcsafe(). The most significant difference from typical copy_to_iter() is that the ITER_KVEC and ITER_BVEC iterator types can fail to complete a full transfer. Signed-off-by: Dan Williams Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: hch@lst.de Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/152539239150.31796.9189779163576449784.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/uaccess_64.h | 11 ++++++ include/linux/uio.h | 15 ++++++++ lib/iov_iter.c | 61 +++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..6ca22706cd64 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c63efc07891f..62acb613114b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -46,6 +46,17 @@ copy_user_generic(void *to, const void *from, unsigned len) return ret; } +static __always_inline __must_check unsigned long +copy_to_user_mcsafe(void *to, const void *from, unsigned len) +{ + unsigned long ret; + + __uaccess_begin(); + ret = memcpy_mcsafe(to, from, len); + __uaccess_end(); + return ret; +} + static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { diff --git a/include/linux/uio.h b/include/linux/uio.h index e67e12adb136..f5766e853a77 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -154,6 +154,12 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i); +#else +#define _copy_to_iter_mcsafe _copy_to_iter +#endif + static __always_inline __must_check size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { @@ -163,6 +169,15 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) return _copy_from_iter_flushcache(addr, bytes, i); } +static __always_inline __must_check +size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return 0; + else + return _copy_to_iter_mcsafe(addr, bytes, i); +} + size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 970212670b6a..70ebc8ede143 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +static int copyout_mcsafe(void __user *to, const void *from, size_t n) +{ + if (access_ok(VERIFY_WRITE, to, n)) { + kasan_check_read(from, n); + n = copy_to_user_mcsafe((__force void *) to, from, n); + } + return n; +} + +static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, + const char *from, size_t len) +{ + unsigned long ret; + char *to; + + to = kmap_atomic(page); + ret = memcpy_mcsafe(to + offset, from, len); + kunmap_atomic(to); + + return ret; +} + +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +{ + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, + copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), + ({ + rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }), + ({ + rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }) + ) + + return bytes; +} +EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); +#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ + size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; From 522239b445a2de988edb81672963708a6aaf9046 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 May 2018 23:17:03 -0700 Subject: [PATCH 06/10] uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation Add a common Kconfig CONFIG_ARCH_HAS_UACCESS_MCSAFE that archs can optionally select, and fixup the declaration of _copy_to_iter_mcsafe(). Fixes: 8780356ef630 ("x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe()") Signed-off-by: Dan Williams --- include/linux/uio.h | 2 +- lib/Kconfig | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index f5766e853a77..409c845d4cd3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -155,7 +155,7 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #endif #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE -size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i); +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); #else #define _copy_to_iter_mcsafe _copy_to_iter #endif diff --git a/lib/Kconfig b/lib/Kconfig index 5fe577673b98..907f6e4f1cf2 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -586,6 +586,9 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_UACCESS_FLUSHCACHE bool +config ARCH_HAS_UACCESS_MCSAFE + bool + config STACKDEPOT bool select STACKTRACE From b3a9a0c36e1f7b9e2e6cf965c2bb973624f2b3b9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 2 May 2018 06:46:33 -0700 Subject: [PATCH 07/10] dax: Introduce a ->copy_to_iter dax operation Similar to the ->copy_from_iter() operation, a platform may want to deploy an architecture or device specific routine for handling reads from a dax_device like /dev/pmemX. On x86 this routine will point to a machine check safe version of copy_to_iter(). For now, add the plumbing to device-mapper and the dax core. Cc: Ross Zwisler Cc: Mike Snitzer Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/super.c | 10 ++++++++++ drivers/md/dm-linear.c | 16 ++++++++++++++++ drivers/md/dm-log-writes.c | 15 +++++++++++++++ drivers/md/dm-stripe.c | 21 +++++++++++++++++++++ drivers/md/dm.c | 25 +++++++++++++++++++++++++ drivers/nvdimm/pmem.c | 7 +++++++ drivers/s390/block/dcssblk.c | 7 +++++++ fs/dax.c | 3 ++- include/linux/dax.h | 5 +++++ include/linux/device-mapper.h | 5 +++-- 10 files changed, 111 insertions(+), 3 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 2b2332b605e4..31b839113399 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -282,6 +282,16 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + if (!dax_alive(dax_dev)) + return 0; + + return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_copy_to_iter); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 775c06d953b7..d10964d41fd7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define linear_dax_direct_access NULL #define linear_dax_copy_from_iter NULL +#define linear_dax_copy_to_iter NULL #endif static struct target_type linear_target = { @@ -204,6 +219,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, + .dax_copy_to_iter = linear_dax_copy_to_iter, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index c90c7c08a77f..9ea2b0291f20 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, dax_copy: return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } + +static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, + struct iov_iter *i) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + + if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_copy_from_iter NULL +#define log_writes_dax_copy_to_iter NULL #endif static struct target_type log_writes_target = { @@ -982,6 +996,7 @@ static struct target_type log_writes_target = { .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, .dax_copy_from_iter = log_writes_dax_copy_from_iter, + .dax_copy_to_iter = log_writes_dax_copy_to_iter, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index fe7fb9b1aec3..8547d7594338 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct stripe_c *sc = ti->private; + struct dax_device *dax_dev; + struct block_device *bdev; + uint32_t stripe; + + stripe_map_sector(sc, sector, &stripe, &dev_sector); + dev_sector += sc->stripe[stripe].physical_start; + dax_dev = sc->stripe[stripe].dev->dax_dev; + bdev = sc->stripe[stripe].dev->bdev; + + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define stripe_dax_direct_access NULL #define stripe_dax_copy_from_iter NULL +#define stripe_dax_copy_to_iter NULL #endif /* @@ -478,6 +498,7 @@ static struct target_type stripe_target = { .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, + .dax_copy_to_iter = stripe_dax_copy_to_iter, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0a7b0107ca78..6752f1c25258 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1089,6 +1089,30 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + long ret = 0; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (!ti->type->dax_copy_to_iter) { + ret = copy_to_iter(addr, bytes, i); + goto out; + } + ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i); + out: + dm_put_live_table(md, srcu_idx); + + return ret; +} + /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET. @@ -3134,6 +3158,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .copy_from_iter = dm_dax_copy_from_iter, + .copy_to_iter = dm_dax_copy_to_iter, }; /* diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e023d6aa22b5..1b8ab48365de 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -264,9 +264,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return copy_from_iter_flushcache(addr, bytes, i); } +static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .copy_from_iter = pmem_copy_from_iter, + .copy_to_iter = pmem_copy_to_iter, }; static const struct attribute_group *pmem_attribute_groups[] = { diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 0a312e450207..29024492b8ed 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -51,9 +51,16 @@ static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, return copy_from_iter(addr, bytes, i); } +static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} + static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, .copy_from_iter = dcssblk_dax_copy_from_iter, + .copy_to_iter = dcssblk_dax_copy_to_iter, }; struct dcssblk_dev_info { diff --git a/fs/dax.c b/fs/dax.c index aaec72ded1b6..a64afdf7ec0d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1057,7 +1057,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else - map_len = copy_to_iter(kaddr, map_len, iter); + map_len = dax_copy_to_iter(dax_dev, pgoff, kaddr, + map_len, iter); if (map_len <= 0) { ret = map_len ? map_len : -EFAULT; break; diff --git a/include/linux/dax.h b/include/linux/dax.h index f9eb22ad341e..a43b396fb336 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -20,6 +20,9 @@ struct dax_operations { /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* copy_to_iter: required operation for fs-dax direct-i/o */ + size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; extern struct attribute_group dax_attribute_group; @@ -118,6 +121,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 31fef7c34185..6fb0808e87c8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -133,7 +133,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, +typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); #define PAGE_SECTORS (PAGE_SIZE / 512) @@ -184,7 +184,8 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; - dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_copy_iter_fn dax_copy_from_iter; + dm_dax_copy_iter_fn dax_copy_to_iter; /* For internal device-mapper use. */ struct list_head list; From a77d478642f12ade58fdfac335ae464df1944a1e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Mar 2018 17:36:44 -0700 Subject: [PATCH 08/10] dax: Report bytes remaining in dax_iomap_actor() In preparation for protecting the dax read(2) path from media errors with copy_to_iter_mcsafe() (via dax_copy_to_iter()), convert the implementation to report the bytes successfully transferred. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Dan Williams --- fs/dax.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index a64afdf7ec0d..34a2d435ae4b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -991,6 +991,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iov_iter *iter = data; loff_t end = pos + length, done = 0; ssize_t ret = 0; + size_t xfer; int id; if (iov_iter_rw(iter) == READ) { @@ -1054,19 +1055,20 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * vfs_write(), depending on which operation we are doing. */ if (iov_iter_rw(iter) == WRITE) - map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else - map_len = dax_copy_to_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, map_len, iter); - if (map_len <= 0) { - ret = map_len ? map_len : -EFAULT; - break; - } - pos += map_len; - length -= map_len; - done += map_len; + pos += xfer; + length -= xfer; + done += xfer; + + if (xfer == 0) + ret = -EFAULT; + if (xfer < map_len) + break; } dax_read_unlock(id); From 6dfdb2b6d877d654c8c7b59d7166f4d672fba4e8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 2 May 2018 13:41:02 -0700 Subject: [PATCH 09/10] pmem: Switch to copy_to_iter_mcsafe() Use the machine check safe version of copy_to_iter() for the ->copy_to_iter() operation published by the pmem driver. Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1b8ab48365de..6d3da8c92868 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -267,7 +267,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - return copy_to_iter(addr, bytes, i); + return copy_to_iter_mcsafe(addr, bytes, i); } static const struct dax_operations pmem_dax_ops = { From 5d8beee20d89e34ff1dcb0da84adf7607858c59d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 1 May 2018 06:49:45 -0700 Subject: [PATCH 10/10] x86, nfit_test: Add unit test for memcpy_mcsafe() Given the fact that the ACPI "EINJ" (error injection) facility is not universally available, implement software infrastructure to validate the memcpy_mcsafe() exception handling implementation. For each potential read exception point in memcpy_mcsafe(), inject a emulated exception point at the address identified by 'mcsafe_inject' variable. With this infrastructure implement a test to validate that the 'bytes remaining' calculation is correct for a range of various source buffer alignments. This code is compiled out by default. The CONFIG_MCSAFE_DEBUG configuration symbol needs to be manually enabled by editing Kconfig.debug. I.e. this functionality can not be accidentally enabled by a user / distro, it's only for development. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Reported-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/Kconfig.debug | 3 + arch/x86/include/asm/mcsafe_test.h | 75 +++++++++++++++++++++ arch/x86/lib/memcpy_64.S | 10 +++ tools/testing/nvdimm/test/nfit.c | 104 +++++++++++++++++++++++++++++ 4 files changed, 192 insertions(+) create mode 100644 arch/x86/include/asm/mcsafe_test.h diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 192e4d2f9efc..c6dd1d980081 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. +config MCSAFE_TEST + def_bool n + config X86_PTDUMP_CORE def_bool n diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 000000000000..eb59804b6201 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_TEST +extern unsigned long mcsafe_test_src; +extern unsigned long mcsafe_test_dst; + +static inline void mcsafe_inject_src(void *addr) +{ + if (addr) + mcsafe_test_src = (unsigned long) addr; + else + mcsafe_test_src = ~0UL; +} + +static inline void mcsafe_inject_dst(void *addr) +{ + if (addr) + mcsafe_test_dst = (unsigned long) addr; + else + mcsafe_test_dst = ~0UL; +} +#else /* CONFIG_MCSAFE_TEST */ +static inline void mcsafe_inject_src(void *addr) +{ +} + +static inline void mcsafe_inject_dst(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_TEST */ + +#else /* __ASSEMBLY__ */ +#include + +#ifdef CONFIG_MCSAFE_TEST +.macro MCSAFE_TEST_CTL + .pushsection .data + .align 8 + .globl mcsafe_test_src + mcsafe_test_src: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_src) + .globl mcsafe_test_dst + mcsafe_test_dst: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_dst) + .popsection +.endm + +.macro MCSAFE_TEST_SRC reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_src, %r9 + ja \target +.endm + +.macro MCSAFE_TEST_DST reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_dst, %r9 + ja \target +.endm +#else +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* CONFIG_MCSAFE_TEST */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index c3b527a9f95d..298ef1479240 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -183,6 +184,9 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_TEST_CTL + /* * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. @@ -206,6 +210,8 @@ ENTRY(__memcpy_mcsafe) subl %ecx, %edx .L_read_leading_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes + MCSAFE_TEST_DST %rdi 1 .E_leading_bytes .L_write_leading_bytes: movb %al, (%rdi) incq %rsi @@ -221,6 +227,8 @@ ENTRY(__memcpy_mcsafe) .L_read_words: movq (%rsi), %r8 + MCSAFE_TEST_SRC %rsi 8 .E_read_words + MCSAFE_TEST_DST %rdi 8 .E_write_words .L_write_words: movq %r8, (%rdi) addq $8, %rsi @@ -237,6 +245,8 @@ ENTRY(__memcpy_mcsafe) movl %edx, %ecx .L_read_trailing_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes + MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes .L_write_trailing_bytes: movb %al, (%rdi) incq %rsi diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4ea385be528f..a8fb63edcf89 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -29,6 +29,8 @@ #include "nfit_test.h" #include "../watermark.h" +#include + /* * Generate an NFIT table to describe the following topology: * @@ -2681,6 +2683,107 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; +static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + +enum INJECT { + INJECT_NONE, + INJECT_SRC, + INJECT_DST, +}; + +static void mcsafe_test_init(char *dst, char *src, size_t size) +{ + size_t i; + + memset(dst, 0xff, size); + for (i = 0; i < size; i++) + src[i] = (char) i; +} + +static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, + size_t size, unsigned long rem) +{ + size_t i; + + for (i = 0; i < size - rem; i++) + if (dst[i] != (unsigned char) i) { + pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n", + __func__, __LINE__, i, dst[i], + (unsigned char) i); + return false; + } + for (i = size - rem; i < size; i++) + if (dst[i] != 0xffU) { + pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n", + __func__, __LINE__, i, dst[i]); + return false; + } + return true; +} + +void mcsafe_test(void) +{ + char *inject_desc[] = { "none", "source", "destination" }; + enum INJECT inj; + + if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { + pr_info("%s: run...\n", __func__); + } else { + pr_info("%s: disabled, skip.\n", __func__); + return; + } + + for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) { + int i; + + pr_info("%s: inject: %s\n", __func__, inject_desc[inj]); + for (i = 0; i < 512; i++) { + unsigned long expect, rem; + void *src, *dst; + bool valid; + + switch (inj) { + case INJECT_NONE: + mcsafe_inject_src(NULL); + mcsafe_inject_dst(NULL); + dst = &mcsafe_buf[2048]; + src = &mcsafe_buf[1024 - i]; + expect = 0; + break; + case INJECT_SRC: + mcsafe_inject_src(&mcsafe_buf[1024]); + mcsafe_inject_dst(NULL); + dst = &mcsafe_buf[2048]; + src = &mcsafe_buf[1024 - i]; + expect = 512 - i; + break; + case INJECT_DST: + mcsafe_inject_src(NULL); + mcsafe_inject_dst(&mcsafe_buf[2048]); + dst = &mcsafe_buf[2048 - i]; + src = &mcsafe_buf[1024]; + expect = 512 - i; + break; + } + + mcsafe_test_init(dst, src, 512); + rem = __memcpy_mcsafe(dst, src, 512); + valid = mcsafe_test_validate(dst, src, 512, expect); + if (rem == expect && valid) + continue; + pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", + __func__, + ((unsigned long) dst) & ~PAGE_MASK, + ((unsigned long ) src) & ~PAGE_MASK, + 512, i, rem, valid ? "valid" : "bad", + expect); + } + } + + mcsafe_inject_src(NULL); + mcsafe_inject_dst(NULL); +} + static __init int nfit_test_init(void) { int rc, i; @@ -2689,6 +2792,7 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); + mcsafe_test(); nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);