CONFIG_AS_AVX was introduced by commit ea4d26ae24 ("raid5: add AVX optimized RAID5 checksumming").

We raise the minimum supported binutils version from time to time. The last bump was commit 1fb12b35e5 ("kbuild: Raise the minimum required binutils version to 2.21").

I confirmed the code in $(call as-instr,...) can be assembled by the binutils 2.21 assembler and also by the LLVM integrated assembler.

Remove CONFIG_AS_AVX, which is always defined.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
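To illustrate the cleanup (an illustrative sketch only, assuming the usual pattern for these assembler-capability guards; the exact pre-change hunk is not reproduced here), code depending on AVX assembler support used to be wrapped in a conditional with a fallback for assemblers that could not emit AVX:

	#ifdef CONFIG_AS_AVX
	/* AVX xor routines, AVX_XOR_SPEED, AVX_SELECT(), ... */
	#else
	/* assembler cannot emit AVX: compile the template out (sketch) */
	#define AVX_XOR_SPEED		do { } while (0)
	#define AVX_SELECT(FASTEST)	(FASTEST)
	#endif

Since binutils >= 2.21 and the LLVM integrated assembler always accept the probed AVX instructions, the #else branch is dead, so the guard and the fallback definitions can be dropped and the file below is compiled unconditionally.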
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#include <linux/compiler.h>
#include <asm/fpu/api.h>

#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)

static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
	/* BLOCK16() covers 16 x 32-byte ymm blocks, i.e. 512 bytes per iteration */
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16();

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};

#define AVX_XOR_SPEED \
do { \
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
		xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#endif
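For reference, the work the AVX routines above perform is a plain XOR accumulation into the first buffer, 512 bytes per loop iteration. The following is a minimal, non-kernel C sketch of what xor_avx_3() computes; the name xor_ref_3 is made up for illustration and does not exist in the tree:

	#include <stddef.h>

	static void xor_ref_3(size_t bytes, unsigned long *p0,
			      const unsigned long *p1, const unsigned long *p2)
	{
		/* p0[i] ^= p1[i] ^ p2[i] over the whole region */
		for (size_t i = 0; i < bytes / sizeof(*p0); i++)
			p0[i] ^= p1[i] ^ p2[i];
	}

The AVX version does the same work 32 bytes at a time in ymm registers, bracketed by kernel_fpu_begin()/kernel_fpu_end() so that touching the vector registers in kernel context is safe.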