riscv: Add vector extension XOR implementation

Add a vector-optimized XOR implementation. It has been tested in QEMU.

Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20240115055929.4736-4-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Authored by Greentime Hu on 2024-01-15 05:59:22 +00:00; committed by Palmer Dabbelt
parent 956895b9d8
commit c5674d00ca
4 changed files with 168 additions and 0 deletions
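
For context, nothing calls these routines directly: the generic code in crypto/xor.c exposes xor_blocks() (declared in include/linux/raid/xor.h) and routes it to whichever xor_block_template benchmarked fastest at boot, so the new "rvv" template only needs to register itself below. A minimal sketch of a caller, assuming that generic API; example_xor_into() itself is hypothetical and not part of this patch:

#include <linux/raid/xor.h>
#include <asm/page.h>

/*
 * Illustration only: XOR one page-sized source into dest through the
 * generic helper. With a single source this reaches the selected
 * template's do_2 hook, i.e. xor_vector_2()/xor_regs_2_() below when
 * the "rvv" template wins the benchmark.
 */
static void example_xor_into(void *dest, void *src)
{
	void *srcs[1] = { src };

	xor_blocks(1, PAGE_SIZE, dest, srcs);	/* dest ^= src */
}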

arch/riscv/include/asm/asm-prototypes.h

@@ -9,6 +9,24 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);

#ifdef CONFIG_RISCV_ISA_V

void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2);
void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2,
		 const unsigned long *__restrict p3);
void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2,
		 const unsigned long *__restrict p3,
		 const unsigned long *__restrict p4);
void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
		 const unsigned long *__restrict p2,
		 const unsigned long *__restrict p3,
		 const unsigned long *__restrict p4,
		 const unsigned long *__restrict p5);

#endif /* CONFIG_RISCV_ISA_V */

#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)

arch/riscv/include/asm/xor.h (new file)

@@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2021 SiFive
 */

#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#ifdef CONFIG_RISCV_ISA_V
#include <asm/vector.h>
#include <asm/switch_to.h>
#include <asm/asm-prototypes.h>

static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2)
{
	kernel_vector_begin();
	xor_regs_2_(bytes, p1, p2);
	kernel_vector_end();
}

static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3)
{
	kernel_vector_begin();
	xor_regs_3_(bytes, p1, p2, p3);
	kernel_vector_end();
}

static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3,
			 const unsigned long *__restrict p4)
{
	kernel_vector_begin();
	xor_regs_4_(bytes, p1, p2, p3, p4);
	kernel_vector_end();
}

static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
			 const unsigned long *__restrict p2,
			 const unsigned long *__restrict p3,
			 const unsigned long *__restrict p4,
			 const unsigned long *__restrict p5)
{
	kernel_vector_begin();
	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
	kernel_vector_end();
}

static struct xor_block_template xor_block_rvv = {
	.name = "rvv",
	.do_2 = xor_vector_2,
	.do_3 = xor_vector_3,
	.do_4 = xor_vector_4,
	.do_5 = xor_vector_5
};

#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES		\
	do {				\
		xor_speed(&xor_block_8regs);	\
		xor_speed(&xor_block_32regs);	\
		if (has_vector()) {		\
			xor_speed(&xor_block_rvv);\
		}				\
	} while (0)
#endif
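
The redefined XOR_TRY_TEMPLATES above is how the boot-time benchmark in crypto/xor.c learns about the new template: each xor_speed() call times one candidate, and the fastest implementation is kept for later xor_blocks() calls. A standalone sketch of that selection idea, with hypothetical names and made-up throughput numbers standing in for the kernel's timed runs:

#include <stdio.h>

/* Hypothetical stand-ins for struct xor_block_template and xor_speed(). */
struct xor_tmpl {
	const char *name;
	int speed;			/* throughput from a (here faked) benchmark pass */
};

static struct xor_tmpl *fastest;	/* the template later XOR calls dispatch to */

static void xor_speed_sketch(struct xor_tmpl *t, int measured)
{
	t->speed = measured;
	if (!fastest || t->speed > fastest->speed)
		fastest = t;
}

int main(void)
{
	struct xor_tmpl regs8 = { "8regs", 0 };
	struct xor_tmpl regs32 = { "32regs", 0 };
	struct xor_tmpl rvv = { "rvv", 0 };
	int cpu_has_vector = 1;			/* pretend the V extension is present */

	xor_speed_sketch(&regs8, 4200);		/* made-up numbers */
	xor_speed_sketch(&regs32, 4900);
	if (cpu_has_vector)			/* mirrors the has_vector() check above */
		xor_speed_sketch(&rvv, 9800);

	printf("xor: using '%s' template\n", fastest->name);
	return 0;
}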

arch/riscv/lib/Makefile

@@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o

arch/riscv/lib/xor.S (new file)

@@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2021 SiFive
 */
#include <linux/linkage.h>
#include <linux/export.h>
#include <asm/asm.h>

SYM_FUNC_START(xor_regs_2_)
	vsetvli a3, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a3
	vxor.vv v16, v0, v8
	add a2, a2, a3
	vse8.v v16, (a1)
	add a1, a1, a3
	bnez a0, xor_regs_2_
	ret
SYM_FUNC_END(xor_regs_2_)
EXPORT_SYMBOL(xor_regs_2_)

SYM_FUNC_START(xor_regs_3_)
	vsetvli a4, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a4
	vxor.vv v0, v0, v8
	vle8.v v16, (a3)
	add a2, a2, a4
	vxor.vv v16, v0, v16
	add a3, a3, a4
	vse8.v v16, (a1)
	add a1, a1, a4
	bnez a0, xor_regs_3_
	ret
SYM_FUNC_END(xor_regs_3_)
EXPORT_SYMBOL(xor_regs_3_)

SYM_FUNC_START(xor_regs_4_)
	vsetvli a5, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a5
	vxor.vv v0, v0, v8
	vle8.v v16, (a3)
	add a2, a2, a5
	vxor.vv v0, v0, v16
	vle8.v v24, (a4)
	add a3, a3, a5
	vxor.vv v16, v0, v24
	add a4, a4, a5
	vse8.v v16, (a1)
	add a1, a1, a5
	bnez a0, xor_regs_4_
	ret
SYM_FUNC_END(xor_regs_4_)
EXPORT_SYMBOL(xor_regs_4_)

SYM_FUNC_START(xor_regs_5_)
	vsetvli a6, a0, e8, m8, ta, ma
	vle8.v v0, (a1)
	vle8.v v8, (a2)
	sub a0, a0, a6
	vxor.vv v0, v0, v8
	vle8.v v16, (a3)
	add a2, a2, a6
	vxor.vv v0, v0, v16
	vle8.v v24, (a4)
	add a3, a3, a6
	vxor.vv v0, v0, v24
	vle8.v v8, (a5)
	add a4, a4, a6
	vxor.vv v16, v0, v8
	add a5, a5, a6
	vse8.v v16, (a1)
	add a1, a1, a6
	bnez a0, xor_regs_5_
	ret
SYM_FUNC_END(xor_regs_5_)
EXPORT_SYMBOL(xor_regs_5_)
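
To make the register usage above easier to follow, here is a rough C model of the two-operand loop (illustration only, not kernel code): on each pass, vsetvli grants a chunk size in a3, the loads/xor/store cover that many bytes, and the remaining byte count and both pointers advance by the same amount until a0 reaches zero; the wider variants only add more source streams.

#include <stddef.h>
#include <stdint.h>

/*
 * Hedged model of xor_regs_2_; the 128-byte cap is a made-up stand-in
 * for whatever chunk size vsetvli actually grants for e8/m8.
 */
void xor_regs_2_model(size_t bytes, uint8_t *p1, const uint8_t *p2)
{
	while (bytes) {
		size_t vl = bytes < 128 ? bytes : 128;	/* vsetvli a3, a0, e8, m8, ta, ma */

		for (size_t i = 0; i < vl; i++)		/* vle8.v, vxor.vv, vse8.v */
			p1[i] ^= p2[i];

		bytes -= vl;				/* sub a0, a0, a3 */
		p2 += vl;				/* add a2, a2, a3 */
		p1 += vl;				/* add a1, a1, a3 */
	}						/* bnez a0, xor_regs_2_ */
}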