LoongArch: Add vector extensions support

Add support for LoongArch's vector extensions, which include the 128-bit
LSX (i.e., Loongson SIMD eXtension) and the 256-bit LASX (i.e., Loongson
Advanced SIMD eXtension).

The Linux kernel doesn't use vector instructions itself; it only handles
exceptions and context save/restore, so it needs only a subset of these
instructions:

* Vector load/store:   vld vst vldx vstx xvld xvst xvldx xvstx
* 8bit-elements move:  vpickve2gr.b xvpickve2gr.b vinsgr2vr.b xvinsgr2vr.b
* 16bit-elements move: vpickve2gr.h xvpickve2gr.h vinsgr2vr.h xvinsgr2vr.h
* 32bit-elements move: vpickve2gr.w xvpickve2gr.w vinsgr2vr.w xvinsgr2vr.w
* 64bit-elements move: vpickve2gr.d xvpickve2gr.d vinsgr2vr.d xvinsgr2vr.d
* Elements permute:    vpermi.w vpermi.d xvpermi.w xvpermi.d xvpermi.q

Introduce AS_HAS_LSX_EXTENSION and AS_HAS_LASX_EXTENSION so that toolchains
without vector support do not complain about unsupported instructions.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
This commit is contained in:
Huacai Chen 2023-06-29 20:58:43 +08:00
parent aa5e65dc08
commit 616500232e
11 changed files with 1452 additions and 23 deletions

View File

@ -164,14 +164,6 @@ config 32BIT
config 64BIT
def_bool y
config CPU_HAS_FPU
bool
default y
config CPU_HAS_PREFETCH
bool
default y
config GENERIC_BUG
def_bool y
depends on BUG
@ -247,6 +239,12 @@ config AS_HAS_EXPLICIT_RELOCS
config AS_HAS_FCSR_CLASS
def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)
# Set when the assembler accepts an LSX instruction (probed with "vld");
# CPU_HAS_LSX depends on this symbol.
config AS_HAS_LSX_EXTENSION
def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)
# Set when the assembler accepts an LASX instruction (probed with "xvld");
# CPU_HAS_LASX depends on this symbol.
config AS_HAS_LASX_EXTENSION
def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
menu "Kernel type and options"
source "kernel/Kconfig.hz"
@ -487,6 +485,43 @@ config ARCH_STRICT_ALIGN
to run kernel only on systems with h/w unaligned access support in
order to optimise for performance.
config CPU_HAS_FPU
bool
default y
config CPU_HAS_LSX
bool "Support for the Loongson SIMD Extension"
depends on AS_HAS_LSX_EXTENSION
help
Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
is enabled the kernel will support allocating & switching LSX
vector register contexts. If you know that your kernel will only be
running on CPUs which do not support LSX or that your userland will
not be making use of it then you may wish to say N here to reduce
the size & complexity of your kernel.
If unsure, say Y.
config CPU_HAS_LASX
bool "Support for the Loongson Advanced SIMD Extension"
depends on CPU_HAS_LSX
depends on AS_HAS_LASX_EXTENSION
help
Loongson Advanced SIMD Extension (LASX) introduces 256 bit wide vector
registers and a set of SIMD instructions to operate on them. When this
option is enabled the kernel will support allocating & switching LASX
vector register contexts. If you know that your kernel will only be
running on CPUs which do not support LASX or that your userland will
not be making use of it then you may wish to say N here to reduce
the size & complexity of your kernel.
If unsure, say Y.
config CPU_HAS_PREFETCH
bool
default y
config KEXEC
bool "Kexec system call"
select KEXEC_CORE

View File

@ -270,6 +270,399 @@
fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
/*
 * lsx_save_data thread tmp
 *
 * Store the LSX registers $vr0-$vr31 with vst into the THREAD_FPR0..
 * THREAD_FPR31 slots of \thread.
 *
 * \tmp is clobbered: it is set to \thread + THREAD_FPR0 and used as the base
 * address, so each store offset is expressed relative to THREAD_FPR0.
 */
.macro lsx_save_data thread tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
vst $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0
vst $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0
vst $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0
vst $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0
vst $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0
vst $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0
vst $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0
vst $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0
vst $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0
vst $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0
vst $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
vst $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
vst $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
vst $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
vst $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
vst $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
vst $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
vst $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
vst $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
vst $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
vst $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
vst $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
vst $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
vst $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
vst $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
vst $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
vst $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
vst $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
vst $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
vst $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
vst $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
vst $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
/*
 * lsx_restore_data thread tmp
 *
 * Load the LSX registers $vr0-$vr31 with vld from the THREAD_FPR0..
 * THREAD_FPR31 slots of \thread (the inverse of lsx_save_data).
 *
 * \tmp is clobbered: it is set to \thread + THREAD_FPR0 and used as the base
 * address for every load.
 */
.macro lsx_restore_data thread tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
vld $vr0, \tmp, THREAD_FPR0 - THREAD_FPR0
vld $vr1, \tmp, THREAD_FPR1 - THREAD_FPR0
vld $vr2, \tmp, THREAD_FPR2 - THREAD_FPR0
vld $vr3, \tmp, THREAD_FPR3 - THREAD_FPR0
vld $vr4, \tmp, THREAD_FPR4 - THREAD_FPR0
vld $vr5, \tmp, THREAD_FPR5 - THREAD_FPR0
vld $vr6, \tmp, THREAD_FPR6 - THREAD_FPR0
vld $vr7, \tmp, THREAD_FPR7 - THREAD_FPR0
vld $vr8, \tmp, THREAD_FPR8 - THREAD_FPR0
vld $vr9, \tmp, THREAD_FPR9 - THREAD_FPR0
vld $vr10, \tmp, THREAD_FPR10 - THREAD_FPR0
vld $vr11, \tmp, THREAD_FPR11 - THREAD_FPR0
vld $vr12, \tmp, THREAD_FPR12 - THREAD_FPR0
vld $vr13, \tmp, THREAD_FPR13 - THREAD_FPR0
vld $vr14, \tmp, THREAD_FPR14 - THREAD_FPR0
vld $vr15, \tmp, THREAD_FPR15 - THREAD_FPR0
vld $vr16, \tmp, THREAD_FPR16 - THREAD_FPR0
vld $vr17, \tmp, THREAD_FPR17 - THREAD_FPR0
vld $vr18, \tmp, THREAD_FPR18 - THREAD_FPR0
vld $vr19, \tmp, THREAD_FPR19 - THREAD_FPR0
vld $vr20, \tmp, THREAD_FPR20 - THREAD_FPR0
vld $vr21, \tmp, THREAD_FPR21 - THREAD_FPR0
vld $vr22, \tmp, THREAD_FPR22 - THREAD_FPR0
vld $vr23, \tmp, THREAD_FPR23 - THREAD_FPR0
vld $vr24, \tmp, THREAD_FPR24 - THREAD_FPR0
vld $vr25, \tmp, THREAD_FPR25 - THREAD_FPR0
vld $vr26, \tmp, THREAD_FPR26 - THREAD_FPR0
vld $vr27, \tmp, THREAD_FPR27 - THREAD_FPR0
vld $vr28, \tmp, THREAD_FPR28 - THREAD_FPR0
vld $vr29, \tmp, THREAD_FPR29 - THREAD_FPR0
vld $vr30, \tmp, THREAD_FPR30 - THREAD_FPR0
vld $vr31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
/*
 * lsx_save_all thread tmp0 tmp1
 *
 * Save the complete LSX context of \thread by invoking, in order,
 * fpu_save_cc, fpu_save_csr and lsx_save_data. \tmp0 and \tmp1 are scratch
 * registers handed to those macros and are clobbered.
 */
.macro lsx_save_all thread tmp0 tmp1
fpu_save_cc \thread, \tmp0, \tmp1
fpu_save_csr \thread, \tmp0
lsx_save_data \thread, \tmp0
.endm
/*
 * lsx_restore_all thread tmp0 tmp1
 *
 * Restore the complete LSX context of \thread by invoking, in order,
 * lsx_restore_data, fpu_restore_cc and fpu_restore_csr. \tmp0 and \tmp1 are
 * scratch registers handed to those macros and are clobbered.
 */
.macro lsx_restore_all thread tmp0 tmp1
lsx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
fpu_restore_csr \thread, \tmp0
.endm
/*
 * lsx_save_upper vd base tmp off
 *
 * Save only the upper 64 bits of LSX register \vd: 64-bit element 1 is moved
 * into the general register \tmp and stored at \base + \off + 8.
 * \tmp is clobbered.
 */
.macro lsx_save_upper vd base tmp off
vpickve2gr.d \tmp, \vd, 1
st.d \tmp, \base, (\off+8)
.endm
/*
 * lsx_save_all_upper thread base tmp
 *
 * Save the upper 64 bits of every LSX register ($vr0-$vr31) into the
 * matching THREAD_FPRn slot of \thread, using lsx_save_upper.
 *
 * Both \base and \tmp are clobbered: \base becomes \thread + THREAD_FPR0 and
 * \tmp carries each extracted 64-bit value.
 */
.macro lsx_save_all_upper thread base tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \base, \thread, \tmp
lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
.endm
/*
 * lsx_restore_upper vd base tmp off
 *
 * Restore only the upper 64 bits of LSX register \vd: the doubleword at
 * \base + \off + 8 is loaded into \tmp and inserted as 64-bit element 1.
 * \tmp is clobbered.
 */
.macro lsx_restore_upper vd base tmp off
ld.d \tmp, \base, (\off+8)
vinsgr2vr.d \vd, \tmp, 1
.endm
/*
 * lsx_restore_all_upper thread base tmp
 *
 * Restore the upper 64 bits of every LSX register ($vr0-$vr31) from the
 * matching THREAD_FPRn slot of \thread, using lsx_restore_upper.
 *
 * Both \base and \tmp are clobbered: \base becomes \thread + THREAD_FPR0 and
 * \tmp carries each loaded 64-bit value.
 */
.macro lsx_restore_all_upper thread base tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \base, \thread, \tmp
lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0)
lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0)
lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0)
lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0)
lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0)
lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0)
lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0)
lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0)
lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0)
lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0)
lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0)
lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0)
lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0)
lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0)
lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0)
lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0)
lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0)
lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0)
lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0)
lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0)
lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0)
lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0)
lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0)
lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0)
lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0)
lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0)
lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0)
lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0)
lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0)
lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0)
lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0)
lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0)
.endm
/*
 * lsx_init_upper vd tmp
 *
 * Overwrite the upper 64 bits (64-bit element 1) of LSX register \vd with
 * the value held in the general register \tmp. \tmp is not modified.
 */
.macro lsx_init_upper vd tmp
vinsgr2vr.d \vd, \tmp, 1
.endm
/*
 * lsx_init_all_upper tmp
 *
 * Set the upper 64 bits of every LSX register ($vr0-$vr31) to all ones.
 * \tmp is clobbered: it is loaded with ~0 (via the "not" macro) and then
 * inserted into element 1 of each register by lsx_init_upper.
 */
.macro lsx_init_all_upper tmp
not \tmp, zero
lsx_init_upper $vr0 \tmp
lsx_init_upper $vr1 \tmp
lsx_init_upper $vr2 \tmp
lsx_init_upper $vr3 \tmp
lsx_init_upper $vr4 \tmp
lsx_init_upper $vr5 \tmp
lsx_init_upper $vr6 \tmp
lsx_init_upper $vr7 \tmp
lsx_init_upper $vr8 \tmp
lsx_init_upper $vr9 \tmp
lsx_init_upper $vr10 \tmp
lsx_init_upper $vr11 \tmp
lsx_init_upper $vr12 \tmp
lsx_init_upper $vr13 \tmp
lsx_init_upper $vr14 \tmp
lsx_init_upper $vr15 \tmp
lsx_init_upper $vr16 \tmp
lsx_init_upper $vr17 \tmp
lsx_init_upper $vr18 \tmp
lsx_init_upper $vr19 \tmp
lsx_init_upper $vr20 \tmp
lsx_init_upper $vr21 \tmp
lsx_init_upper $vr22 \tmp
lsx_init_upper $vr23 \tmp
lsx_init_upper $vr24 \tmp
lsx_init_upper $vr25 \tmp
lsx_init_upper $vr26 \tmp
lsx_init_upper $vr27 \tmp
lsx_init_upper $vr28 \tmp
lsx_init_upper $vr29 \tmp
lsx_init_upper $vr30 \tmp
lsx_init_upper $vr31 \tmp
.endm
/*
 * lasx_save_data thread tmp
 *
 * Store the LASX registers $xr0-$xr31 with xvst into the THREAD_FPR0..
 * THREAD_FPR31 slots of \thread.
 *
 * \tmp is clobbered: it is set to \thread + THREAD_FPR0 and used as the base
 * address, so each store offset is expressed relative to THREAD_FPR0.
 */
.macro lasx_save_data thread tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
xvst $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0
xvst $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0
xvst $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0
xvst $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0
xvst $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0
xvst $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0
xvst $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0
xvst $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0
xvst $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0
xvst $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0
xvst $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
xvst $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
xvst $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
xvst $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
xvst $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
xvst $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
xvst $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
xvst $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
xvst $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
xvst $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
xvst $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
xvst $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
xvst $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
xvst $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
xvst $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
xvst $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
xvst $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
xvst $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
xvst $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
xvst $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
xvst $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
xvst $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
/*
 * lasx_restore_data thread tmp
 *
 * Load the LASX registers $xr0-$xr31 with xvld from the THREAD_FPR0..
 * THREAD_FPR31 slots of \thread (the inverse of lasx_save_data).
 *
 * \tmp is clobbered: it is set to \thread + THREAD_FPR0 and used as the base
 * address for every load.
 */
.macro lasx_restore_data thread tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \tmp, \thread, \tmp
xvld $xr0, \tmp, THREAD_FPR0 - THREAD_FPR0
xvld $xr1, \tmp, THREAD_FPR1 - THREAD_FPR0
xvld $xr2, \tmp, THREAD_FPR2 - THREAD_FPR0
xvld $xr3, \tmp, THREAD_FPR3 - THREAD_FPR0
xvld $xr4, \tmp, THREAD_FPR4 - THREAD_FPR0
xvld $xr5, \tmp, THREAD_FPR5 - THREAD_FPR0
xvld $xr6, \tmp, THREAD_FPR6 - THREAD_FPR0
xvld $xr7, \tmp, THREAD_FPR7 - THREAD_FPR0
xvld $xr8, \tmp, THREAD_FPR8 - THREAD_FPR0
xvld $xr9, \tmp, THREAD_FPR9 - THREAD_FPR0
xvld $xr10, \tmp, THREAD_FPR10 - THREAD_FPR0
xvld $xr11, \tmp, THREAD_FPR11 - THREAD_FPR0
xvld $xr12, \tmp, THREAD_FPR12 - THREAD_FPR0
xvld $xr13, \tmp, THREAD_FPR13 - THREAD_FPR0
xvld $xr14, \tmp, THREAD_FPR14 - THREAD_FPR0
xvld $xr15, \tmp, THREAD_FPR15 - THREAD_FPR0
xvld $xr16, \tmp, THREAD_FPR16 - THREAD_FPR0
xvld $xr17, \tmp, THREAD_FPR17 - THREAD_FPR0
xvld $xr18, \tmp, THREAD_FPR18 - THREAD_FPR0
xvld $xr19, \tmp, THREAD_FPR19 - THREAD_FPR0
xvld $xr20, \tmp, THREAD_FPR20 - THREAD_FPR0
xvld $xr21, \tmp, THREAD_FPR21 - THREAD_FPR0
xvld $xr22, \tmp, THREAD_FPR22 - THREAD_FPR0
xvld $xr23, \tmp, THREAD_FPR23 - THREAD_FPR0
xvld $xr24, \tmp, THREAD_FPR24 - THREAD_FPR0
xvld $xr25, \tmp, THREAD_FPR25 - THREAD_FPR0
xvld $xr26, \tmp, THREAD_FPR26 - THREAD_FPR0
xvld $xr27, \tmp, THREAD_FPR27 - THREAD_FPR0
xvld $xr28, \tmp, THREAD_FPR28 - THREAD_FPR0
xvld $xr29, \tmp, THREAD_FPR29 - THREAD_FPR0
xvld $xr30, \tmp, THREAD_FPR30 - THREAD_FPR0
xvld $xr31, \tmp, THREAD_FPR31 - THREAD_FPR0
.endm
/*
 * lasx_save_all thread tmp0 tmp1
 *
 * Save the complete LASX context of \thread by invoking, in order,
 * fpu_save_cc, fpu_save_csr and lasx_save_data. \tmp0 and \tmp1 are scratch
 * registers handed to those macros and are clobbered.
 */
.macro lasx_save_all thread tmp0 tmp1
fpu_save_cc \thread, \tmp0, \tmp1
fpu_save_csr \thread, \tmp0
lasx_save_data \thread, \tmp0
.endm
/*
 * lasx_restore_all thread tmp0 tmp1
 *
 * Restore the complete LASX context of \thread by invoking, in order,
 * lasx_restore_data, fpu_restore_cc and fpu_restore_csr. \tmp0 and \tmp1 are
 * scratch registers handed to those macros and are clobbered.
 */
.macro lasx_restore_all thread tmp0 tmp1
lasx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
fpu_restore_csr \thread, \tmp0
.endm
/*
 * lasx_save_upper xd base tmp off
 *
 * Placeholder that emits no instructions; all four arguments are ignored.
 */
.macro lasx_save_upper xd base tmp off
/* Nothing */
.endm
/*
 * lasx_save_all_upper thread base tmp
 *
 * Placeholder that emits no instructions; all three arguments are ignored,
 * so invoking it saves nothing.
 */
.macro lasx_save_all_upper thread base tmp
/* Nothing */
.endm
/*
 * lasx_restore_upper xd base tmp0 tmp1 off
 *
 * Restore the upper 128 bits of LASX register \xd from \base + \off + 16.
 * The data is first loaded into the LSX scratch register \tmp0 and then
 * merged into \xd with xvpermi.q, reading it through \tmp1.
 *
 * \tmp0 and \tmp1 are expected to be the 128-bit and 256-bit views of the
 * same vector register (lasx_restore_all_upper passes $vr31 and $xr31);
 * that register's low 128 bits are clobbered.
 *
 * NOTE(review): selector 0x2 presumably places the low 128 bits of \tmp1
 * in the high half of \xd while keeping \xd's low half - confirm against
 * the LASX instruction reference.
 */
.macro lasx_restore_upper xd base tmp0 tmp1 off
vld \tmp0, \base, (\off+16)
xvpermi.q \xd, \tmp1, 0x2
.endm
/*
 * lasx_restore_all_upper thread base tmp
 *
 * Restore the upper 128 bits of every LASX register ($xr0-$xr31) from the
 * matching THREAD_FPRn slot of \thread, using lasx_restore_upper with
 * $vr31/$xr31 as the staging register.
 *
 * Because $xr31 doubles as scratch, its low 128 bits are parked in the
 * general registers $r17 and $r18 beforehand and written back at the end.
 *
 * Clobbers: \base (set to \thread + THREAD_FPR0), \tmp, and - in addition to
 * the macro arguments - the hard-coded registers $r17 and $r18. Callers must
 * not keep live values in those two registers.
 */
.macro lasx_restore_all_upper thread base tmp
li.w \tmp, THREAD_FPR0
PTR_ADD \base, \thread, \tmp
/* Save $vr31 ($xr31 lower bits) with xvpickve2gr */
xvpickve2gr.d $r17, $xr31, 0
xvpickve2gr.d $r18, $xr31, 1
lasx_restore_upper $xr0, \base, $vr31, $xr31, (THREAD_FPR0-THREAD_FPR0)
lasx_restore_upper $xr1, \base, $vr31, $xr31, (THREAD_FPR1-THREAD_FPR0)
lasx_restore_upper $xr2, \base, $vr31, $xr31, (THREAD_FPR2-THREAD_FPR0)
lasx_restore_upper $xr3, \base, $vr31, $xr31, (THREAD_FPR3-THREAD_FPR0)
lasx_restore_upper $xr4, \base, $vr31, $xr31, (THREAD_FPR4-THREAD_FPR0)
lasx_restore_upper $xr5, \base, $vr31, $xr31, (THREAD_FPR5-THREAD_FPR0)
lasx_restore_upper $xr6, \base, $vr31, $xr31, (THREAD_FPR6-THREAD_FPR0)
lasx_restore_upper $xr7, \base, $vr31, $xr31, (THREAD_FPR7-THREAD_FPR0)
lasx_restore_upper $xr8, \base, $vr31, $xr31, (THREAD_FPR8-THREAD_FPR0)
lasx_restore_upper $xr9, \base, $vr31, $xr31, (THREAD_FPR9-THREAD_FPR0)
lasx_restore_upper $xr10, \base, $vr31, $xr31, (THREAD_FPR10-THREAD_FPR0)
lasx_restore_upper $xr11, \base, $vr31, $xr31, (THREAD_FPR11-THREAD_FPR0)
lasx_restore_upper $xr12, \base, $vr31, $xr31, (THREAD_FPR12-THREAD_FPR0)
lasx_restore_upper $xr13, \base, $vr31, $xr31, (THREAD_FPR13-THREAD_FPR0)
lasx_restore_upper $xr14, \base, $vr31, $xr31, (THREAD_FPR14-THREAD_FPR0)
lasx_restore_upper $xr15, \base, $vr31, $xr31, (THREAD_FPR15-THREAD_FPR0)
lasx_restore_upper $xr16, \base, $vr31, $xr31, (THREAD_FPR16-THREAD_FPR0)
lasx_restore_upper $xr17, \base, $vr31, $xr31, (THREAD_FPR17-THREAD_FPR0)
lasx_restore_upper $xr18, \base, $vr31, $xr31, (THREAD_FPR18-THREAD_FPR0)
lasx_restore_upper $xr19, \base, $vr31, $xr31, (THREAD_FPR19-THREAD_FPR0)
lasx_restore_upper $xr20, \base, $vr31, $xr31, (THREAD_FPR20-THREAD_FPR0)
lasx_restore_upper $xr21, \base, $vr31, $xr31, (THREAD_FPR21-THREAD_FPR0)
lasx_restore_upper $xr22, \base, $vr31, $xr31, (THREAD_FPR22-THREAD_FPR0)
lasx_restore_upper $xr23, \base, $vr31, $xr31, (THREAD_FPR23-THREAD_FPR0)
lasx_restore_upper $xr24, \base, $vr31, $xr31, (THREAD_FPR24-THREAD_FPR0)
lasx_restore_upper $xr25, \base, $vr31, $xr31, (THREAD_FPR25-THREAD_FPR0)
lasx_restore_upper $xr26, \base, $vr31, $xr31, (THREAD_FPR26-THREAD_FPR0)
lasx_restore_upper $xr27, \base, $vr31, $xr31, (THREAD_FPR27-THREAD_FPR0)
lasx_restore_upper $xr28, \base, $vr31, $xr31, (THREAD_FPR28-THREAD_FPR0)
lasx_restore_upper $xr29, \base, $vr31, $xr31, (THREAD_FPR29-THREAD_FPR0)
lasx_restore_upper $xr30, \base, $vr31, $xr31, (THREAD_FPR30-THREAD_FPR0)
lasx_restore_upper $xr31, \base, $vr31, $xr31, (THREAD_FPR31-THREAD_FPR0)
/* Restore $vr31 ($xr31 lower bits) with xvinsgr2vr */
xvinsgr2vr.d $xr31, $r17, 0
xvinsgr2vr.d $xr31, $r18, 1
.endm
/*
 * lasx_init_upper xd tmp
 *
 * Overwrite the upper 128 bits of LASX register \xd (64-bit elements 2 and
 * 3) with the value held in the general register \tmp. \tmp is not modified.
 */
.macro lasx_init_upper xd tmp
xvinsgr2vr.d \xd, \tmp, 2
xvinsgr2vr.d \xd, \tmp, 3
.endm
/*
 * lasx_init_all_upper tmp
 *
 * Set the upper 128 bits of every LASX register ($xr0-$xr31) to all ones.
 * \tmp is clobbered: it is loaded with ~0 (via the "not" macro) and then
 * inserted into elements 2 and 3 of each register by lasx_init_upper.
 */
.macro lasx_init_all_upper tmp
not \tmp, zero
lasx_init_upper $xr0 \tmp
lasx_init_upper $xr1 \tmp
lasx_init_upper $xr2 \tmp
lasx_init_upper $xr3 \tmp
lasx_init_upper $xr4 \tmp
lasx_init_upper $xr5 \tmp
lasx_init_upper $xr6 \tmp
lasx_init_upper $xr7 \tmp
lasx_init_upper $xr8 \tmp
lasx_init_upper $xr9 \tmp
lasx_init_upper $xr10 \tmp
lasx_init_upper $xr11 \tmp
lasx_init_upper $xr12 \tmp
lasx_init_upper $xr13 \tmp
lasx_init_upper $xr14 \tmp
lasx_init_upper $xr15 \tmp
lasx_init_upper $xr16 \tmp
lasx_init_upper $xr17 \tmp
lasx_init_upper $xr18 \tmp
lasx_init_upper $xr19 \tmp
lasx_init_upper $xr20 \tmp
lasx_init_upper $xr21 \tmp
lasx_init_upper $xr22 \tmp
lasx_init_upper $xr23 \tmp
lasx_init_upper $xr24 \tmp
lasx_init_upper $xr25 \tmp
lasx_init_upper $xr26 \tmp
lasx_init_upper $xr27 \tmp
lasx_init_upper $xr28 \tmp
lasx_init_upper $xr29 \tmp
lasx_init_upper $xr30 \tmp
lasx_init_upper $xr31 \tmp
.endm
/*
 * not dst src
 *
 * Bitwise complement: \dst = ~\src, implemented as NOR of \src with the
 * zero register.
 */
.macro not dst src
nor \dst, \src, zero
.endm

View File

@ -28,6 +28,26 @@ extern void _init_fpu(unsigned int);
extern void _save_fp(struct loongarch_fpu *);
extern void _restore_fp(struct loongarch_fpu *);
extern void _save_lsx(struct loongarch_fpu *fpu);
extern void _restore_lsx(struct loongarch_fpu *fpu);
extern void _init_lsx_upper(void);
extern void _restore_lsx_upper(struct loongarch_fpu *fpu);
extern void _save_lasx(struct loongarch_fpu *fpu);
extern void _restore_lasx(struct loongarch_fpu *fpu);
extern void _init_lasx_upper(void);
extern void _restore_lasx_upper(struct loongarch_fpu *fpu);
static inline void enable_lsx(void);
static inline void disable_lsx(void);
static inline void save_lsx(struct task_struct *t);
static inline void restore_lsx(struct task_struct *t);
static inline void enable_lasx(void);
static inline void disable_lasx(void);
static inline void save_lasx(struct task_struct *t);
static inline void restore_lasx(struct task_struct *t);
/*
* Mask the FCSR Cause bits according to the Enable bits, observing
* that Unimplemented is always enabled.
@ -44,6 +64,29 @@ static inline int is_fp_enabled(void)
1 : 0;
}
/*
 * Tell whether LSX is switched on for the current CPU.
 *
 * Returns 1 when the CPU has LSX and the CSR_EUEN_LSXEN bit is set in the
 * EUEN CSR, 0 otherwise. The CSR is not read when the CPU lacks LSX.
 */
static inline int is_lsx_enabled(void)
{
	if (cpu_has_lsx && (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN))
		return 1;

	return 0;
}
/*
 * Tell whether LASX is switched on for the current CPU.
 *
 * Returns 1 when the CPU has LASX and the CSR_EUEN_LASXEN bit is set in the
 * EUEN CSR, 0 otherwise. The CSR is not read when the CPU lacks LASX.
 */
static inline int is_lasx_enabled(void)
{
	if (cpu_has_lasx && (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN))
		return 1;

	return 0;
}
/*
 * Tell whether any vector unit (LSX or LASX) is currently enabled.
 *
 * Both helpers return 0 or 1, so the bitwise OR of their results is 1 when
 * at least one of them is enabled and 0 otherwise. Both are always evaluated.
 */
static inline int is_simd_enabled(void)
{
	int lsx_on = is_lsx_enabled();
	int lasx_on = is_lasx_enabled();

	return lsx_on | lasx_on;
}
#define enable_fpu() set_csr_euen(CSR_EUEN_FPEN)
#define disable_fpu() clear_csr_euen(CSR_EUEN_FPEN)
@ -81,9 +124,22 @@ static inline void own_fpu(int restore)
static inline void lose_fpu_inatomic(int save, struct task_struct *tsk)
{
if (is_fpu_owner()) {
if (save)
_save_fp(&tsk->thread.fpu);
disable_fpu();
if (!is_simd_enabled()) {
if (save)
_save_fp(&tsk->thread.fpu);
disable_fpu();
} else {
if (save) {
if (!is_lasx_enabled())
save_lsx(tsk);
else
save_lasx(tsk);
}
disable_fpu();
disable_lsx();
disable_lasx();
clear_tsk_thread_flag(tsk, TIF_USEDSIMD);
}
clear_tsk_thread_flag(tsk, TIF_USEDFPU);
}
KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
@ -129,4 +185,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
return tsk->thread.fpu.fpr;
}
/* Report whether the current thread has the TIF_USEDSIMD flag set. */
static inline int is_simd_owner(void)
{
	int owns_simd = test_thread_flag(TIF_USEDSIMD);

	return owns_simd;
}
#ifdef CONFIG_CPU_HAS_LSX
/*
 * Turn LSX on: write CSR_EUEN_LSXEN, under the CSR_EUEN_LSXEN mask, to the
 * EUEN CSR. Does nothing on CPUs without LSX.
 */
static inline void enable_lsx(void)
{
	if (!cpu_has_lsx)
		return;

	csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}
/*
 * Turn LSX off: write 0, under the CSR_EUEN_LSXEN mask, to the EUEN CSR.
 * Does nothing on CPUs without LSX.
 */
static inline void disable_lsx(void)
{
	if (!cpu_has_lsx)
		return;

	csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN);
}
/*
 * Save the LSX context into @t's thread.fpu area via _save_lsx().
 * Does nothing on CPUs without LSX.
 */
static inline void save_lsx(struct task_struct *t)
{
	if (!cpu_has_lsx)
		return;

	_save_lsx(&t->thread.fpu);
}
/*
 * Reload the LSX context from @t's thread.fpu area via _restore_lsx().
 * Does nothing on CPUs without LSX.
 */
static inline void restore_lsx(struct task_struct *t)
{
	if (!cpu_has_lsx)
		return;

	_restore_lsx(&t->thread.fpu);
}
/*
 * Initialise the upper halves of the LSX registers via _init_lsx_upper(),
 * but only on CPUs that actually have LSX.
 *
 * cpu_has_lsx is tested unconditionally, exactly like init_lasx_upper() and
 * the other LSX wrappers in this header. A guard of the form
 * "__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx" only takes effect when
 * cpu_has_lsx folds to a compile-time constant; if it is evaluated at run
 * time the guard never fires and LSX instructions would be executed on
 * hardware without LSX.
 */
static inline void init_lsx_upper(void)
{
	if (cpu_has_lsx)
		_init_lsx_upper();
}
/*
 * Reload the upper halves of the LSX registers from @t's thread.fpu area via
 * _restore_lsx_upper(). Does nothing on CPUs without LSX.
 */
static inline void restore_lsx_upper(struct task_struct *t)
{
	if (!cpu_has_lsx)
		return;

	_restore_lsx_upper(&t->thread.fpu);
}
#else
/*
 * CONFIG_CPU_HAS_LSX is not set: every LSX helper is an empty inline stub so
 * that callers build unchanged and no LSX code is emitted.
 */
static inline void enable_lsx(void) {}
static inline void disable_lsx(void) {}
static inline void save_lsx(struct task_struct *t) {}
static inline void restore_lsx(struct task_struct *t) {}
static inline void init_lsx_upper(void) {}
static inline void restore_lsx_upper(struct task_struct *t) {}
#endif
#ifdef CONFIG_CPU_HAS_LASX
/*
 * Turn LASX on: write CSR_EUEN_LASXEN, under the CSR_EUEN_LASXEN mask, to the
 * EUEN CSR. Does nothing on CPUs without LASX.
 */
static inline void enable_lasx(void)
{
	if (!cpu_has_lasx)
		return;

	csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}
/*
 * Turn LASX off: write 0, under the CSR_EUEN_LASXEN mask, to the EUEN CSR.
 * Does nothing on CPUs without LASX.
 */
static inline void disable_lasx(void)
{
	if (!cpu_has_lasx)
		return;

	csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN);
}
/*
 * Save the LASX context into @t's thread.fpu area via _save_lasx().
 * Does nothing on CPUs without LASX.
 */
static inline void save_lasx(struct task_struct *t)
{
	if (!cpu_has_lasx)
		return;

	_save_lasx(&t->thread.fpu);
}
/*
 * Reload the LASX context from @t's thread.fpu area via _restore_lasx().
 * Does nothing on CPUs without LASX.
 */
static inline void restore_lasx(struct task_struct *t)
{
	if (!cpu_has_lasx)
		return;

	_restore_lasx(&t->thread.fpu);
}
/*
 * Initialise the upper halves of the LASX registers via _init_lasx_upper().
 * Does nothing on CPUs without LASX.
 */
static inline void init_lasx_upper(void)
{
	if (!cpu_has_lasx)
		return;

	_init_lasx_upper();
}
/*
 * Reload the upper halves of the LASX registers from @t's thread.fpu area
 * via _restore_lasx_upper(). Does nothing on CPUs without LASX.
 */
static inline void restore_lasx_upper(struct task_struct *t)
{
	if (!cpu_has_lasx)
		return;

	_restore_lasx_upper(&t->thread.fpu);
}
#else
/*
 * CONFIG_CPU_HAS_LASX is not set: every LASX helper is an empty inline stub
 * so that callers build unchanged and no LASX code is emitted.
 */
static inline void enable_lasx(void) {}
static inline void disable_lasx(void) {}
static inline void save_lasx(struct task_struct *t) {}
static inline void restore_lasx(struct task_struct *t) {}
static inline void init_lasx_upper(void) {}
static inline void restore_lasx_upper(struct task_struct *t) {}
#endif
/*
 * Report whether the current thread has TIF_LSX_CTX_LIVE set.
 *
 * When cpu_has_lsx is a compile-time constant that evaluates to false the
 * answer is 0 without testing the flag; in every other case the thread flag
 * decides.
 */
static inline int thread_lsx_context_live(void)
{
	return (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) ?
		0 : test_thread_flag(TIF_LSX_CTX_LIVE);
}
/*
 * Report whether the current thread has TIF_LASX_CTX_LIVE set.
 *
 * When cpu_has_lasx is a compile-time constant that evaluates to false the
 * answer is 0 without testing the flag; in every other case the thread flag
 * decides.
 */
static inline int thread_lasx_context_live(void)
{
	return (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) ?
		0 : test_thread_flag(TIF_LASX_CTX_LIVE);
}
#endif /* _ASM_FPU_H */

View File

@ -41,9 +41,19 @@ struct user_pt_regs {
} __attribute__((aligned(8)));
struct user_fp_state {
uint64_t fpr[32];
uint64_t fcc;
uint32_t fcsr;
uint64_t fpr[32];
uint64_t fcc;
uint32_t fcsr;
};
/*
 * LSX register state: the 32 vector registers laid out back to back, two
 * 64-bit words per register (64 words in total).
 */
struct user_lsx_state {
/* 32 registers, 128 bits width per register. */
uint64_t vregs[32*2];
};
/*
 * LASX register state: the 32 vector registers laid out back to back, four
 * 64-bit words per register (128 words in total).
 */
struct user_lasx_state {
/* 32 registers, 256 bits width per register. */
uint64_t vregs[32*4];
};
struct user_watch_state {

View File

@ -41,4 +41,22 @@ struct fpu_context {
__u32 fcsr;
};
/*
 * LSX context
 *
 * LSX_CTX_MAGIC identifies an LSX context record and LSX_CTX_ALIGN (16) is
 * its required alignment in bytes.
 * regs holds 32 registers of two 64-bit words each; fcc and fcsr carry the
 * floating-point condition codes and control/status register.
 */
#define LSX_CTX_MAGIC 0x53580001
#define LSX_CTX_ALIGN 16
struct lsx_context {
__u64 regs[2*32];
__u64 fcc;
__u32 fcsr;
};
/*
 * LASX context
 *
 * LASX_CTX_MAGIC identifies an LASX context record and LASX_CTX_ALIGN (32)
 * is its required alignment in bytes.
 * regs holds 32 registers of four 64-bit words each; fcc and fcsr carry the
 * floating-point condition codes and control/status register.
 */
#define LASX_CTX_MAGIC 0x41535801
#define LASX_CTX_ALIGN 32
struct lasx_context {
__u64 regs[4*32];
__u64 fcc;
__u32 fcsr;
};
#endif /* _UAPI_ASM_SIGCONTEXT_H */

View File

@ -116,6 +116,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_FPU;
elf_hwcap |= HWCAP_LOONGARCH_FPU;
}
#ifdef CONFIG_CPU_HAS_LSX
if (config & CPUCFG2_LSX) {
c->options |= LOONGARCH_CPU_LSX;
elf_hwcap |= HWCAP_LOONGARCH_LSX;
}
#endif
#ifdef CONFIG_CPU_HAS_LASX
if (config & CPUCFG2_LASX) {
c->options |= LOONGARCH_CPU_LASX;
elf_hwcap |= HWCAP_LOONGARCH_LASX;
}
#endif
if (config & CPUCFG2_COMPLEX) {
c->options |= LOONGARCH_CPU_COMPLEX;
elf_hwcap |= HWCAP_LOONGARCH_COMPLEX;

View File

@ -145,6 +145,154 @@
movgr2fcsr fcsr0, \tmp0
.endm
/*
 * sc_save_lsx base
 *
 * Store $vr0-$vr31 to consecutive LSX_REG_WIDTH-sized slots starting at
 * \base. Expands to nothing when CONFIG_CPU_HAS_LSX is not set.
 *
 * NOTE(review): each store is wrapped in EX, which presumably registers an
 * exception fixup for a faulting access - confirm against the EX definition.
 */
.macro sc_save_lsx base
#ifdef CONFIG_CPU_HAS_LSX
EX vst $vr0, \base, (0 * LSX_REG_WIDTH)
EX vst $vr1, \base, (1 * LSX_REG_WIDTH)
EX vst $vr2, \base, (2 * LSX_REG_WIDTH)
EX vst $vr3, \base, (3 * LSX_REG_WIDTH)
EX vst $vr4, \base, (4 * LSX_REG_WIDTH)
EX vst $vr5, \base, (5 * LSX_REG_WIDTH)
EX vst $vr6, \base, (6 * LSX_REG_WIDTH)
EX vst $vr7, \base, (7 * LSX_REG_WIDTH)
EX vst $vr8, \base, (8 * LSX_REG_WIDTH)
EX vst $vr9, \base, (9 * LSX_REG_WIDTH)
EX vst $vr10, \base, (10 * LSX_REG_WIDTH)
EX vst $vr11, \base, (11 * LSX_REG_WIDTH)
EX vst $vr12, \base, (12 * LSX_REG_WIDTH)
EX vst $vr13, \base, (13 * LSX_REG_WIDTH)
EX vst $vr14, \base, (14 * LSX_REG_WIDTH)
EX vst $vr15, \base, (15 * LSX_REG_WIDTH)
EX vst $vr16, \base, (16 * LSX_REG_WIDTH)
EX vst $vr17, \base, (17 * LSX_REG_WIDTH)
EX vst $vr18, \base, (18 * LSX_REG_WIDTH)
EX vst $vr19, \base, (19 * LSX_REG_WIDTH)
EX vst $vr20, \base, (20 * LSX_REG_WIDTH)
EX vst $vr21, \base, (21 * LSX_REG_WIDTH)
EX vst $vr22, \base, (22 * LSX_REG_WIDTH)
EX vst $vr23, \base, (23 * LSX_REG_WIDTH)
EX vst $vr24, \base, (24 * LSX_REG_WIDTH)
EX vst $vr25, \base, (25 * LSX_REG_WIDTH)
EX vst $vr26, \base, (26 * LSX_REG_WIDTH)
EX vst $vr27, \base, (27 * LSX_REG_WIDTH)
EX vst $vr28, \base, (28 * LSX_REG_WIDTH)
EX vst $vr29, \base, (29 * LSX_REG_WIDTH)
EX vst $vr30, \base, (30 * LSX_REG_WIDTH)
EX vst $vr31, \base, (31 * LSX_REG_WIDTH)
#endif
.endm
/*
 * sc_restore_lsx base
 *
 * Load $vr0-$vr31 from consecutive LSX_REG_WIDTH-sized slots starting at
 * \base. Expands to nothing when CONFIG_CPU_HAS_LSX is not set.
 *
 * NOTE(review): each load is wrapped in EX, which presumably registers an
 * exception fixup for a faulting access - confirm against the EX definition.
 */
.macro sc_restore_lsx base
#ifdef CONFIG_CPU_HAS_LSX
EX vld $vr0, \base, (0 * LSX_REG_WIDTH)
EX vld $vr1, \base, (1 * LSX_REG_WIDTH)
EX vld $vr2, \base, (2 * LSX_REG_WIDTH)
EX vld $vr3, \base, (3 * LSX_REG_WIDTH)
EX vld $vr4, \base, (4 * LSX_REG_WIDTH)
EX vld $vr5, \base, (5 * LSX_REG_WIDTH)
EX vld $vr6, \base, (6 * LSX_REG_WIDTH)
EX vld $vr7, \base, (7 * LSX_REG_WIDTH)
EX vld $vr8, \base, (8 * LSX_REG_WIDTH)
EX vld $vr9, \base, (9 * LSX_REG_WIDTH)
EX vld $vr10, \base, (10 * LSX_REG_WIDTH)
EX vld $vr11, \base, (11 * LSX_REG_WIDTH)
EX vld $vr12, \base, (12 * LSX_REG_WIDTH)
EX vld $vr13, \base, (13 * LSX_REG_WIDTH)
EX vld $vr14, \base, (14 * LSX_REG_WIDTH)
EX vld $vr15, \base, (15 * LSX_REG_WIDTH)
EX vld $vr16, \base, (16 * LSX_REG_WIDTH)
EX vld $vr17, \base, (17 * LSX_REG_WIDTH)
EX vld $vr18, \base, (18 * LSX_REG_WIDTH)
EX vld $vr19, \base, (19 * LSX_REG_WIDTH)
EX vld $vr20, \base, (20 * LSX_REG_WIDTH)
EX vld $vr21, \base, (21 * LSX_REG_WIDTH)
EX vld $vr22, \base, (22 * LSX_REG_WIDTH)
EX vld $vr23, \base, (23 * LSX_REG_WIDTH)
EX vld $vr24, \base, (24 * LSX_REG_WIDTH)
EX vld $vr25, \base, (25 * LSX_REG_WIDTH)
EX vld $vr26, \base, (26 * LSX_REG_WIDTH)
EX vld $vr27, \base, (27 * LSX_REG_WIDTH)
EX vld $vr28, \base, (28 * LSX_REG_WIDTH)
EX vld $vr29, \base, (29 * LSX_REG_WIDTH)
EX vld $vr30, \base, (30 * LSX_REG_WIDTH)
EX vld $vr31, \base, (31 * LSX_REG_WIDTH)
#endif
.endm
/*
 * sc_save_lasx base
 *
 * Store $xr0-$xr31 to consecutive LASX_REG_WIDTH-sized slots starting at
 * \base. Expands to nothing when CONFIG_CPU_HAS_LASX is not set.
 *
 * NOTE(review): each store is wrapped in EX, which presumably registers an
 * exception fixup for a faulting access - confirm against the EX definition.
 */
.macro sc_save_lasx base
#ifdef CONFIG_CPU_HAS_LASX
EX xvst $xr0, \base, (0 * LASX_REG_WIDTH)
EX xvst $xr1, \base, (1 * LASX_REG_WIDTH)
EX xvst $xr2, \base, (2 * LASX_REG_WIDTH)
EX xvst $xr3, \base, (3 * LASX_REG_WIDTH)
EX xvst $xr4, \base, (4 * LASX_REG_WIDTH)
EX xvst $xr5, \base, (5 * LASX_REG_WIDTH)
EX xvst $xr6, \base, (6 * LASX_REG_WIDTH)
EX xvst $xr7, \base, (7 * LASX_REG_WIDTH)
EX xvst $xr8, \base, (8 * LASX_REG_WIDTH)
EX xvst $xr9, \base, (9 * LASX_REG_WIDTH)
EX xvst $xr10, \base, (10 * LASX_REG_WIDTH)
EX xvst $xr11, \base, (11 * LASX_REG_WIDTH)
EX xvst $xr12, \base, (12 * LASX_REG_WIDTH)
EX xvst $xr13, \base, (13 * LASX_REG_WIDTH)
EX xvst $xr14, \base, (14 * LASX_REG_WIDTH)
EX xvst $xr15, \base, (15 * LASX_REG_WIDTH)
EX xvst $xr16, \base, (16 * LASX_REG_WIDTH)
EX xvst $xr17, \base, (17 * LASX_REG_WIDTH)
EX xvst $xr18, \base, (18 * LASX_REG_WIDTH)
EX xvst $xr19, \base, (19 * LASX_REG_WIDTH)
EX xvst $xr20, \base, (20 * LASX_REG_WIDTH)
EX xvst $xr21, \base, (21 * LASX_REG_WIDTH)
EX xvst $xr22, \base, (22 * LASX_REG_WIDTH)
EX xvst $xr23, \base, (23 * LASX_REG_WIDTH)
EX xvst $xr24, \base, (24 * LASX_REG_WIDTH)
EX xvst $xr25, \base, (25 * LASX_REG_WIDTH)
EX xvst $xr26, \base, (26 * LASX_REG_WIDTH)
EX xvst $xr27, \base, (27 * LASX_REG_WIDTH)
EX xvst $xr28, \base, (28 * LASX_REG_WIDTH)
EX xvst $xr29, \base, (29 * LASX_REG_WIDTH)
EX xvst $xr30, \base, (30 * LASX_REG_WIDTH)
EX xvst $xr31, \base, (31 * LASX_REG_WIDTH)
#endif
.endm
/*
 * sc_restore_lasx base
 *
 * Load $xr0-$xr31 from consecutive LASX_REG_WIDTH-sized slots starting at
 * \base. Expands to nothing when CONFIG_CPU_HAS_LASX is not set.
 *
 * NOTE(review): each load is wrapped in EX, which presumably registers an
 * exception fixup for a faulting access - confirm against the EX definition.
 */
.macro sc_restore_lasx base
#ifdef CONFIG_CPU_HAS_LASX
EX xvld $xr0, \base, (0 * LASX_REG_WIDTH)
EX xvld $xr1, \base, (1 * LASX_REG_WIDTH)
EX xvld $xr2, \base, (2 * LASX_REG_WIDTH)
EX xvld $xr3, \base, (3 * LASX_REG_WIDTH)
EX xvld $xr4, \base, (4 * LASX_REG_WIDTH)
EX xvld $xr5, \base, (5 * LASX_REG_WIDTH)
EX xvld $xr6, \base, (6 * LASX_REG_WIDTH)
EX xvld $xr7, \base, (7 * LASX_REG_WIDTH)
EX xvld $xr8, \base, (8 * LASX_REG_WIDTH)
EX xvld $xr9, \base, (9 * LASX_REG_WIDTH)
EX xvld $xr10, \base, (10 * LASX_REG_WIDTH)
EX xvld $xr11, \base, (11 * LASX_REG_WIDTH)
EX xvld $xr12, \base, (12 * LASX_REG_WIDTH)
EX xvld $xr13, \base, (13 * LASX_REG_WIDTH)
EX xvld $xr14, \base, (14 * LASX_REG_WIDTH)
EX xvld $xr15, \base, (15 * LASX_REG_WIDTH)
EX xvld $xr16, \base, (16 * LASX_REG_WIDTH)
EX xvld $xr17, \base, (17 * LASX_REG_WIDTH)
EX xvld $xr18, \base, (18 * LASX_REG_WIDTH)
EX xvld $xr19, \base, (19 * LASX_REG_WIDTH)
EX xvld $xr20, \base, (20 * LASX_REG_WIDTH)
EX xvld $xr21, \base, (21 * LASX_REG_WIDTH)
EX xvld $xr22, \base, (22 * LASX_REG_WIDTH)
EX xvld $xr23, \base, (23 * LASX_REG_WIDTH)
EX xvld $xr24, \base, (24 * LASX_REG_WIDTH)
EX xvld $xr25, \base, (25 * LASX_REG_WIDTH)
EX xvld $xr26, \base, (26 * LASX_REG_WIDTH)
EX xvld $xr27, \base, (27 * LASX_REG_WIDTH)
EX xvld $xr28, \base, (28 * LASX_REG_WIDTH)
EX xvld $xr29, \base, (29 * LASX_REG_WIDTH)
EX xvld $xr30, \base, (30 * LASX_REG_WIDTH)
EX xvld $xr31, \base, (31 * LASX_REG_WIDTH)
#endif
.endm
/*
* Save a thread's fp context.
*/
@ -166,6 +314,76 @@ SYM_FUNC_START(_restore_fp)
jr ra
SYM_FUNC_END(_restore_fp)
#ifdef CONFIG_CPU_HAS_LSX
/*
 * Save a thread's LSX vector context.
 *
 * a0: context pointer handed to lsx_save_all as its "thread" argument
 *     (the C prototype is void _save_lsx(struct loongarch_fpu *fpu))
 * t1 and t2 are used as scratch. Exported for use by modules.
 */
SYM_FUNC_START(_save_lsx)
lsx_save_all a0 t1 t2
jr ra
SYM_FUNC_END(_save_lsx)
EXPORT_SYMBOL(_save_lsx)
/*
 * Restore a thread's LSX vector context.
 *
 * a0: context pointer handed to lsx_restore_all as its "thread" argument
 *     (the C prototype is void _restore_lsx(struct loongarch_fpu *fpu))
 * t1 and t2 are used as scratch.
 */
SYM_FUNC_START(_restore_lsx)
lsx_restore_all a0 t1 t2
jr ra
SYM_FUNC_END(_restore_lsx)
/*
 * Save the upper 64 bits of every LSX register.
 *
 * a0: context pointer handed to lsx_save_all_upper
 * t0 and t1 are used as scratch.
 */
SYM_FUNC_START(_save_lsx_upper)
lsx_save_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_save_lsx_upper)
/*
 * Restore the upper 64 bits of every LSX register.
 *
 * a0: context pointer handed to lsx_restore_all_upper
 * t0 and t1 are used as scratch.
 */
SYM_FUNC_START(_restore_lsx_upper)
lsx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lsx_upper)
/*
 * Initialise the upper 64 bits of every LSX register through
 * lsx_init_all_upper. Takes no arguments; t1 is used as scratch.
 */
SYM_FUNC_START(_init_lsx_upper)
lsx_init_all_upper t1
jr ra
SYM_FUNC_END(_init_lsx_upper)
#endif
#ifdef CONFIG_CPU_HAS_LASX
/*
 * Save a thread's LASX vector context.
 *
 * Expands the lasx_save_all macro with a0, t1 and t2 and then returns to
 * the caller through ra.
 * NOTE(review): a0 is presumably the base of the thread's save area and
 * t1/t2 scratch registers for the macro -- confirm against the macro
 * definition, which is outside this chunk.
 */
SYM_FUNC_START(_save_lasx)
lasx_save_all a0 t1 t2
jr ra
SYM_FUNC_END(_save_lasx)
/* Exported in addition to being usable by built-in code. */
EXPORT_SYMBOL(_save_lasx)
/*
 * Restore a thread's LASX vector context.
 *
 * Expands the lasx_restore_all macro with a0, t1 and t2 and then returns
 * to the caller through ra.
 * NOTE(review): a0 is presumably the base of the thread's save area and
 * t1/t2 scratch registers for the macro -- confirm against the macro
 * definition, which is outside this chunk.
 */
SYM_FUNC_START(_restore_lasx)
lasx_restore_all a0 t1 t2
jr ra
SYM_FUNC_END(_restore_lasx)
/*
 * Expands lasx_save_all_upper with a0, t0 and t1, then returns through ra.
 * NOTE(review): presumably stores only the part of each LASX register that
 * lies above the 128-bit LSX part -- confirm against the macro definition.
 */
SYM_FUNC_START(_save_lasx_upper)
lasx_save_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_save_lasx_upper)
/*
 * Expands lasx_restore_all_upper with a0, t0 and t1, then returns through ra.
 * NOTE(review): presumably reloads only the part of each LASX register that
 * lies above the 128-bit LSX part -- confirm against the macro definition.
 */
SYM_FUNC_START(_restore_lasx_upper)
lasx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lasx_upper)
/*
 * Expands lasx_init_all_upper with t1 only (no memory base argument is
 * passed), then returns through ra.
 * NOTE(review): presumably sets the upper part of every LASX register to an
 * initial pattern using t1 as a temporary -- confirm against the macro.
 */
SYM_FUNC_START(_init_lasx_upper)
lasx_init_all_upper t1
jr ra
SYM_FUNC_END(_init_lasx_upper)
#endif
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
@ -244,6 +462,58 @@ SYM_FUNC_START(_restore_fp_context)
jr ra
SYM_FUNC_END(_restore_fp_context)
/*
 * Store the FCC, FCSR and LSX register state to the three given buffers.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * FCC is written first (t0/t1 handed to the macro), then FCSR (t0), then
 * the vector registers through a0.  a0 is overwritten with 0 on the
 * success path before returning.
 * NOTE(review): a faulting access is presumably redirected to the `fault`
 * routine, which returns -EFAULT -- confirm against the sc_* macros.
 */
SYM_FUNC_START(_save_lsx_context)
sc_save_fcc a1, t0, t1
sc_save_fcsr a2, t0
sc_save_lsx a0
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_save_lsx_context)
/*
 * Load the LSX register state, FCC and FCSR from the three given buffers.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * The vector registers are loaded first through a0, then FCC (t1/t2 handed
 * to the macro), then FCSR (t1).  a0 is overwritten with 0 on the success
 * path before returning.
 * NOTE(review): a faulting access is presumably redirected to the `fault`
 * routine, which returns -EFAULT -- confirm against the sc_* macros.
 */
SYM_FUNC_START(_restore_lsx_context)
sc_restore_lsx a0
sc_restore_fcc a1, t1, t2
sc_restore_fcsr a2, t1
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_restore_lsx_context)
/*
 * Store the FCC, FCSR and LASX register state to the three given buffers.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * FCC is written first (t0/t1 handed to the macro), then FCSR (t0), then
 * the vector registers through a0.  a0 is overwritten with 0 on the
 * success path before returning.
 * NOTE(review): a faulting access is presumably redirected to the `fault`
 * routine, which returns -EFAULT -- confirm against the sc_* macros.
 */
SYM_FUNC_START(_save_lasx_context)
sc_save_fcc a1, t0, t1
sc_save_fcsr a2, t0
sc_save_lasx a0
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_save_lasx_context)
/*
 * Load the LASX register state, FCC and FCSR from the three given buffers.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * The vector registers are loaded first through a0, then FCC (t1/t2 handed
 * to the macro), then FCSR (t1).  a0 is overwritten with 0 on the success
 * path before returning.
 * NOTE(review): a faulting access is presumably redirected to the `fault`
 * routine, which returns -EFAULT -- confirm against the sc_* macros.
 */
SYM_FUNC_START(_restore_lasx_context)
sc_restore_lasx a0
sc_restore_fcc a1, t1, t2
sc_restore_fcsr a2, t1
li.w a0, 0 # success
jr ra
SYM_FUNC_END(_restore_lasx_context)
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
jr ra

View File

@ -117,8 +117,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*/
preempt_disable();
if (is_fpu_owner())
save_fp(current);
if (is_fpu_owner()) {
if (is_lasx_enabled())
save_lasx(current);
else if (is_lsx_enabled())
save_lsx(current);
else
save_fp(current);
}
preempt_enable();

View File

@ -250,6 +250,90 @@ static int cfg_set(struct task_struct *target,
return 0;
}
#ifdef CONFIG_CPU_HAS_LSX
/*
 * Write all NUM_FPU_REGS registers of @target into @to, one regset slot each.
 *
 * For every register the first min(regset->size, live_sz) bytes come from
 * the saved register image; the remainder of the regset->size slot is
 * filled with 0xff bytes, emitted as 64-bit all-ones words.
 */
static void copy_pad_fprs(struct task_struct *target,
			  const struct user_regset *regset,
			  struct membuf *to, unsigned int live_sz)
{
	unsigned long long ones = ~0ull;
	unsigned int copy_bytes = min(regset->size, live_sz);
	unsigned int pad_bytes = regset->size - copy_bytes;
	int reg, word;

	/* The padding is emitted in whole 64-bit words only. */
	WARN_ON(pad_bytes % sizeof(ones));

	for (reg = 0; reg < NUM_FPU_REGS; reg++) {
		/* Live part of this register ... */
		membuf_write(to, &target->thread.fpu.fpr[reg], copy_bytes);

		/* ... followed by the 0xff filler for the rest of the slot. */
		for (word = 0; word < (pad_bytes / sizeof(ones)); word++)
			membuf_store(to, ones);
	}
}
/*
 * regset_get handler shared by the LSX and LASX regsets.
 *
 * Emits NUM_FPU_REGS slots of regset->size bytes each into @to.  How many
 * bytes of each slot carry real register data depends on which context the
 * task has live; everything beyond that is filled with 0xff.
 *
 * Always returns 0.
 */
static int simd_get(struct task_struct *target,
		    const struct user_regset *regset,
		    struct membuf to)
{
	const unsigned int wr_size = NUM_FPU_REGS * regset->size;

	/* The task hasn't used FP or LSX, fill with 0xff */
	if (!tsk_used_math(target)) {
		copy_pad_fprs(target, regset, &to, 0);
		return 0;
	}

	/* Copy scalar FP context (8 bytes per register), fill the rest with 0xff */
	if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) {
		copy_pad_fprs(target, regset, &to, 8);
		return 0;
	}

#ifdef CONFIG_CPU_HAS_LASX
	/* Copy LSX 128 Bit context (16 bytes per register), fill the rest with 0xff */
	if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) {
		copy_pad_fprs(target, regset, &to, 16);
		return 0;
	}
#endif

	if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
		/* Slot size equals the in-kernel register size: one bulk copy */
		membuf_write(&to, &target->thread.fpu.fpr, wr_size);
	} else {
		/* Copy as much context as possible, fill the rest with 0xff */
		copy_pad_fprs(target, regset, &to,
			      sizeof(target->thread.fpu.fpr[0]));
	}

	return 0;
}
/*
 * set handler shared by the LSX and LASX regsets.
 *
 * Calls init_fp_ctx() on @target, then copies the caller-supplied register
 * image (@kbuf or @ubuf, window described by @pos/@count) into
 * target->thread.fpu.fpr.  Each incoming slot is regset->size bytes wide.
 *
 * Returns the result of user_regset_copyin(); in the per-register path the
 * individual results are OR-ed together.
 */
static int simd_set(struct task_struct *target,
		    const struct user_regset *regset,
		    unsigned int pos, unsigned int count,
		    const void *kbuf, const void __user *ubuf)
{
	const unsigned int wr_size = NUM_FPU_REGS * regset->size;
	unsigned int chunk;
	int reg, off, ret;

	init_fp_ctx(target);

	/* Slot size equals the in-kernel register size: one bulk copy */
	if (sizeof(target->thread.fpu.fpr[0]) == regset->size)
		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
					  &target->thread.fpu.fpr,
					  0, wr_size);

	/* Copy as much context as possible, one register slot at a time */
	chunk = min_t(unsigned int, regset->size,
		      sizeof(target->thread.fpu.fpr[0]));

	ret = 0;
	for (reg = 0, off = 0; reg < NUM_FPU_REGS; reg++, off += regset->size)
		ret |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
					  &target->thread.fpu.fpr[reg],
					  off, off + chunk);

	return ret;
}
#endif /* CONFIG_CPU_HAS_LSX */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
@ -708,6 +792,12 @@ enum loongarch_regset {
REGSET_GPR,
REGSET_FPR,
REGSET_CPUCFG,
#ifdef CONFIG_CPU_HAS_LSX
REGSET_LSX,
#endif
#ifdef CONFIG_CPU_HAS_LASX
REGSET_LASX,
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
REGSET_HW_BREAK,
REGSET_HW_WATCH,
@ -739,6 +829,26 @@ static const struct user_regset loongarch64_regsets[] = {
.regset_get = cfg_get,
.set = cfg_set,
},
#ifdef CONFIG_CPU_HAS_LSX
[REGSET_LSX] = {
.core_note_type = NT_LOONGARCH_LSX,
.n = NUM_FPU_REGS,
.size = 16,
.align = 16,
.regset_get = simd_get,
.set = simd_set,
},
#endif
#ifdef CONFIG_CPU_HAS_LASX
[REGSET_LASX] = {
.core_note_type = NT_LOONGARCH_LASX,
.n = NUM_FPU_REGS,
.size = 32,
.align = 32,
.regset_get = simd_get,
.set = simd_set,
},
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
.core_note_type = NT_LOONGARCH_HW_BREAK,

View File

@ -50,6 +50,14 @@ extern asmlinkage int
_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
extern asmlinkage int
_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
/*
 * Assembly helpers for the LSX/LASX signal context.  Each takes three user
 * pointers -- the vector register area, the FCC word and the FCSR word --
 * and returns an int status (0 on the success path of the assembly bodies).
 */
extern asmlinkage int
_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
extern asmlinkage int
_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
struct rt_sigframe {
struct siginfo rs_info;
@ -65,6 +73,8 @@ struct extctx_layout {
unsigned long size;
unsigned int flags;
struct _ctx_layout fpu;
struct _ctx_layout lsx;
struct _ctx_layout lasx;
struct _ctx_layout end;
};
@ -115,6 +125,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx)
return err;
}
/*
 * Copy the saved LSX state of the current task into a user-space
 * struct lsx_context.
 *
 * For each of the NUM_FPU_REGS registers, the two 64-bit words of the
 * in-kernel register image are written to consecutive slots of ctx->regs,
 * followed by the saved FCC and FCSR values.
 *
 * Returns 0 on success, or the OR of the __put_user() results if any user
 * access failed.
 */
static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx)
{
	int i, w;
	int err = 0;
	/* Keep the __user annotation through the cast so sparse stays quiet */
	uint64_t __user *regs = (uint64_t __user *)&ctx->regs;
	uint64_t __user *fcc = &ctx->fcc;
	uint32_t __user *fcsr = &ctx->fcsr;

	for (i = 0; i < NUM_FPU_REGS; i++) {
		/* A 128-bit LSX register occupies two 64-bit words */
		for (w = 0; w < 2; w++)
			err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], w),
					  &regs[2*i + w]);
	}

	err |= __put_user(current->thread.fpu.fcc, fcc);
	err |= __put_user(current->thread.fpu.fcsr, fcsr);

	return err;
}
/*
 * Load the saved LSX state of the current task from a user-space
 * struct lsx_context.
 *
 * For each of the NUM_FPU_REGS registers, two consecutive 64-bit words are
 * read from ctx->regs and stored into words 0 and 1 of the in-kernel
 * register image; FCC and FCSR are read afterwards.
 *
 * Returns 0 on success, or the OR of the __get_user() results if any user
 * access failed.
 */
static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx)
{
	int i, w;
	int err = 0;
	u64 fpr_val;
	/* Keep the __user annotation through the cast so sparse stays quiet */
	uint64_t __user *regs = (uint64_t __user *)&ctx->regs;
	uint64_t __user *fcc = &ctx->fcc;
	uint32_t __user *fcsr = &ctx->fcsr;

	for (i = 0; i < NUM_FPU_REGS; i++) {
		/* A 128-bit LSX register occupies two 64-bit words */
		for (w = 0; w < 2; w++) {
			err |= __get_user(fpr_val, &regs[2*i + w]);
			set_fpr64(&current->thread.fpu.fpr[i], w, fpr_val);
		}
	}

	err |= __get_user(current->thread.fpu.fcc, fcc);
	err |= __get_user(current->thread.fpu.fcsr, fcsr);

	return err;
}
/*
 * Copy the saved LASX state of the current task into a user-space
 * struct lasx_context.
 *
 * For each of the NUM_FPU_REGS registers, the four 64-bit words of the
 * in-kernel register image are written to consecutive slots of ctx->regs,
 * followed by the saved FCC and FCSR values.
 *
 * Returns 0 on success, or the OR of the __put_user() results if any user
 * access failed.
 */
static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx)
{
	int i, w;
	int err = 0;
	/* Keep the __user annotation through the cast so sparse stays quiet */
	uint64_t __user *regs = (uint64_t __user *)&ctx->regs;
	uint64_t __user *fcc = &ctx->fcc;
	uint32_t __user *fcsr = &ctx->fcsr;

	for (i = 0; i < NUM_FPU_REGS; i++) {
		/* A 256-bit LASX register occupies four 64-bit words */
		for (w = 0; w < 4; w++)
			err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], w),
					  &regs[4*i + w]);
	}

	err |= __put_user(current->thread.fpu.fcc, fcc);
	err |= __put_user(current->thread.fpu.fcsr, fcsr);

	return err;
}
/*
 * Load the saved LASX state of the current task from a user-space
 * struct lasx_context.
 *
 * For each of the NUM_FPU_REGS registers, four consecutive 64-bit words
 * are read from ctx->regs and stored into words 0..3 of the in-kernel
 * register image; FCC and FCSR are read afterwards.
 *
 * Returns 0 on success, or the OR of the __get_user() results if any user
 * access failed.
 */
static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
{
	int i, w;
	int err = 0;
	u64 fpr_val;
	/* Keep the __user annotation through the cast so sparse stays quiet */
	uint64_t __user *regs = (uint64_t __user *)&ctx->regs;
	uint64_t __user *fcc = &ctx->fcc;
	uint32_t __user *fcsr = &ctx->fcsr;

	for (i = 0; i < NUM_FPU_REGS; i++) {
		/* A 256-bit LASX register occupies four 64-bit words */
		for (w = 0; w < 4; w++) {
			err |= __get_user(fpr_val, &regs[4*i + w]);
			set_fpr64(&current->thread.fpu.fpr[i], w, fpr_val);
		}
	}

	err |= __get_user(current->thread.fpu.fcc, fcc);
	err |= __get_user(current->thread.fpu.fcsr, fcsr);

	return err;
}
/*
* Wrappers for the assembly _{save,restore}_fp_context functions.
*/
@ -136,6 +236,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx)
return _restore_fp_context(regs, fcc, fcsr);
}
/*
 * Thin C wrapper around the assembly _save_lsx_context(): hands it the
 * addresses of the regs, fcc and fcsr members of the user-space @ctx and
 * returns its result unchanged.
 */
static int save_hw_lsx_context(struct lsx_context __user *ctx)
{
	return _save_lsx_context(&ctx->regs, &ctx->fcc, &ctx->fcsr);
}
/*
 * Thin C wrapper around the assembly _restore_lsx_context(): hands it the
 * addresses of the regs, fcc and fcsr members of the user-space @ctx and
 * returns its result unchanged.
 */
static int restore_hw_lsx_context(struct lsx_context __user *ctx)
{
	return _restore_lsx_context(&ctx->regs, &ctx->fcc, &ctx->fcsr);
}
/*
 * Thin C wrapper around the assembly _save_lasx_context(): hands it the
 * addresses of the regs, fcc and fcsr members of the user-space @ctx and
 * returns its result unchanged.
 */
static int save_hw_lasx_context(struct lasx_context __user *ctx)
{
	return _save_lasx_context(&ctx->regs, &ctx->fcc, &ctx->fcsr);
}
/*
 * Thin C wrapper around the assembly _restore_lasx_context(): hands it the
 * addresses of the regs, fcc and fcsr members of the user-space @ctx and
 * returns its result unchanged.
 */
static int restore_hw_lasx_context(struct lasx_context __user *ctx)
{
	return _restore_lasx_context(&ctx->regs, &ctx->fcc, &ctx->fcsr);
}
static int fcsr_pending(unsigned int __user *fcsr)
{
int err, sig = 0;
@ -227,6 +363,162 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx)
return err ?: sig;
}
/*
 * Write the current task's LSX state into the LSX record of a signal frame.
 *
 * @extctx: layout whose lsx.addr points at the record's sctx_info header
 *          and whose lsx.size is stored into that header.
 *
 * The register payload is written while holding lock_fpu_owner(); the
 * header (magic and size) is written after the lock is dropped.  If any of
 * these user accesses fails, the first and last register words plus fcc and
 * fcsr are written with zero outside the lock and the whole sequence is
 * retried.
 * NOTE(review): the touch-and-retry presumably exists because accesses made
 * under lock_fpu_owner() cannot service page faults -- confirm.
 *
 * Returns 0 on success, or the non-zero __put_user() result if even the
 * touch accesses fail.
 */
static int protected_save_lsx_context(struct extctx_layout *extctx)
{
int err = 0;
struct sctx_info __user *info = extctx->lsx.addr;
struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
uint64_t __user *fcc = &lsx_ctx->fcc;
uint32_t __user *fcsr = &lsx_ctx->fcsr;
while (1) {
lock_fpu_owner();
/* LSX enabled: dump the hardware registers straight to user memory */
if (is_lsx_enabled())
err = save_hw_lsx_context(lsx_ctx);
else {
/* Otherwise refresh the in-kernel copy (if we own the FPU) and copy that */
if (is_fpu_owner())
save_fp(current);
err = copy_lsx_to_sigcontext(lsx_ctx);
}
unlock_fpu_owner();
/* Record header: identifies the record type and its size */
err |= __put_user(LSX_CTX_MAGIC, &info->magic);
err |= __put_user(extctx->lsx.size, &info->size);
if (likely(!err))
break;
/* Touch the LSX context and try again */
err = __put_user(0, &regs[0]) |
__put_user(0, &regs[32*2-1]) |
__put_user(0, fcc) |
__put_user(0, fcsr);
if (err)
return err; /* really bad sigcontext */
}
return err;
}
/*
 * Load the current task's LSX state from the LSX record of a signal frame.
 *
 * @extctx: layout whose lsx.addr points at the record's sctx_info header.
 *
 * fcsr_pending() is consulted first; a negative result is returned at once,
 * any other result is remembered in sig.  The register payload is then read
 * while holding lock_fpu_owner().  If that fails, the first and last
 * register words plus fcc and fcsr are read outside the lock and the
 * restore is retried; the loop gives up when those reads fail too.
 * NOTE(review): the touch-and-retry presumably exists because accesses made
 * under lock_fpu_owner() cannot service page faults -- confirm.
 *
 * Returns the error if one occurred, otherwise the fcsr_pending() result.
 */
static int protected_restore_lsx_context(struct extctx_layout *extctx)
{
int err = 0, sig = 0, tmp __maybe_unused;
struct sctx_info __user *info = extctx->lsx.addr;
struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
uint64_t __user *fcc = &lsx_ctx->fcc;
uint32_t __user *fcsr = &lsx_ctx->fcsr;
err = sig = fcsr_pending(fcsr);
if (err < 0)
return err;
while (1) {
lock_fpu_owner();
/* LSX enabled: load the hardware registers straight from user memory */
if (is_lsx_enabled())
err = restore_hw_lsx_context(lsx_ctx);
else {
/* Otherwise fill the in-kernel copy and, if we own the FPU, reload FP from it */
err = copy_lsx_from_sigcontext(lsx_ctx);
if (is_fpu_owner())
restore_fp(current);
}
unlock_fpu_owner();
if (likely(!err))
break;
/* Touch the LSX context and try again */
err = __get_user(tmp, &regs[0]) |
__get_user(tmp, &regs[32*2-1]) |
__get_user(tmp, fcc) |
__get_user(tmp, fcsr);
if (err)
break; /* really bad sigcontext */
}
return err ?: sig;
}
/*
 * Write the current task's LASX state into the LASX record of a signal frame.
 *
 * @extctx: layout whose lasx.addr points at the record's sctx_info header
 *          and whose lasx.size is stored into that header.
 *
 * The register payload is written while holding lock_fpu_owner(); the
 * header (magic and size) is written after the lock is dropped.  If any of
 * these user accesses fails, the first and last register words plus fcc and
 * fcsr are written with zero outside the lock and the whole sequence is
 * retried.
 * NOTE(review): the touch-and-retry presumably exists because accesses made
 * under lock_fpu_owner() cannot service page faults -- confirm.
 *
 * Returns 0 on success, or the non-zero __put_user() result if even the
 * touch accesses fail.
 */
static int protected_save_lasx_context(struct extctx_layout *extctx)
{
int err = 0;
struct sctx_info __user *info = extctx->lasx.addr;
struct lasx_context __user *lasx_ctx =
(struct lasx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
uint64_t __user *fcc = &lasx_ctx->fcc;
uint32_t __user *fcsr = &lasx_ctx->fcsr;
while (1) {
lock_fpu_owner();
/* LASX enabled: dump the hardware registers straight to user memory */
if (is_lasx_enabled())
err = save_hw_lasx_context(lasx_ctx);
else {
/* Otherwise refresh the in-kernel copy from whichever unit is active, then copy that */
if (is_lsx_enabled())
save_lsx(current);
else if (is_fpu_owner())
save_fp(current);
err = copy_lasx_to_sigcontext(lasx_ctx);
}
unlock_fpu_owner();
/* Record header: identifies the record type and its size */
err |= __put_user(LASX_CTX_MAGIC, &info->magic);
err |= __put_user(extctx->lasx.size, &info->size);
if (likely(!err))
break;
/* Touch the LASX context and try again */
err = __put_user(0, &regs[0]) |
__put_user(0, &regs[32*4-1]) |
__put_user(0, fcc) |
__put_user(0, fcsr);
if (err)
return err; /* really bad sigcontext */
}
return err;
}
/*
 * Load the current task's LASX state from the LASX record of a signal frame.
 *
 * @extctx: layout whose lasx.addr points at the record's sctx_info header.
 *
 * fcsr_pending() is consulted first; a negative result is returned at once,
 * any other result is remembered in sig.  The register payload is then read
 * while holding lock_fpu_owner().  If that fails, the first and last
 * register words plus fcc and fcsr are read outside the lock and the
 * restore is retried; the loop gives up when those reads fail too.
 * NOTE(review): the touch-and-retry presumably exists because accesses made
 * under lock_fpu_owner() cannot service page faults -- confirm.
 *
 * Returns the error if one occurred, otherwise the fcsr_pending() result.
 */
static int protected_restore_lasx_context(struct extctx_layout *extctx)
{
int err = 0, sig = 0, tmp __maybe_unused;
struct sctx_info __user *info = extctx->lasx.addr;
struct lasx_context __user *lasx_ctx =
(struct lasx_context *)get_ctx_through_ctxinfo(info);
uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
uint64_t __user *fcc = &lasx_ctx->fcc;
uint32_t __user *fcsr = &lasx_ctx->fcsr;
err = sig = fcsr_pending(fcsr);
if (err < 0)
return err;
while (1) {
lock_fpu_owner();
/* LASX enabled: load the hardware registers straight from user memory */
if (is_lasx_enabled())
err = restore_hw_lasx_context(lasx_ctx);
else {
/* Otherwise fill the in-kernel copy, then reload whichever unit is active from it */
err = copy_lasx_from_sigcontext(lasx_ctx);
if (is_lsx_enabled())
restore_lsx(current);
else if (is_fpu_owner())
restore_fp(current);
}
unlock_fpu_owner();
if (likely(!err))
break;
/* Touch the LASX context and try again */
err = __get_user(tmp, &regs[0]) |
__get_user(tmp, &regs[32*4-1]) |
__get_user(tmp, fcc) |
__get_user(tmp, fcsr);
if (err)
break; /* really bad sigcontext */
}
return err ?: sig;
}
static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
struct extctx_layout *extctx)
{
@ -240,7 +532,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
for (i = 1; i < 32; i++)
err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
if (extctx->fpu.addr)
if (extctx->lasx.addr)
err |= protected_save_lasx_context(extctx);
else if (extctx->lsx.addr)
err |= protected_save_lsx_context(extctx);
else if (extctx->fpu.addr)
err |= protected_save_fpu_context(extctx);
/* Set the "end" magic */
@ -274,6 +570,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
extctx->fpu.addr = info;
break;
case LSX_CTX_MAGIC:
if (size < (sizeof(struct sctx_info) +
sizeof(struct lsx_context)))
goto invalid;
extctx->lsx.addr = info;
break;
case LASX_CTX_MAGIC:
if (size < (sizeof(struct sctx_info) +
sizeof(struct lasx_context)))
goto invalid;
extctx->lasx.addr = info;
break;
default:
goto invalid;
}
@ -319,7 +629,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
for (i = 1; i < 32; i++)
err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
if (extctx.fpu.addr)
if (extctx.lasx.addr)
err |= protected_restore_lasx_context(&extctx);
else if (extctx.lsx.addr)
err |= protected_restore_lsx_context(&extctx);
else if (extctx.fpu.addr)
err |= protected_restore_fpu_context(&extctx);
bad:
@ -375,7 +689,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
extctx->size += extctx->end.size;
if (extctx->flags & SC_USED_FP) {
if (cpu_has_fpu)
if (cpu_has_lasx && thread_lasx_context_live())
new_sp = extframe_alloc(extctx, &extctx->lasx,
sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp);
else if (cpu_has_lsx && thread_lsx_context_live())
new_sp = extframe_alloc(extctx, &extctx->lsx,
sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp);
else if (cpu_has_fpu)
new_sp = extframe_alloc(extctx, &extctx->fpu,
sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
}

View File

@ -852,12 +852,67 @@ static void init_restore_fp(void)
BUG_ON(!is_fp_enabled());
}
/*
 * Enable LSX for the current task and make its vector state valid.
 *
 * Three cases are handled after enable_lsx():
 *  - no live LSX context yet: set up FP via init_restore_fp(), initialise
 *    the upper vector parts and mark the LSX context live;
 *  - live context, task owns the FPU but not SIMD: reload only the upper
 *    vector parts from the saved state;
 *  - live context, task owns neither: take FPU ownership and reload the
 *    whole LSX state.
 * If the task already is the SIMD owner nothing is reloaded.
 *
 * TIF_USEDSIMD is set in every case, and FP and LSX must both be enabled
 * on exit.
 */
static void init_restore_lsx(void)
{
	enable_lsx();

	if (!thread_lsx_context_live()) {
		/* First time LSX context user */
		init_restore_fp();
		init_lsx_upper();
		set_thread_flag(TIF_LSX_CTX_LIVE);
	} else if (!is_simd_owner()) {
		if (is_fpu_owner()) {
			restore_lsx_upper(current);
		} else {
			__own_fpu();
			restore_lsx(current);
		}
	}

	set_thread_flag(TIF_USEDSIMD);

	BUG_ON(!is_fp_enabled());
	BUG_ON(!is_lsx_enabled());
}
/*
 * Enable LASX for the current task and make its vector state valid.
 *
 * Three cases are handled after enable_lasx():
 *  - no live LASX context yet: bring up LSX via init_restore_lsx(),
 *    initialise the upper LASX parts and mark the LASX context live;
 *  - live context, task owns the FPU or SIMD: bring up LSX via
 *    init_restore_lsx() and reload only the upper LASX parts;
 *  - live context, task owns neither: take FPU ownership, enable LSX and
 *    reload the whole LASX state.
 *
 * TIF_USEDSIMD is set in every case, and FP, LSX and LASX must all be
 * enabled on exit.
 */
static void init_restore_lasx(void)
{
	enable_lasx();

	if (!thread_lasx_context_live()) {
		/* First time LASX context user */
		init_restore_lsx();
		init_lasx_upper();
		set_thread_flag(TIF_LASX_CTX_LIVE);
	} else if (is_fpu_owner() || is_simd_owner()) {
		init_restore_lsx();
		restore_lasx_upper(current);
	} else {
		__own_fpu();
		enable_lsx();
		restore_lasx(current);
	}

	set_thread_flag(TIF_USEDSIMD);

	BUG_ON(!is_fp_enabled());
	BUG_ON(!is_lsx_enabled());
	BUG_ON(!is_lasx_enabled());
}
asmlinkage void noinstr do_fpu(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
die_if_kernel("do_fpu invoked from kernel context!", regs);
BUG_ON(is_lsx_enabled());
BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_fp();
@ -872,9 +927,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
force_sig(SIGILL);
local_irq_disable();
if (!cpu_has_lsx) {
force_sig(SIGILL);
goto out;
}
die_if_kernel("do_lsx invoked from kernel context!", regs);
BUG_ON(is_lasx_enabled());
preempt_disable();
init_restore_lsx();
preempt_enable();
out:
local_irq_disable();
irqentry_exit(regs, state);
}
@ -883,9 +949,19 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs)
irqentry_state_t state = irqentry_enter(regs);
local_irq_enable();
force_sig(SIGILL);
local_irq_disable();
if (!cpu_has_lasx) {
force_sig(SIGILL);
goto out;
}
die_if_kernel("do_lasx invoked from kernel context!", regs);
preempt_disable();
init_restore_lasx();
preempt_enable();
out:
local_irq_disable();
irqentry_exit(regs, state);
}