mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
arm64: vDSO: Wire up getrandom() vDSO implementation
Hook up the generic vDSO implementation to the aarch64 vDSO data page. The _vdso_rng_data required data is placed within the _vdso_data vvar page, by using an offset larger than the vdso_data. The vDSO function requires a ChaCha20 implementation that does not write to the stack, and that can do an entire ChaCha20 permutation. The one provided uses NEON on the permute operation, with a fallback to the syscall for chips that do not support AdvSIMD. This also passes the vdso_test_chacha test along with vdso_test_getrandom. The vdso_test_getrandom bench-single result on Neoverse-N1 shows: vdso: 25000000 times in 0.783884250 seconds libc: 25000000 times in 8.780275399 seconds syscall: 25000000 times in 8.786581518 seconds A small fixup to arch/arm64/include/asm/mman.h was required to avoid pulling kernel code into the vDSO, similar to what's already done in arch/arm64/include/asm/rwonce.h. Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Acked-by: Will Deacon <will@kernel.org> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
This commit is contained in:
parent
2c2ca3416b
commit
712676ea2b
@ -262,6 +262,7 @@ config ARM64
|
||||
select TRACE_IRQFLAGS_NMI_SUPPORT
|
||||
select HAVE_SOFTIRQ_ON_OWN_STACK
|
||||
select USER_STACKTRACE_SUPPORT
|
||||
select VDSO_GETRANDOM
|
||||
help
|
||||
ARM 64-bit (AArch64) Linux support.
|
||||
|
||||
|
@ -2,9 +2,11 @@
|
||||
#ifndef __ASM_MMAN_H__
|
||||
#define __ASM_MMAN_H__
|
||||
|
||||
#include <uapi/asm/mman.h>
|
||||
|
||||
#ifndef BUILD_VDSO
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/types.h>
|
||||
#include <uapi/asm/mman.h>
|
||||
|
||||
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
|
||||
unsigned long pkey __always_unused)
|
||||
@ -60,4 +62,6 @@ static inline bool arch_validate_flags(unsigned long vm_flags)
|
||||
}
|
||||
#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
|
||||
|
||||
#endif /* !BUILD_VDSO */
|
||||
|
||||
#endif /* ! __ASM_MMAN_H__ */
|
||||
|
50
arch/arm64/include/asm/vdso/getrandom.h
Normal file
50
arch/arm64/include/asm/vdso/getrandom.h
Normal file
@ -0,0 +1,50 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef __ASM_VDSO_GETRANDOM_H
|
||||
#define __ASM_VDSO_GETRANDOM_H
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/vdso/vsyscall.h>
|
||||
#include <vdso/datapage.h>
|
||||
|
||||
/**
|
||||
* getrandom_syscall - Invoke the getrandom() syscall.
|
||||
* @_buffer: Destination buffer to fill with random bytes.
|
||||
* @_len: Size of @_buffer in bytes.
|
||||
* @_flags: Zero or more GRND_* flags.
|
||||
* Returns: The number of random bytes written to @_buffer, or a negative value indicating an error.
|
||||
*/
|
||||
static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
|
||||
{
|
||||
/*
 * Each local below is pinned to the register named in its asm() binding:
 * the three arguments go in x0-x2 and the syscall number in x8.
 */
register void *buffer asm ("x0") = _buffer;
|
||||
register size_t len asm ("x1") = _len;
|
||||
register unsigned int flags asm ("x2") = _flags;
|
||||
/* ret shares x0 with buffer: whatever x0 holds after the svc is returned. */
register long ret asm ("x0");
|
||||
register long nr asm ("x8") = __NR_getrandom;
|
||||
|
||||
/*
 * volatile keeps the asm from being dropped or merged; the "memory"
 * clobber tells the compiler that memory (the destination buffer) may
 * have been written by the time the asm completes.
 */
asm volatile(
|
||||
" svc #0\n"
|
||||
: "=r" (ret)
|
||||
: "r" (buffer), "r" (len), "r" (flags), "r" (nr)
|
||||
: "memory");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
|
||||
{
|
||||
/*
|
||||
* The RNG data is in the real VVAR data page, but if a task belongs to a time namespace
|
||||
* then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_
|
||||
* PAGE_OFFSET points to the real VVAR page.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
|
||||
return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT);
|
||||
return &_vdso_rng_data;
|
||||
}
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif /* __ASM_VDSO_GETRANDOM_H */
|
@ -2,11 +2,19 @@
|
||||
#ifndef __ASM_VDSO_VSYSCALL_H
|
||||
#define __ASM_VDSO_VSYSCALL_H
|
||||
|
||||
#define __VDSO_RND_DATA_OFFSET 480
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/timekeeper_internal.h>
|
||||
#include <vdso/datapage.h>
|
||||
|
||||
/* Page indices within the vvar area; multiplied by the page size by users. */
enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET = 0,	/* first vvar page */
	VVAR_TIMENS_PAGE_OFFSET,	/* page following the data page */
	VVAR_NR_PAGES,			/* count of the entries above */
};
|
||||
|
||||
#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
|
||||
|
||||
extern struct vdso_data *vdso_data;
|
||||
@ -21,6 +29,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
|
||||
}
|
||||
#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
|
||||
|
||||
static __always_inline
|
||||
struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
|
||||
{
|
||||
return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
|
||||
}
|
||||
#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
|
||||
|
||||
static __always_inline
|
||||
void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
|
||||
{
|
||||
|
@ -34,12 +34,6 @@ enum vdso_abi {
|
||||
VDSO_ABI_AA32,
|
||||
};
|
||||
|
||||
enum vvar_pages {
|
||||
VVAR_DATA_PAGE_OFFSET,
|
||||
VVAR_TIMENS_PAGE_OFFSET,
|
||||
VVAR_NR_PAGES,
|
||||
};
|
||||
|
||||
struct vdso_abi_info {
|
||||
const char *name;
|
||||
const char *vdso_code_start;
|
||||
|
@ -9,7 +9,7 @@
|
||||
# Include the generic Makefile to check the built vdso.
|
||||
include $(srctree)/lib/vdso/Makefile
|
||||
|
||||
obj-vdso := vgettimeofday.o note.o sigreturn.o
|
||||
obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
|
||||
|
||||
# Build rules
|
||||
targets := $(obj-vdso) vdso.so vdso.so.dbg
|
||||
@ -34,19 +34,28 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
|
||||
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
|
||||
|
||||
# -Wmissing-prototypes and -Wmissing-declarations are removed from
|
||||
# the CFLAGS of vgettimeofday.c to make possible to build the
|
||||
# kernel with CONFIG_WERROR enabled.
|
||||
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
|
||||
$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
|
||||
$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
|
||||
-Wmissing-prototypes -Wmissing-declarations
|
||||
# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled.
|
||||
CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
|
||||
$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
|
||||
$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
|
||||
-Wmissing-prototypes -Wmissing-declarations
|
||||
|
||||
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
|
||||
CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
|
||||
|
||||
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO)
|
||||
CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO)
|
||||
|
||||
CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO)
|
||||
CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO)
|
||||
|
||||
ifneq ($(c-gettimeofday-y),)
|
||||
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
|
||||
endif
|
||||
|
||||
ifneq ($(c-getrandom-y),)
|
||||
CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
|
||||
endif
|
||||
|
||||
targets += vdso.lds
|
||||
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
|
||||
|
||||
|
@ -11,7 +11,9 @@
|
||||
#include <linux/const.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/vdso/vsyscall.h>
|
||||
#include <asm-generic/vmlinux.lds.h>
|
||||
#include <vdso/datapage.h>
|
||||
|
||||
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
|
||||
OUTPUT_ARCH(aarch64)
|
||||
@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64)
|
||||
SECTIONS
|
||||
{
|
||||
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
|
||||
PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
|
||||
#ifdef CONFIG_TIME_NS
|
||||
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
|
||||
#endif
|
||||
@ -102,6 +105,7 @@ VERSION
|
||||
__kernel_gettimeofday;
|
||||
__kernel_clock_gettime;
|
||||
__kernel_clock_getres;
|
||||
__kernel_getrandom;
|
||||
local: *;
|
||||
};
|
||||
}
|
||||
|
172
arch/arm64/kernel/vdso/vgetrandom-chacha.S
Normal file
172
arch/arm64/kernel/vdso/vgetrandom-chacha.S
Normal file
@ -0,0 +1,172 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.text
|
||||
|
||||
#define state0 v0
|
||||
#define state1 v1
|
||||
#define state2 v2
|
||||
#define state3 v3
|
||||
#define copy0 v4
|
||||
#define copy0_q q4
|
||||
#define copy1 v5
|
||||
#define copy2 v6
|
||||
#define copy3 v7
|
||||
#define copy3_d d7
|
||||
#define one_d d16
|
||||
#define one_q q16
|
||||
#define one_v v16
|
||||
#define tmp v17
|
||||
#define rot8 v18
|
||||
|
||||
/*
|
||||
* ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive
|
||||
* number of blocks of output with nonce 0, taking an input key and an 8-byte
|
||||
* counter. Importantly does not spill to the stack.
|
||||
*
|
||||
* This implementation avoids d8-d15 because they are callee-save in user
|
||||
* space.
|
||||
*
|
||||
* void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
|
||||
* const uint8_t *key,
|
||||
* uint32_t *counter,
|
||||
* size_t nblocks)
|
||||
*
|
||||
* x0: output bytes
|
||||
* x1: 32-byte key input
|
||||
* x2: 8-byte counter input/output
|
||||
* x3: number of 64-byte blocks to write to output
|
||||
*/
|
||||
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
|
||||
|
||||
/* copy0 = "expand 32-byte k" */
|
||||
mov_q x8, 0x3320646e61707865
|
||||
mov_q x9, 0x6b20657479622d32
|
||||
mov copy0.d[0], x8
|
||||
mov copy0.d[1], x9
|
||||
|
||||
/* copy1,copy2 = key */
|
||||
ld1 { copy1.4s, copy2.4s }, [x1]
|
||||
/* copy3 = counter || zero nonce */
|
||||
ld1 { copy3.2s }, [x2]
|
||||
|
||||
/* one_v = { 1, 0, 1, 0 }, i.e. one_d is the 64-bit value 1 used to bump the counter. */
movi one_v.2s, #1
|
||||
uzp1 one_v.4s, one_v.4s, one_v.4s
|
||||
|
||||
.Lblock:
|
||||
/* copy state to auxiliary vectors for the final add after the permute. */
|
||||
mov state0.16b, copy0.16b
|
||||
mov state1.16b, copy1.16b
|
||||
mov state2.16b, copy2.16b
|
||||
mov state3.16b, copy3.16b
|
||||
|
||||
/* w4 = rounds remaining; every pass through .Ldoubleround consumes two. */
mov w4, 20
|
||||
.Lpermute:
|
||||
/*
|
||||
* Permute one 64-byte block where the state matrix is stored in the four NEON
|
||||
* registers state0-state3. It performs matrix operations on four words in parallel,
|
||||
* but requires shuffling to rearrange the words after each round.
|
||||
*/
|
||||
|
||||
.Ldoubleround:
|
||||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
|
||||
add state0.4s, state0.4s, state1.4s
|
||||
eor state3.16b, state3.16b, state0.16b
|
||||
rev32 state3.8h, state3.8h
|
||||
|
||||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
|
||||
add state2.4s, state2.4s, state3.4s
|
||||
eor tmp.16b, state1.16b, state2.16b
|
||||
shl state1.4s, tmp.4s, #12
|
||||
sri state1.4s, tmp.4s, #20
|
||||
|
||||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
|
||||
add state0.4s, state0.4s, state1.4s
|
||||
eor tmp.16b, state3.16b, state0.16b
|
||||
shl state3.4s, tmp.4s, #8
|
||||
sri state3.4s, tmp.4s, #24
|
||||
|
||||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
|
||||
add state2.4s, state2.4s, state3.4s
|
||||
eor tmp.16b, state1.16b, state2.16b
|
||||
shl state1.4s, tmp.4s, #7
|
||||
sri state1.4s, tmp.4s, #25
|
||||
|
||||
/* Rotate rows 1-3 by 1, 2 and 3 words so the second half works on the diagonals. */
/* state1[0,1,2,3] = state1[1,2,3,0] */
|
||||
ext state1.16b, state1.16b, state1.16b, #4
|
||||
/* state2[0,1,2,3] = state2[2,3,0,1] */
|
||||
ext state2.16b, state2.16b, state2.16b, #8
|
||||
/* state3[0,1,2,3] = state3[3,0,1,2] */
|
||||
ext state3.16b, state3.16b, state3.16b, #12
|
||||
|
||||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
|
||||
add state0.4s, state0.4s, state1.4s
|
||||
eor state3.16b, state3.16b, state0.16b
|
||||
rev32 state3.8h, state3.8h
|
||||
|
||||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
|
||||
add state2.4s, state2.4s, state3.4s
|
||||
eor tmp.16b, state1.16b, state2.16b
|
||||
shl state1.4s, tmp.4s, #12
|
||||
sri state1.4s, tmp.4s, #20
|
||||
|
||||
/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
|
||||
add state0.4s, state0.4s, state1.4s
|
||||
eor tmp.16b, state3.16b, state0.16b
|
||||
shl state3.4s, tmp.4s, #8
|
||||
sri state3.4s, tmp.4s, #24
|
||||
|
||||
/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
|
||||
add state2.4s, state2.4s, state3.4s
|
||||
eor tmp.16b, state1.16b, state2.16b
|
||||
shl state1.4s, tmp.4s, #7
|
||||
sri state1.4s, tmp.4s, #25
|
||||
|
||||
/* Inverse of the rotation above: put rows 1-3 back in their original word order. */
/* state1[0,1,2,3] = state1[3,0,1,2] */
|
||||
ext state1.16b, state1.16b, state1.16b, #12
|
||||
/* state2[0,1,2,3] = state2[2,3,0,1] */
|
||||
ext state2.16b, state2.16b, state2.16b, #8
|
||||
/* state3[0,1,2,3] = state3[1,2,3,0] */
|
||||
ext state3.16b, state3.16b, state3.16b, #4
|
||||
|
||||
subs w4, w4, #2
|
||||
b.ne .Ldoubleround
|
||||
|
||||
/* output0 = state0 + copy0 */
|
||||
add state0.4s, state0.4s, copy0.4s
|
||||
/* output1 = state1 + copy1 */
|
||||
add state1.4s, state1.4s, copy1.4s
|
||||
/* output2 = state2 + copy2 */
|
||||
add state2.4s, state2.4s, copy2.4s
|
||||
/* output3 = state3 + copy3 */
|
||||
add state3.4s, state3.4s, copy3.4s
|
||||
/* Store the finished 64-byte block at the current output position. */
st1 { state0.16b - state3.16b }, [x0]
|
||||
|
||||
/*
|
||||
* ++copy3.counter, the 'add' clears the upper half of the SIMD register
|
||||
* which is the expected behaviour here.
|
||||
*/
|
||||
add copy3_d, copy3_d, one_d
|
||||
|
||||
/* output += 64, --nblocks */
|
||||
add x0, x0, 64
|
||||
subs x3, x3, #1
|
||||
b.ne .Lblock
|
||||
|
||||
/* counter = copy3.counter */
|
||||
st1 { copy3.2s }, [x2]
|
||||
|
||||
/* Zero out the potentially sensitive regs, in case nothing uses these again. */
|
||||
movi state0.16b, #0
|
||||
movi state1.16b, #0
|
||||
movi state2.16b, #0
|
||||
movi state3.16b, #0
|
||||
movi copy1.16b, #0
|
||||
movi copy2.16b, #0
|
||||
ret
|
||||
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
|
||||
|
||||
emit_aarch64_feature_1_and
|
15
arch/arm64/kernel/vdso/vgetrandom.c
Normal file
15
arch/arm64/kernel/vdso/vgetrandom.c
Normal file
@ -0,0 +1,15 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <uapi/asm-generic/errno.h>
|
||||
|
||||
typeof(__cvdso_getrandom) __kernel_getrandom;
|
||||
|
||||
ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
{
	/* With FP/SIMD available, the generic vDSO implementation does the work. */
	if (alternative_has_cap_likely(ARM64_HAS_FPSIMD))
		return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);

	/*
	 * No FP/SIMD: the vDSO path cannot be used. A call with no buffer, no
	 * length, no flags and opaque_len == ~0UL is answered with -ENOSYS
	 * (presumably the generic code's state-size query -- confirm against
	 * lib/vdso/getrandom.c); anything else is forwarded to the syscall.
	 */
	if (unlikely(!(buffer || len || flags) && opaque_len == ~0UL))
		return -ENOSYS;

	return getrandom_syscall(buffer, len, flags);
}
|
1
tools/arch/arm64/vdso
Symbolic link
1
tools/arch/arm64/vdso
Symbolic link
@ -0,0 +1 @@
|
||||
../../../arch/arm64/kernel/vdso
|
@ -2,6 +2,8 @@
|
||||
#ifndef _TOOLS_LINUX_COMPILER_H_
|
||||
#define _TOOLS_LINUX_COMPILER_H_
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/compiler_types.h>
|
||||
|
||||
#ifndef __compiletime_error
|
||||
@ -224,4 +226,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
|
||||
__asm__ ("" : "=r" (var) : "0" (var))
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _TOOLS_LINUX_COMPILER_H */
|
||||
|
@ -9,7 +9,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
|
||||
TEST_GEN_PROGS += vdso_standalone_test_x86
|
||||
endif
|
||||
TEST_GEN_PROGS += vdso_test_correctness
|
||||
ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch))
|
||||
ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64))
|
||||
TEST_GEN_PROGS += vdso_test_getrandom
|
||||
TEST_GEN_PROGS += vdso_test_chacha
|
||||
endif
|
||||
@ -40,5 +40,6 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
|
||||
$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S
|
||||
$(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
|
||||
-idirafter $(top_srcdir)/arch/$(SRCARCH)/include \
|
||||
-idirafter $(top_srcdir)/arch/$(SRCARCH)/include/generated \
|
||||
-idirafter $(top_srcdir)/include \
|
||||
-D__ASSEMBLY__ -Wa,--noexecstack
|
||||
|
Loading…
Reference in New Issue
Block a user