linux/arch/x86/kernel/shstk.c
Rick Edgecombe b2926a36b9 x86/shstk: Handle thread shadow stack
When a process is duplicated, but the child shares the address space with
the parent, there is potential for the threads sharing a single stack to
cause conflicts for each other. In the normal non-CET case this is handled
in two ways.

With regular CLONE_VM a new stack is provided by userspace such that the
parent and child have different stacks.

For vfork, the parent is suspended until the child exits. So as long as
the child doesn't return from the vfork()/CLONE_VFORK calling function and
sticks to a limited set of operations, the parent and child can share the
same stack.

For shadow stack, these scenarios present similar sharing problems. For the
CLONE_VM case, the child and the parent must have separate shadow stacks.
Instead of changing clone to take a shadow stack, have the kernel just
allocate one and switch to it.

Use stack_size passed from clone3() syscall for thread shadow stack size. A
compat-mode thread shadow stack size is further reduced to 1/4. This
allows more threads to run in a 32-bit address space. The clone() does not
pass stack_size, which was added to clone3(). In that case, use
RLIMIT_STACK size and cap to 4 GB.

For shadow stack enabled vfork(), the parent and child can share the same
shadow stack, like they can share a normal stack. Since the parent is
suspended until the child terminates, the child will not interfere with
the parent while executing as long as it doesn't return from the vfork()
and overwrite up the shadow stack. The child can safely overwrite down
the shadow stack, as the parent can just overwrite this later. So CET does
not add any additional limitations for vfork().

Free the shadow stack on thread exit by doing it in mm_release(). Skip
this when exiting a vfork() child since the stack is shared in the
parent.

During this operation, the shadow stack pointer of the new thread needs
to be updated to point to the newly allocated shadow stack. Since the
ability to do this is confined to the FPU subsystem, change
fpu_clone() to take the new shadow stack pointer, and update it
internally inside the FPU subsystem. This part was suggested by Thomas
Gleixner.

Co-developed-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/all/20230613001108.3040476-30-rick.p.edgecombe%40intel.com
2023-08-02 15:01:50 -07:00

227 lines
4.9 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* shstk.c - Intel shadow stack support
*
* Copyright (c) 2021, Intel Corporation.
* Yu-cheng Yu <yu-cheng.yu@intel.com>
*/
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/sched/signal.h>
#include <linux/compat.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <asm/msr.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/types.h>
#include <asm/shstk.h>
#include <asm/special_insns.h>
#include <asm/fpu/api.h>
#include <asm/prctl.h>
static bool features_enabled(unsigned long features)
{
return current->thread.features & features;
}
static void features_set(unsigned long features)
{
current->thread.features |= features;
}
static void features_clr(unsigned long features)
{
current->thread.features &= ~features;
}
static unsigned long alloc_shstk(unsigned long size)
{
int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G;
struct mm_struct *mm = current->mm;
unsigned long addr, unused;
mmap_write_lock(mm);
addr = do_mmap(NULL, 0, size, PROT_READ, flags,
VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
mmap_write_unlock(mm);
return addr;
}
static unsigned long adjust_shstk_size(unsigned long size)
{
if (size)
return PAGE_ALIGN(size);
return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
}
static void unmap_shadow_stack(u64 base, u64 size)
{
while (1) {
int r;
r = vm_munmap(base, size);
/*
* vm_munmap() returns -EINTR when mmap_lock is held by
* something else, and that lock should not be held for a
* long time. Retry it for the case.
*/
if (r == -EINTR) {
cond_resched();
continue;
}
/*
* For all other types of vm_munmap() failure, either the
* system is out of memory or there is bug.
*/
WARN_ON_ONCE(r);
break;
}
}
static int shstk_setup(void)
{
struct thread_shstk *shstk = &current->thread.shstk;
unsigned long addr, size;
/* Already enabled */
if (features_enabled(ARCH_SHSTK_SHSTK))
return 0;
/* Also not supported for 32 bit and x32 */
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) || in_32bit_syscall())
return -EOPNOTSUPP;
size = adjust_shstk_size(0);
addr = alloc_shstk(size);
if (IS_ERR_VALUE(addr))
return PTR_ERR((void *)addr);
fpregs_lock_and_load();
wrmsrl(MSR_IA32_PL3_SSP, addr + size);
wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
fpregs_unlock();
shstk->base = addr;
shstk->size = size;
features_set(ARCH_SHSTK_SHSTK);
return 0;
}
void reset_thread_features(void)
{
memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
current->thread.features = 0;
current->thread.features_locked = 0;
}
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
unsigned long stack_size)
{
struct thread_shstk *shstk = &tsk->thread.shstk;
unsigned long addr, size;
/*
* If shadow stack is not enabled on the new thread, skip any
* switch to a new shadow stack.
*/
if (!features_enabled(ARCH_SHSTK_SHSTK))
return 0;
/*
* For CLONE_VM, except vfork, the child needs a separate shadow
* stack.
*/
if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
return 0;
size = adjust_shstk_size(stack_size);
addr = alloc_shstk(size);
if (IS_ERR_VALUE(addr))
return addr;
shstk->base = addr;
shstk->size = size;
return addr + size;
}
void shstk_free(struct task_struct *tsk)
{
struct thread_shstk *shstk = &tsk->thread.shstk;
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
!features_enabled(ARCH_SHSTK_SHSTK))
return;
/*
* When fork() with CLONE_VM fails, the child (tsk) already has a
* shadow stack allocated, and exit_thread() calls this function to
* free it. In this case the parent (current) and the child share
* the same mm struct.
*/
if (!tsk->mm || tsk->mm != current->mm)
return;
unmap_shadow_stack(shstk->base, shstk->size);
}
static int shstk_disable(void)
{
if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
return -EOPNOTSUPP;
/* Already disabled? */
if (!features_enabled(ARCH_SHSTK_SHSTK))
return 0;
fpregs_lock_and_load();
/* Disable WRSS too when disabling shadow stack */
wrmsrl(MSR_IA32_U_CET, 0);
wrmsrl(MSR_IA32_PL3_SSP, 0);
fpregs_unlock();
shstk_free(current);
features_clr(ARCH_SHSTK_SHSTK);
return 0;
}
long shstk_prctl(struct task_struct *task, int option, unsigned long features)
{
if (option == ARCH_SHSTK_LOCK) {
task->thread.features_locked |= features;
return 0;
}
/* Don't allow via ptrace */
if (task != current)
return -EINVAL;
/* Do not allow to change locked features */
if (features & task->thread.features_locked)
return -EPERM;
/* Only support enabling/disabling one feature at a time. */
if (hweight_long(features) > 1)
return -EINVAL;
if (option == ARCH_SHSTK_DISABLE) {
return -EINVAL;
}
/* Handle ARCH_SHSTK_ENABLE */
return -EINVAL;
}