linux/arch/x86/kernel/shstk.c
Rick Edgecombe 7fad2a432c x86/shstk: Check that signal frame is shadow stack mem
The shadow stack signal frame is read by the kernel on sigreturn. It
relies on shadow stack memory protections to prevent forgeries of this
signal frame (which included the pre-signal SSP). This behavior helps
userspace protect itself. However, using the INCSSP instruction userspace
can adjust the SSP to 8 bytes beyond the end of a shadow stack. INCSSP
performs shadow stack reads to make sure it doesn't increment off of the
shadow stack, but on the end position it actually reads 8 bytes below the
new SSP.

For the shadow stack HW operations, this situation (INCSSP off the end
of a shadow stack by 8 bytes) would be fine. If a RET is executed, the
push to the shadow stack would fail to write to the shadow stack. If a
CALL is executed, the SSP will be incremented back onto the stack and the
return address will be written successfully to the very end. That is
expected behavior around shadow stack underflow.

However, the kernel doesn't have a way to read shadow stack memory using
shadow stack accesses. WRUSS can write to shadow stack memory with a
shadow stack access which ensures the access is to shadow stack memory.
But unfortunately for this case, there is no equivalent instruction for
shadow stack reads. So when reading the shadow stack signal frames, the
kernel currently assumes the SSP is pointing to the shadow stack and uses
a normal read.

The SSP pointing to shadow stack memory will be true in most cases, but as
described above, it can be untrue by 8 bytes. So look up the VMA of the
shadow stack sigframe being read to verify it is shadow stack.

Since the SSP can only be beyond the shadow stack by 8 bytes, and
shadow stack memory is page aligned, this check only needs to be done
when this type of relative position to a page boundary is encountered.
So skip the extra work otherwise.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/all/20230613001108.3040476-34-rick.p.edgecombe%40intel.com
2023-08-02 15:01:50 -07:00

427 lines
8.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* shstk.c - Intel shadow stack support
*
* Copyright (c) 2021, Intel Corporation.
* Yu-cheng Yu <yu-cheng.yu@intel.com>
*/
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/sched/signal.h>
#include <linux/compat.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <asm/msr.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/types.h>
#include <asm/shstk.h>
#include <asm/special_insns.h>
#include <asm/fpu/api.h>
#include <asm/prctl.h>
/*
 * Size in bytes of one entry this file writes to the user shadow stack
 * (restore token, sigframe data word, restorer address).
 */
#define SS_FRAME_SIZE 8
/* Report whether any bit of @features is set in the current task's mask. */
static bool features_enabled(unsigned long features)
{
	return (current->thread.features & features) != 0;
}
/* Turn on the @features bits in the current task's feature mask. */
static void features_set(unsigned long features)
{
	current->thread.features = current->thread.features | features;
}
/* Turn off the @features bits in the current task's feature mask. */
static void features_clr(unsigned long features)
{
	current->thread.features = current->thread.features & ~features;
}
/*
* Create a restore token on the shadow stack. A token is always 8-byte
* and aligned to 8.
*/
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
{
unsigned long addr;
/* Token must be aligned */
if (!IS_ALIGNED(ssp, 8))
return -EINVAL;
addr = ssp - SS_FRAME_SIZE;
/*
* SSP is aligned, so reserved bits and mode bit are a zero, just mark
* the token 64-bit.
*/
ssp |= BIT(0);
if (write_user_shstk_64((u64 __user *)addr, (u64)ssp))
return -EFAULT;
if (token_addr)
*token_addr = addr;
return 0;
}
/*
 * Map @size bytes of anonymous, private memory flagged VM_SHADOW_STACK in
 * the current task's mm, using MAP_ABOVE4G and no address hint.
 *
 * Returns the do_mmap() result unchanged; callers in this file test it with
 * IS_ERR_VALUE().
 */
static unsigned long alloc_shstk(unsigned long size)
{
	struct mm_struct *mm = current->mm;
	unsigned long populate;	/* output slot do_mmap() needs; value unused */
	unsigned long mapped;

	mmap_write_lock(mm);
	mapped = do_mmap(NULL, 0, size, PROT_READ,
			 MAP_ANONYMOUS | MAP_PRIVATE | MAP_ABOVE4G,
			 VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	return mapped;
}
/*
 * Pick the shadow stack size to allocate.
 *
 * A non-zero @size is used as given; zero selects the smaller of
 * RLIMIT_STACK and 4GB. Either way the result is rounded up to a page
 * boundary.
 */
static unsigned long adjust_shstk_size(unsigned long size)
{
	if (!size)
		size = min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G);

	return PAGE_ALIGN(size);
}
/*
 * Unmap the range [@base, @base + @size), retrying for as long as
 * vm_munmap() reports -EINTR.
 */
static void unmap_shadow_stack(u64 base, u64 size)
{
	int ret;

	for (;;) {
		ret = vm_munmap(base, size);

		/*
		 * -EINTR means mmap_lock was held by someone else. That
		 * should be short-lived, so yield and try again.
		 */
		if (ret != -EINTR)
			break;

		cond_resched();
	}

	/*
	 * Any other failure means the system is out of memory or there is
	 * a bug.
	 */
	WARN_ON_ONCE(ret);
}
/*
 * Enable shadow stack for the current task.
 *
 * Allocates a default-sized shadow stack, points MSR_IA32_PL3_SSP at its
 * top, sets CET_SHSTK_EN in MSR_IA32_U_CET and records base/size in the
 * task's thread_shstk.
 *
 * Returns 0 on success or if already enabled, -EOPNOTSUPP when the CPU
 * feature is off or the caller is in a 32-bit syscall, or the allocation
 * error.
 */
static int shstk_setup(void)
{
	struct thread_shstk *shstk = &current->thread.shstk;
	unsigned long base, len;

	/* Nothing to do when shadow stack is already on. */
	if (features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	/* 32 bit and x32 are not supported either. */
	if (in_32bit_syscall())
		return -EOPNOTSUPP;

	len = adjust_shstk_size(0);
	base = alloc_shstk(len);
	if (IS_ERR_VALUE(base))
		return PTR_ERR((void *)base);

	/* SSP starts at the top of the new mapping. */
	fpregs_lock_and_load();
	wrmsrl(MSR_IA32_PL3_SSP, base + len);
	wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
	fpregs_unlock();

	shstk->base = base;
	shstk->size = len;
	features_set(ARCH_SHSTK_SHSTK);

	return 0;
}
/*
 * Clear the current task's feature mask, its locked-feature mask and its
 * shadow stack bookkeeping.
 */
void reset_thread_features(void)
{
	current->thread.features_locked = 0;
	current->thread.features = 0;
	memset(&current->thread.shstk, 0, sizeof(current->thread.shstk));
}
/*
 * Allocate a shadow stack for a new task @tsk when one is needed.
 *
 * Returns 0 when no new shadow stack is required, an error value (as
 * tested by IS_ERR_VALUE()) if the allocation fails, or otherwise the
 * address just past the end of the new shadow stack.
 *
 * NOTE(review): the early-return paths leave @tsk's shstk base/size
 * untouched; confirm that is what shstk_free() expects for such tasks.
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
				       unsigned long stack_size)
{
	struct thread_shstk *shstk = &tsk->thread.shstk;
	unsigned long base, len;

	/*
	 * Shadow stack enablement is looked up on the current task; when
	 * it is off there is no shadow stack to switch to.
	 */
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	/*
	 * Only a CLONE_VM child that is not a vfork child needs a shadow
	 * stack of its own.
	 */
	if (!(clone_flags & CLONE_VM) || (clone_flags & CLONE_VFORK))
		return 0;

	len = adjust_shstk_size(stack_size);
	base = alloc_shstk(len);
	if (IS_ERR_VALUE(base))
		return base;

	shstk->base = base;
	shstk->size = len;

	return base + len;
}
/* Read and return the value of MSR_IA32_PL3_SSP under the fpregs lock. */
static unsigned long get_user_shstk_addr(void)
{
	unsigned long long msr_val;

	fpregs_lock_and_load();
	rdmsrl(MSR_IA32_PL3_SSP, msr_val);
	fpregs_unlock();

	return msr_val;
}
/*
 * Bit 63 tags a shadow stack word as kernel-written data: put_shstk_data()
 * sets it when writing and get_shstk_data() requires and strips it when
 * reading.
 */
#define SHSTK_DATA_BIT BIT(63)
/*
 * Store @data, tagged with SHSTK_DATA_BIT, at shadow stack address @addr.
 *
 * The tag is the high bit, so that the stored word can't be processed as a
 * return address.
 *
 * Returns 0 on success, -EINVAL (with a one-time warning) if @data already
 * has the tag bit set, or -EFAULT if the shadow stack write fails.
 */
static int put_shstk_data(u64 __user *addr, u64 data)
{
	if (WARN_ON_ONCE(data & SHSTK_DATA_BIT))
		return -EINVAL;

	return write_user_shstk_64(addr, data | SHSTK_DATA_BIT) ? -EFAULT : 0;
}
/*
 * Read a tagged data word from user address @addr.
 *
 * On success the word, with SHSTK_DATA_BIT cleared, is stored in @data.
 *
 * Returns 0 on success, -EFAULT if the user read fails, or -EINVAL if the
 * word read does not carry SHSTK_DATA_BIT.
 */
static int get_shstk_data(unsigned long *data, unsigned long __user *addr)
{
	unsigned long raw;

	if (unlikely(get_user(raw, addr)))
		return -EFAULT;

	if (raw & SHSTK_DATA_BIT) {
		*data = raw & ~SHSTK_DATA_BIT;
		return 0;
	}

	return -EINVAL;
}
static int shstk_push_sigframe(unsigned long *ssp)
{
unsigned long target_ssp = *ssp;
/* Token must be aligned */
if (!IS_ALIGNED(target_ssp, 8))
return -EINVAL;
*ssp -= SS_FRAME_SIZE;
if (put_shstk_data((void *__user)*ssp, target_ssp))
return -EFAULT;
return 0;
}
static int shstk_pop_sigframe(unsigned long *ssp)
{
struct vm_area_struct *vma;
unsigned long token_addr;
bool need_to_check_vma;
int err = 1;
/*
* It is possible for the SSP to be off the end of a shadow stack by 4
* or 8 bytes. If the shadow stack is at the start of a page or 4 bytes
* before it, it might be this case, so check that the address being
* read is actually shadow stack.
*/
if (!IS_ALIGNED(*ssp, 8))
return -EINVAL;
need_to_check_vma = PAGE_ALIGN(*ssp) == *ssp;
if (need_to_check_vma)
mmap_read_lock_killable(current->mm);
err = get_shstk_data(&token_addr, (unsigned long __user *)*ssp);
if (unlikely(err))
goto out_err;
if (need_to_check_vma) {
vma = find_vma(current->mm, *ssp);
if (!vma || !(vma->vm_flags & VM_SHADOW_STACK)) {
err = -EFAULT;
goto out_err;
}
mmap_read_unlock(current->mm);
}
/* Restore SSP aligned? */
if (unlikely(!IS_ALIGNED(token_addr, 8)))
return -EINVAL;
/* SSP in userspace? */
if (unlikely(token_addr >= TASK_SIZE_MAX))
return -EINVAL;
*ssp = token_addr;
return 0;
out_err:
if (need_to_check_vma)
mmap_read_unlock(current->mm);
return err;
}
/*
 * Prepare the user shadow stack for delivery of signal @ksig.
 *
 * Pushes a sigframe recording the current SSP, then the handler's restorer
 * address, and finally points MSR_IA32_PL3_SSP at the restorer entry.
 *
 * Returns 0 on success or when shadow stack is not in use; -EINVAL if the
 * handler has no restorer or the SSP reads as zero; otherwise the sigframe
 * push error, or -EFAULT if the restorer address cannot be written.
 */
int setup_signal_shadow_stack(struct ksignal *ksig)
{
	void __user *restorer = ksig->ka.sa.sa_restorer;
	unsigned long ssp;
	int err;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return 0;
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	if (!restorer)
		return -EINVAL;

	ssp = get_user_shstk_addr();
	if (unlikely(!ssp))
		return -EINVAL;

	err = shstk_push_sigframe(&ssp);
	if (unlikely(err))
		return err;

	/* The restorer address goes in the slot below the sigframe. */
	ssp -= SS_FRAME_SIZE;
	if (unlikely(write_user_shstk_64((u64 __user *)ssp, (u64)restorer)))
		return -EFAULT;

	fpregs_lock_and_load();
	wrmsrl(MSR_IA32_PL3_SSP, ssp);
	fpregs_unlock();

	return 0;
}
/*
 * Restore the user SSP from the shadow stack sigframe at the current SSP.
 *
 * Returns 0 on success or when shadow stack is not in use, -EINVAL if the
 * SSP reads as zero, or the error from popping the sigframe.
 */
int restore_signal_shadow_stack(void)
{
	unsigned long ssp;
	int err;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return 0;
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return 0;

	ssp = get_user_shstk_addr();
	if (unlikely(!ssp))
		return -EINVAL;

	err = shstk_pop_sigframe(&ssp);
	if (unlikely(err))
		return err;

	/* Install the SSP value recovered from the sigframe. */
	fpregs_lock_and_load();
	wrmsrl(MSR_IA32_PL3_SSP, ssp);
	fpregs_unlock();

	return 0;
}
/*
 * Unmap the shadow stack recorded for @tsk.
 *
 * Does nothing unless the CPU feature is enabled, shadow stack is enabled
 * for the current task, and @tsk has an mm that is the current task's mm.
 */
void shstk_free(struct task_struct *tsk)
{
	struct thread_shstk *shstk = &tsk->thread.shstk;

	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return;
	if (!features_enabled(ARCH_SHSTK_SHSTK))
		return;

	/*
	 * A failed fork() with CLONE_VM leaves the child (tsk) with an
	 * already allocated shadow stack, which exit_thread() frees through
	 * this function. Parent (current) and child share one mm struct in
	 * that case, so only unmap when the two mms match.
	 */
	if (tsk->mm && tsk->mm == current->mm)
		unmap_shadow_stack(shstk->base, shstk->size);
}
/*
 * Disable shadow stack for the current task.
 *
 * Zeroes MSR_IA32_U_CET and MSR_IA32_PL3_SSP, frees the task's shadow
 * stack and clears ARCH_SHSTK_SHSTK.
 *
 * Returns -EOPNOTSUPP when the CPU feature is off, otherwise 0 (including
 * when shadow stack was already disabled).
 */
static int shstk_disable(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
		return -EOPNOTSUPP;

	if (features_enabled(ARCH_SHSTK_SHSTK)) {
		fpregs_lock_and_load();
		/* Writing 0 to U_CET turns WRSS off along with shadow stack. */
		wrmsrl(MSR_IA32_U_CET, 0);
		wrmsrl(MSR_IA32_PL3_SSP, 0);
		fpregs_unlock();

		shstk_free(current);
		features_clr(ARCH_SHSTK_SHSTK);
	}

	return 0;
}
long shstk_prctl(struct task_struct *task, int option, unsigned long features)
{
if (option == ARCH_SHSTK_LOCK) {
task->thread.features_locked |= features;
return 0;
}
/* Don't allow via ptrace */
if (task != current)
return -EINVAL;
/* Do not allow to change locked features */
if (features & task->thread.features_locked)
return -EPERM;
/* Only support enabling/disabling one feature at a time. */
if (hweight_long(features) > 1)
return -EINVAL;
if (option == ARCH_SHSTK_DISABLE) {
return -EINVAL;
}
/* Handle ARCH_SHSTK_ENABLE */
return -EINVAL;
}