mirror of
https://github.com/torvalds/linux.git
synced 2024-11-29 07:31:29 +00:00
1423e2660c
- Prevent sigaltstack out of bounds writes. The kernel unconditionally writes the FPU state to the alternate stack without checking whether the stack is large enough to accommodate it. Check the alternate stack size before doing so and in case it's too small force a SIGSEGV instead of silently corrupting user space data. - MINSIGSTKSZ and SIGSTKSZ are constants in signal.h and have never been updated despite the fact that the FPU state which is stored on the signal stack has grown over time, which causes trouble in the field when AVX512 is available on a CPU. The kernel does not expose the minimum requirements for the alternate stack size depending on the available and enabled CPU features. ARM already added an aux vector AT_MINSIGSTKSZ for the same reason. Add it to x86 as well. - A major cleanup of the x86 FPU code. The recent discoveries of XSTATE related issues unearthed quite a few inconsistencies, duplicated code and other issues. The fine-grained overhaul addresses this and makes the code more robust and maintainable, which allows upcoming XSTATE related features to be integrated in sane ways.
-----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmDlcpETHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoeP5D/4i+AgYYeiMLgGb+NS7iaKPfoWo6LIz y3qdTSA0DQaIYbYivWwRO/g0GYdDMXDWeZalFi7eGnVI8O3eOog+22Zrf/y0UINB KJHdYd4ApWHhs401022y5hexrWQvnV8w1yQCuj/zLm6eC+AVhdwt2AY+IBoRrdUj wqY97B/4rJNsBvvqTDn9EeDrJA2y0y0Suc7AhIp2BGMI+dpIdxys8RJDamXNWyDL gJf0YRgUoiIn3AHKb+fgv60AoxfC175NSg/5/y/scFNXqVlW0Up4YCb7pqG9o2Ga f3XvtWfbw1N5PmUYjFkALwEkzGUbM3v0RA3xLY2j2WlWm9fBPPy59dt+i/h/VKyA GrA7i7lcIqX8dfVH6XkrReZBkRDSB6t9SZTvV54jAz5fcIZO2Rg++UFUvI/R6GKK XCcxukYaArwo+IG62iqDszS3gfLGhcor/cviOeULRC5zMUIO4Jah+IhDnifmShtC M5s9QzrwIRD/XMewGRQmvkiN4kBfE7jFoBQr1J9leCXJKrM+2JQmMzVInuubTQIq SdlKOaAIn7xtekz+6XdFG9Gmhck0PCLMJMOLNvQkKWI3KqGLRZ+dAWKK0vsCizAx 0BA7ZeB9w9lFT+D8mQCX77JvW9+VNwyfwIOLIrJRHk3VqVpS5qvoiFTLGJJBdZx4 /TbbRZu7nXDN2w== =Mq1m -----END PGP SIGNATURE----- Merge tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fpu updates from Thomas Gleixner: "Fixes and improvements for FPU handling on x86: - Prevent sigaltstack out of bounds writes. The kernel unconditionally writes the FPU state to the alternate stack without checking whether the stack is large enough to accomodate it. Check the alternate stack size before doing so and in case it's too small force a SIGSEGV instead of silently corrupting user space data. - MINSIGSTKZ and SIGSTKSZ are constants in signal.h and have never been updated despite the fact that the FPU state which is stored on the signal stack has grown over time which causes trouble in the field when AVX512 is available on a CPU. The kernel does not expose the minimum requirements for the alternate stack size depending on the available and enabled CPU features. ARM already added an aux vector AT_MINSIGSTKSZ for the same reason. Add it to x86 as well. - A major cleanup of the x86 FPU code. The recent discoveries of XSTATE related issues unearthed quite some inconsistencies, duplicated code and other issues. 
The fine granular overhaul addresses this, makes the code more robust and maintainable, which allows to integrate upcoming XSTATE related features in sane ways" * tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (74 commits) x86/fpu/xstate: Clear xstate header in copy_xstate_to_uabi_buf() again x86/fpu/signal: Let xrstor handle the features to init x86/fpu/signal: Handle #PF in the direct restore path x86/fpu: Return proper error codes from user access functions x86/fpu/signal: Split out the direct restore code x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing() x86/fpu/signal: Sanitize the xstate check on sigframe x86/fpu/signal: Remove the legacy alignment check x86/fpu/signal: Move initial checks into fpu__restore_sig() x86/fpu: Mark init_fpstate __ro_after_init x86/pkru: Remove xstate fiddling from write_pkru() x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate() x86/fpu: Remove PKRU handling from switch_fpu_finish() x86/fpu: Mask PKRU from kernel XRSTOR[S] operations x86/fpu: Hook up PKRU into ptrace() x86/fpu: Add PKRU storage outside of task XSAVE buffer x86/fpu: Dont restore PKRU in fpregs_restore_userspace() x86/fpu: Rename xfeatures_mask_user() to xfeatures_mask_uabi() x86/fpu: Move FXSAVE_LEAK quirk info __copy_kernel_to_fpregs() x86/fpu: Rename __fpregs_load_activate() to fpregs_restore_userregs() ...
195 lines
5.3 KiB
C
195 lines
5.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Intel Memory Protection Keys management
|
|
* Copyright (c) 2015, Intel Corporation.
|
|
*/
|
|
#include <linux/debugfs.h> /* debugfs_create_u32() */
|
|
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
|
|
#include <linux/pkeys.h> /* PKEY_* */
|
|
#include <uapi/asm-generic/mman-common.h>
|
|
|
|
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
|
|
#include <asm/mmu_context.h> /* vma_pkey() */
|
|
|
|
int __execute_only_pkey(struct mm_struct *mm)
|
|
{
|
|
bool need_to_set_mm_pkey = false;
|
|
int execute_only_pkey = mm->context.execute_only_pkey;
|
|
int ret;
|
|
|
|
/* Do we need to assign a pkey for mm's execute-only maps? */
|
|
if (execute_only_pkey == -1) {
|
|
/* Go allocate one to use, which might fail */
|
|
execute_only_pkey = mm_pkey_alloc(mm);
|
|
if (execute_only_pkey < 0)
|
|
return -1;
|
|
need_to_set_mm_pkey = true;
|
|
}
|
|
|
|
/*
|
|
* We do not want to go through the relatively costly
|
|
* dance to set PKRU if we do not need to. Check it
|
|
* first and assume that if the execute-only pkey is
|
|
* write-disabled that we do not have to set it
|
|
* ourselves.
|
|
*/
|
|
if (!need_to_set_mm_pkey &&
|
|
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
|
|
return execute_only_pkey;
|
|
}
|
|
|
|
/*
|
|
* Set up PKRU so that it denies access for everything
|
|
* other than execution.
|
|
*/
|
|
ret = arch_set_user_pkey_access(current, execute_only_pkey,
|
|
PKEY_DISABLE_ACCESS);
|
|
/*
|
|
* If the PKRU-set operation failed somehow, just return
|
|
* 0 and effectively disable execute-only support.
|
|
*/
|
|
if (ret) {
|
|
mm_set_pkey_free(mm, execute_only_pkey);
|
|
return -1;
|
|
}
|
|
|
|
/* We got one, store it and use it from here on out */
|
|
if (need_to_set_mm_pkey)
|
|
mm->context.execute_only_pkey = execute_only_pkey;
|
|
return execute_only_pkey;
|
|
}
|
|
|
|
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
|
|
{
|
|
/* Do this check first since the vm_flags should be hot */
|
|
if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC)
|
|
return false;
|
|
if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* This is only called for *plain* mprotect calls.
|
|
*/
|
|
int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey)
|
|
{
|
|
/*
|
|
* Is this an mprotect_pkey() call? If so, never
|
|
* override the value that came from the user.
|
|
*/
|
|
if (pkey != -1)
|
|
return pkey;
|
|
|
|
/*
|
|
* The mapping is execute-only. Go try to get the
|
|
* execute-only protection key. If we fail to do that,
|
|
* fall through as if we do not have execute-only
|
|
* support in this mm.
|
|
*/
|
|
if (prot == PROT_EXEC) {
|
|
pkey = execute_only_pkey(vma->vm_mm);
|
|
if (pkey > 0)
|
|
return pkey;
|
|
} else if (vma_is_pkey_exec_only(vma)) {
|
|
/*
|
|
* Protections are *not* PROT_EXEC, but the mapping
|
|
* is using the exec-only pkey. This mapping was
|
|
* PROT_EXEC and will no longer be. Move back to
|
|
* the default pkey.
|
|
*/
|
|
return ARCH_DEFAULT_PKEY;
|
|
}
|
|
|
|
/*
|
|
* This is a vanilla, non-pkey mprotect (or we failed to
|
|
* setup execute-only), inherit the pkey from the VMA we
|
|
* are working on.
|
|
*/
|
|
return vma_pkey(vma);
|
|
}
|
|
|
|
#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
|
|
|
|
/*
|
|
* Make the default PKRU value (at execve() time) as restrictive
|
|
* as possible. This ensures that any threads clone()'d early
|
|
* in the process's lifetime will not accidentally get access
|
|
* to data which is pkey-protected later on.
|
|
*/
|
|
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
|
|
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
|
|
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
|
|
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
|
|
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
|
|
|
|
/*
 * debugfs read handler: format the current init_pkru_value as a
 * hexadecimal string followed by a newline and copy it to user space.
 */
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
				   size_t count, loff_t *ppos)
{
	char text[32];
	unsigned int text_len = sprintf(text, "0x%x\n", init_pkru_value);

	return simple_read_from_buffer(user_buf, count, ppos, text, text_len);
}
|
|
|
|
static ssize_t init_pkru_write_file(struct file *file,
|
|
const char __user *user_buf, size_t count, loff_t *ppos)
|
|
{
|
|
char buf[32];
|
|
ssize_t len;
|
|
u32 new_init_pkru;
|
|
|
|
len = min(count, sizeof(buf) - 1);
|
|
if (copy_from_user(buf, user_buf, len))
|
|
return -EFAULT;
|
|
|
|
/* Make the buffer a valid string that we can not overrun */
|
|
buf[len] = '\0';
|
|
if (kstrtouint(buf, 0, &new_init_pkru))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Don't allow insane settings that will blow the system
|
|
* up immediately if someone attempts to disable access
|
|
* or writes to pkey 0.
|
|
*/
|
|
if (new_init_pkru & (PKRU_AD_BIT|PKRU_WD_BIT))
|
|
return -EINVAL;
|
|
|
|
WRITE_ONCE(init_pkru_value, new_init_pkru);
|
|
return count;
|
|
}
|
|
|
|
static const struct file_operations fops_init_pkru = {
|
|
.read = init_pkru_read_file,
|
|
.write = init_pkru_write_file,
|
|
.llseek = default_llseek,
|
|
};
|
|
|
|
/*
 * Late initcall: create the "init_pkru" debugfs file, readable and
 * writable by its owner only.  Always returns 0.
 */
static int __init create_init_pkru_value(void)
{
	/* Only expose the file when pkeys are supported. */
	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
		debugfs_create_file("init_pkru", S_IRUSR | S_IWUSR,
				    arch_debugfs_dir, NULL, &fops_init_pkru);
	}

	return 0;
}
late_initcall(create_init_pkru_value);
|
|
|
|
static __init int setup_init_pkru(char *opt)
|
|
{
|
|
u32 new_init_pkru;
|
|
|
|
if (kstrtouint(opt, 0, &new_init_pkru))
|
|
return 1;
|
|
|
|
WRITE_ONCE(init_pkru_value, new_init_pkru);
|
|
|
|
return 1;
|
|
}
|
|
__setup("init_pkru=", setup_init_pkru);
|