0cecca9d03
While most of a task's FPU state is only needed in user space, the protection keys need to be in place immediately after a context switch. The reason is that any access to userspace memory while running in kernel mode also needs to abide by the memory permissions specified in the protection keys. The "eager switch" is a preparation for loading the FPU state on return to userland. Instead of decoupling PKRU state from xstate, update PKRU within xstate on write operations by the kernel. For user tasks the PKRU should be always read from the xsave area and it should not change anything because the PKRU value was loaded as part of FPU restore. For kernel threads the default "init_pkru_value" will be written. Before this commit, the kernel thread would end up with a random value which it inherited from the previous user task. [ bigeasy: save pkru to xstate, no cache, don't use __raw_xsave_addr() ] [ bp: update commit message, sort headers properly in asm/fpu/xstate.h ] Signed-off-by: Rik van Riel <riel@surriel.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Dave Hansen <dave.hansen@intel.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: Andi Kleen <ak@linux.intel.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Aubrey Li <aubrey.li@intel.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: "Jason A. Donenfeld" <Jason@zx2c4.com> Cc: Joerg Roedel <jroedel@suse.de> Cc: Juergen Gross <jgross@suse.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: kvm ML <kvm@vger.kernel.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: x86-ml <x86@kernel.org> Link: https://lkml.kernel.org/r/20190403164156.19645-16-bigeasy@linutronix.de
215 lines
6.0 KiB
C
215 lines
6.0 KiB
C
/*
|
|
* Intel Memory Protection Keys management
|
|
* Copyright (c) 2015, Intel Corporation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*/
|
|
#include <linux/debugfs.h> /* debugfs_create_u32() */
|
|
#include <linux/mm_types.h> /* mm_struct, vma, etc... */
|
|
#include <linux/pkeys.h> /* PKEY_* */
|
|
#include <uapi/asm-generic/mman-common.h>
|
|
|
|
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
|
|
#include <asm/mmu_context.h> /* vma_pkey() */
|
|
|
|
int __execute_only_pkey(struct mm_struct *mm)
|
|
{
|
|
bool need_to_set_mm_pkey = false;
|
|
int execute_only_pkey = mm->context.execute_only_pkey;
|
|
int ret;
|
|
|
|
/* Do we need to assign a pkey for mm's execute-only maps? */
|
|
if (execute_only_pkey == -1) {
|
|
/* Go allocate one to use, which might fail */
|
|
execute_only_pkey = mm_pkey_alloc(mm);
|
|
if (execute_only_pkey < 0)
|
|
return -1;
|
|
need_to_set_mm_pkey = true;
|
|
}
|
|
|
|
/*
|
|
* We do not want to go through the relatively costly
|
|
* dance to set PKRU if we do not need to. Check it
|
|
* first and assume that if the execute-only pkey is
|
|
* write-disabled that we do not have to set it
|
|
* ourselves.
|
|
*/
|
|
if (!need_to_set_mm_pkey &&
|
|
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
|
|
return execute_only_pkey;
|
|
}
|
|
|
|
/*
|
|
* Set up PKRU so that it denies access for everything
|
|
* other than execution.
|
|
*/
|
|
ret = arch_set_user_pkey_access(current, execute_only_pkey,
|
|
PKEY_DISABLE_ACCESS);
|
|
/*
|
|
* If the PKRU-set operation failed somehow, just return
|
|
* 0 and effectively disable execute-only support.
|
|
*/
|
|
if (ret) {
|
|
mm_set_pkey_free(mm, execute_only_pkey);
|
|
return -1;
|
|
}
|
|
|
|
/* We got one, store it and use it from here on out */
|
|
if (need_to_set_mm_pkey)
|
|
mm->context.execute_only_pkey = execute_only_pkey;
|
|
return execute_only_pkey;
|
|
}
|
|
|
|
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
|
|
{
|
|
/* Do this check first since the vm_flags should be hot */
|
|
if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
|
|
return false;
|
|
if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* This is only called for *plain* mprotect calls.
|
|
*/
|
|
int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey)
|
|
{
|
|
/*
|
|
* Is this an mprotect_pkey() call? If so, never
|
|
* override the value that came from the user.
|
|
*/
|
|
if (pkey != -1)
|
|
return pkey;
|
|
|
|
/*
|
|
* The mapping is execute-only. Go try to get the
|
|
* execute-only protection key. If we fail to do that,
|
|
* fall through as if we do not have execute-only
|
|
* support in this mm.
|
|
*/
|
|
if (prot == PROT_EXEC) {
|
|
pkey = execute_only_pkey(vma->vm_mm);
|
|
if (pkey > 0)
|
|
return pkey;
|
|
} else if (vma_is_pkey_exec_only(vma)) {
|
|
/*
|
|
* Protections are *not* PROT_EXEC, but the mapping
|
|
* is using the exec-only pkey. This mapping was
|
|
* PROT_EXEC and will no longer be. Move back to
|
|
* the default pkey.
|
|
*/
|
|
return ARCH_DEFAULT_PKEY;
|
|
}
|
|
|
|
/*
|
|
* This is a vanilla, non-pkey mprotect (or we failed to
|
|
* setup execute-only), inherit the pkey from the VMA we
|
|
* are working on.
|
|
*/
|
|
return vma_pkey(vma);
|
|
}
|
|
|
|
#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
|
|
|
|
/*
|
|
* Make the default PKRU value (at execve() time) as restrictive
|
|
* as possible. This ensures that any threads clone()'d early
|
|
* in the process's lifetime will not accidentally get access
|
|
* to data which is pkey-protected later on.
|
|
*/
|
|
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
|
|
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
|
|
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
|
|
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
|
|
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
|
|
|
|
/*
|
|
* Called from the FPU code when creating a fresh set of FPU
|
|
* registers. This is called from a very specific context where
|
|
* we know the FPU regstiers are safe for use and we can use PKRU
|
|
* directly.
|
|
*/
|
|
void copy_init_pkru_to_fpregs(void)
|
|
{
|
|
u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
|
|
/*
|
|
* Override the PKRU state that came from 'init_fpstate'
|
|
* with the baseline from the process.
|
|
*/
|
|
write_pkru(init_pkru_value_snapshot);
|
|
}
|
|
|
|
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
|
|
size_t count, loff_t *ppos)
|
|
{
|
|
char buf[32];
|
|
unsigned int len;
|
|
|
|
len = sprintf(buf, "0x%x\n", init_pkru_value);
|
|
return simple_read_from_buffer(user_buf, count, ppos, buf, len);
|
|
}
|
|
|
|
static ssize_t init_pkru_write_file(struct file *file,
|
|
const char __user *user_buf, size_t count, loff_t *ppos)
|
|
{
|
|
char buf[32];
|
|
ssize_t len;
|
|
u32 new_init_pkru;
|
|
|
|
len = min(count, sizeof(buf) - 1);
|
|
if (copy_from_user(buf, user_buf, len))
|
|
return -EFAULT;
|
|
|
|
/* Make the buffer a valid string that we can not overrun */
|
|
buf[len] = '\0';
|
|
if (kstrtouint(buf, 0, &new_init_pkru))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Don't allow insane settings that will blow the system
|
|
* up immediately if someone attempts to disable access
|
|
* or writes to pkey 0.
|
|
*/
|
|
if (new_init_pkru & (PKRU_AD_BIT|PKRU_WD_BIT))
|
|
return -EINVAL;
|
|
|
|
WRITE_ONCE(init_pkru_value, new_init_pkru);
|
|
return count;
|
|
}
|
|
|
|
static const struct file_operations fops_init_pkru = {
|
|
.read = init_pkru_read_file,
|
|
.write = init_pkru_write_file,
|
|
.llseek = default_llseek,
|
|
};
|
|
|
|
static int __init create_init_pkru_value(void)
|
|
{
|
|
debugfs_create_file("init_pkru", S_IRUSR | S_IWUSR,
|
|
arch_debugfs_dir, NULL, &fops_init_pkru);
|
|
return 0;
|
|
}
|
|
late_initcall(create_init_pkru_value);
|
|
|
|
static __init int setup_init_pkru(char *opt)
|
|
{
|
|
u32 new_init_pkru;
|
|
|
|
if (kstrtouint(opt, 0, &new_init_pkru))
|
|
return 1;
|
|
|
|
WRITE_ONCE(init_pkru_value, new_init_pkru);
|
|
|
|
return 1;
|
|
}
|
|
__setup("init_pkru=", setup_init_pkru);
|