mirror of
https://github.com/torvalds/linux.git
synced 2024-11-30 08:01:59 +00:00
f6e39794f4
The kernel can use vmalloc to allocate executable memory. The only supported way to do that is via __vmalloc_node_range() with the executable bit set in the prot argument. (vmap() resets the bit via pgprot_nx()). Once tag-based KASAN modes start tagging vmalloc allocations, executing code from such allocations will lead to the PC register getting a tag, which is not tolerated by the kernel. Only tag the allocations for normal kernel pages. [andreyknvl@google.com: pass KASAN_VMALLOC_PROT_NORMAL to kasan_unpoison_vmalloc()] Link: https://lkml.kernel.org/r/9230ca3d3e40ffca041c133a524191fd71969a8d.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: support tagged vmalloc mappings] Link: https://lkml.kernel.org/r/2f6605e3a358cf64d73a05710cb3da356886ad29.1646233925.git.andreyknvl@google.com [andreyknvl@google.com: don't unintentionally disabled poisoning] Link: https://lkml.kernel.org/r/de4587d6a719232e83c760113e46ed2d4d8da61e.1646757322.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/fbfd9939a4dc375923c9a5c6b9e7ab05c26b8c6b.1643047180.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov <andreyknvl@google.com> Acked-by: Marco Elver <elver@google.com> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Evgenii Stepanov <eugenis@google.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Collingbourne <pcc@google.com> Cc: Vincenzo Frascino <vincenzo.frascino@arm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
159 lines
2.9 KiB
C
159 lines
2.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Shadow Call Stack support.
|
|
*
|
|
* Copyright (C) 2019 Google LLC
|
|
*/
|
|
|
|
#include <linux/cpuhotplug.h>
|
|
#include <linux/kasan.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/scs.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/vmstat.h>
|
|
|
|
static void __scs_account(void *s, int account)
|
|
{
|
|
struct page *scs_page = vmalloc_to_page(s);
|
|
|
|
mod_node_page_state(page_pgdat(scs_page), NR_KERNEL_SCS_KB,
|
|
account * (SCS_SIZE / SZ_1K));
|
|
}
|
|
|
|
/* Matches NR_CACHED_STACKS for VMAP_STACK */
|
|
#define NR_CACHED_SCS 2
|
|
/*
 * Per-CPU cache of shadow call stacks that have been freed but not yet
 * returned to vmalloc: scs_free() parks a stack in an empty slot,
 * __scs_alloc() takes one back out, and scs_cleanup() vfree()s whatever
 * is still sitting in a given CPU's slots.
 */
static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
|
|
|
|
static void *__scs_alloc(int node)
|
|
{
|
|
int i;
|
|
void *s;
|
|
|
|
for (i = 0; i < NR_CACHED_SCS; i++) {
|
|
s = this_cpu_xchg(scs_cache[i], NULL);
|
|
if (s) {
|
|
s = kasan_unpoison_vmalloc(s, SCS_SIZE,
|
|
KASAN_VMALLOC_PROT_NORMAL);
|
|
memset(s, 0, SCS_SIZE);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
|
|
GFP_SCS, PAGE_KERNEL, 0, node,
|
|
__builtin_return_address(0));
|
|
|
|
out:
|
|
return kasan_reset_tag(s);
|
|
}
|
|
|
|
void *scs_alloc(int node)
|
|
{
|
|
void *s;
|
|
|
|
s = __scs_alloc(node);
|
|
if (!s)
|
|
return NULL;
|
|
|
|
*__scs_magic(s) = SCS_END_MAGIC;
|
|
|
|
/*
|
|
* Poison the allocation to catch unintentional accesses to
|
|
* the shadow stack when KASAN is enabled.
|
|
*/
|
|
kasan_poison_vmalloc(s, SCS_SIZE);
|
|
__scs_account(s, 1);
|
|
return s;
|
|
}
|
|
|
|
void scs_free(void *s)
|
|
{
|
|
int i;
|
|
|
|
__scs_account(s, -1);
|
|
|
|
/*
|
|
* We cannot sleep as this can be called in interrupt context,
|
|
* so use this_cpu_cmpxchg to update the cache, and vfree_atomic
|
|
* to free the stack.
|
|
*/
|
|
|
|
for (i = 0; i < NR_CACHED_SCS; i++)
|
|
if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
|
|
return;
|
|
|
|
kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL);
|
|
vfree_atomic(s);
|
|
}
|
|
|
|
static int scs_cleanup(unsigned int cpu)
|
|
{
|
|
int i;
|
|
void **cache = per_cpu_ptr(scs_cache, cpu);
|
|
|
|
for (i = 0; i < NR_CACHED_SCS; i++) {
|
|
vfree(cache[i]);
|
|
cache[i] = NULL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Boot-time setup: hook scs_cleanup() into the CPU hotplug machinery under
 * a dynamically allocated CPUHP_BP_PREPARE_DYN state named "scs:scs_cache".
 * The other callback slot is left NULL and the return value of the
 * registration is not checked.
 */
void __init scs_init(void)
{
	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN,
			  "scs:scs_cache",
			  NULL,
			  scs_cleanup);
}
|
|
|
|
int scs_prepare(struct task_struct *tsk, int node)
|
|
{
|
|
void *s = scs_alloc(node);
|
|
|
|
if (!s)
|
|
return -ENOMEM;
|
|
|
|
task_scs(tsk) = task_scs_sp(tsk) = s;
|
|
return 0;
|
|
}
|
|
|
|
static void scs_check_usage(struct task_struct *tsk)
|
|
{
|
|
static unsigned long highest;
|
|
|
|
unsigned long *p, prev, curr = highest, used = 0;
|
|
|
|
if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
|
|
return;
|
|
|
|
for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) {
|
|
if (!READ_ONCE_NOCHECK(*p))
|
|
break;
|
|
used += sizeof(*p);
|
|
}
|
|
|
|
while (used > curr) {
|
|
prev = cmpxchg_relaxed(&highest, curr, used);
|
|
|
|
if (prev == curr) {
|
|
pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
|
|
tsk->comm, task_pid_nr(tsk), used);
|
|
break;
|
|
}
|
|
|
|
curr = prev;
|
|
}
|
|
}
|
|
|
|
/*
 * Tear down the shadow call stack of @tsk, if it has one.
 *
 * Warns when the end-of-stack check reports corruption, feeds the stack
 * into the usage statistics and finally hands it to scs_free(). A task
 * without a shadow call stack is left untouched.
 */
void scs_release(struct task_struct *tsk)
{
	void *stack = task_scs(tsk);

	if (stack) {
		WARN(task_scs_end_corrupted(tsk),
		     "corrupted shadow stack detected when freeing task\n");
		scs_check_usage(tsk);
		scs_free(stack);
	}
}
|