mirror of
https://github.com/torvalds/linux.git
synced 2024-12-22 02:52:56 +00:00
3a02764c37
Andreas reported commit fc8504765e
("riscv: bpf: Avoid breaking W^X") breaks booting with one kind of defconfig, I reproduced a kernel panic with the defconfig: [ 0.138553] Unable to handle kernel paging request at virtual address ffffffff81201220 [ 0.139159] Oops [#1] [ 0.139303] Modules linked in: [ 0.139601] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5-default+ #1 [ 0.139934] Hardware name: riscv-virtio,qemu (DT) [ 0.140193] epc : __memset+0xc4/0xfc [ 0.140416] ra : skb_flow_dissector_init+0x1e/0x82 [ 0.140609] epc : ffffffff8029806c ra : ffffffff8033be78 sp : ffffffe001647da0 [ 0.140878] gp : ffffffff81134b08 tp : ffffffe001654380 t0 : ffffffff81201158 [ 0.141156] t1 : 0000000000000002 t2 : 0000000000000154 s0 : ffffffe001647dd0 [ 0.141424] s1 : ffffffff80a43250 a0 : ffffffff81201220 a1 : 0000000000000000 [ 0.141654] a2 : 000000000000003c a3 : ffffffff81201258 a4 : 0000000000000064 [ 0.141893] a5 : ffffffff8029806c a6 : 0000000000000040 a7 : ffffffffffffffff [ 0.142126] s2 : ffffffff81201220 s3 : 0000000000000009 s4 : ffffffff81135088 [ 0.142353] s5 : ffffffff81135038 s6 : ffffffff8080ce80 s7 : ffffffff80800438 [ 0.142584] s8 : ffffffff80bc6578 s9 : 0000000000000008 s10: ffffffff806000ac [ 0.142810] s11: 0000000000000000 t3 : fffffffffffffffc t4 : 0000000000000000 [ 0.143042] t5 : 0000000000000155 t6 : 00000000000003ff [ 0.143220] status: 0000000000000120 badaddr: ffffffff81201220 cause: 000000000000000f [ 0.143560] [<ffffffff8029806c>] __memset+0xc4/0xfc [ 0.143859] [<ffffffff8061e984>] init_default_flow_dissectors+0x22/0x60 [ 0.144092] [<ffffffff800010fc>] do_one_initcall+0x3e/0x168 [ 0.144278] [<ffffffff80600df0>] kernel_init_freeable+0x1c8/0x224 [ 0.144479] [<ffffffff804868a8>] kernel_init+0x12/0x110 [ 0.144658] [<ffffffff800022de>] ret_from_exception+0x0/0xc [ 0.145124] ---[ end trace f1e9643daa46d591 ]--- After some investigation, I think I found the root cause: commit2bfc6cd81b
("move kernel mapping outside of linear mapping") moves BPF JIT region after the kernel: | #define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) The &_end is unlikely aligned with PMD size, so the front bpf jit region sits with part of kernel .data section in one PMD size mapping. But kernel is mapped in PMD SIZE, when bpf_jit_binary_lock_ro() is called to make the first bpf jit prog ROX, we will make part of kernel .data section RO too, so when we write to, for example memset the .data section, MMU will trigger a store page fault. To fix the issue, we need to ensure the BPF JIT region is PMD size aligned. This patch achieves this goal by restoring the BPF JIT region to original position, i.e. the 128MB before kernel .text section. The modification to kasan_init.c is inspired by Alexandre. Fixes: fc8504765e
("riscv: bpf: Avoid breaking W^X") Reported-by: Andreas Schwab <schwab@linux-m68k.org> Signed-off-by: Jisheng Zhang <jszhang@kernel.org> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
215 lines
6.1 KiB
C
215 lines
6.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (C) 2019 Andes Technology Corporation
|
|
|
|
#include <linux/pfn.h>
|
|
#include <linux/init_task.h>
|
|
#include <linux/kasan.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/pgtable.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/fixmap.h>
|
|
#include <asm/pgalloc.h>
|
|
|
|
/*
 * Page directory defined outside this file; kasan_early_init() fills its
 * KASAN shadow slots with the early shadow PMD table.
 */
extern pgd_t early_pg_dir[PTRS_PER_PGD];
|
|
/*
 * Point the whole KASAN shadow region at the single early shadow page.
 *
 * Every slot of kasan_early_shadow_pte maps kasan_early_shadow_page, every
 * slot of kasan_early_shadow_pmd references kasan_early_shadow_pte, and every
 * PGD slot covering [KASAN_SHADOW_START, KASAN_SHADOW_END) references
 * kasan_early_shadow_pmd, first in early_pg_dir and then in the directory
 * returned by pgd_offset_k(). The local TLB is flushed once at the end.
 */
asmlinkage void __init kasan_early_init(void)
{
	/* The three entry values are loop invariant, so build each one once. */
	const pte_t shadow_pte = mk_pte(virt_to_page(kasan_early_shadow_page),
					PAGE_KERNEL);
	const pmd_t pte_table =
		pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)),
			__pgprot(_PAGE_TABLE));
	const pgd_t pmd_table =
		pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
			__pgprot(_PAGE_TABLE));
	uintptr_t slot, addr;
	pgd_t *pgd;

	for (slot = 0; slot < PTRS_PER_PTE; slot++)
		set_pte(&kasan_early_shadow_pte[slot], shadow_pte);

	for (slot = 0; slot < PTRS_PER_PMD; slot++)
		set_pmd(&kasan_early_shadow_pmd[slot], pte_table);

	/* Shadow slots of early_pg_dir */
	pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
	addr = KASAN_SHADOW_START;
	while (addr < KASAN_SHADOW_END) {
		set_pgd(pgd, pmd_table);
		addr += PGDIR_SIZE;
		pgd++;
	}

	/* Same again for swapper_pg_dir */
	pgd = pgd_offset_k(KASAN_SHADOW_START);
	addr = KASAN_SHADOW_START;
	while (addr < KASAN_SHADOW_END) {
		set_pgd(pgd, pmd_table);
		addr += PGDIR_SIZE;
		pgd++;
	}

	local_flush_tlb_all();
}
|
|
|
|
/*
 * Back the shadow range [vaddr, end) with pages at PTE granularity.
 *
 * @pmd:   PMD entry covering the range; it is rewritten to reference the PTE
 *         table once every PTE in the range has been filled in.
 * @vaddr: first address of the range; advanced in PAGE_SIZE steps.
 * @end:   end of the range (exclusive); the loop stops when vaddr == end.
 *
 * If *pmd is empty a new PTE table is allocated from memblock, otherwise the
 * table it already references is reused. Each empty PTE in the range gets its
 * own freshly allocated page mapped with PAGE_KERNEL.
 *
 * Allocation failures panic: there is no way to continue without shadow
 * memory, and carrying on would dereference a NULL table or map physical
 * page 0 as shadow.
 */
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
	phys_addr_t phys_addr;
	pte_t *ptep, *base_pte;

	if (pmd_none(*pmd)) {
		base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
		if (!base_pte)
			panic("%s: failed to allocate PTE table\n", __func__);
	} else {
		base_pte = (pte_t *)pmd_page_vaddr(*pmd);
	}

	ptep = base_pte + pte_index(vaddr);

	do {
		if (pte_none(*ptep)) {
			phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
			if (!phys_addr)
				panic("%s: failed to allocate shadow page\n", __func__);
			set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
		}
	} while (ptep++, vaddr += PAGE_SIZE, vaddr != end);

	/* Publish the table only after all of its entries are populated. */
	set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
|
|
|
|
/*
 * Back the shadow range [vaddr, end) at PMD level.
 *
 * @pgd:   PGD entry covering the range; rewritten at the end to reference the
 *         PMD table that was populated.
 * @vaddr: first address of the range.
 * @end:   end of the range (exclusive); the loop stops when vaddr == end.
 *
 * If the PGD still references the shared kasan_early_shadow_pmd table, a
 * private PMD table is allocated to replace it. For each PMD-sized step, an
 * empty, PMD_SIZE-aligned and fully covered slot is mapped with one PMD_SIZE
 * allocation when memblock can provide it; otherwise the step is populated
 * page by page through kasan_populate_pte().
 *
 * A failed PMD table allocation panics instead of indexing a NULL table.
 */
static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
{
	phys_addr_t phys_addr;
	pmd_t *pmdp, *base_pmd;
	unsigned long next;

	base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
	if (base_pmd == lm_alias(kasan_early_shadow_pmd)) {
		base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
		if (!base_pmd)
			panic("%s: failed to allocate PMD table\n", __func__);
	}

	pmdp = base_pmd + pmd_index(vaddr);

	do {
		next = pmd_addr_end(vaddr, end);

		if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
			phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
			if (phys_addr) {
				set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
				continue;
			}
			/* No PMD_SIZE block available: fall back to single pages. */
		}

		kasan_populate_pte(pmdp, vaddr, next);
	} while (pmdp++, vaddr = next, vaddr != end);

	/*
	 * Wait for the whole PGD to be populated before setting the PGD in
	 * the page table, otherwise, if we did set the PGD before populating
	 * it entirely, memblock could allocate a page at a physical address
	 * where KASAN is not populated yet and then we'd get a page fault.
	 */
	set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
}
|
|
|
|
/*
 * Back the shadow range [vaddr, end) starting from the PGD level.
 *
 * Walks the kernel page directory one PGD slot at a time. A slot is mapped
 * with a single PGDIR_SIZE allocation when it is eligible and memblock can
 * provide the memory; in every other case the slot is handed to
 * kasan_populate_pmd().
 */
static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
{
	pgd_t *slot = pgd_offset_k(vaddr);
	unsigned long next;
	phys_addr_t huge_pa;
	bool mapped_huge;

	do {
		next = pgd_addr_end(vaddr, end);
		mapped_huge = false;

		/*
		 * A PGD slot is never empty here: kasan_early_init() pointed the
		 * entire shadow region at kasan_early_shadow_pmd. A slot that
		 * still references that table, and whose range is aligned and
		 * fully covered, is a candidate for a hugepage replacement.
		 */
		if (pgd_page_vaddr(*slot) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
		    IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
			huge_pa = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
			if (huge_pa) {
				set_pgd(slot, pfn_pgd(PFN_DOWN(huge_pa), PAGE_KERNEL));
				mapped_huge = true;
			}
		}

		if (!mapped_huge)
			kasan_populate_pmd(slot, vaddr, next);

		slot++;
		vaddr = next;
	} while (vaddr != end);
}
|
|
|
|
/*
 * Allocate real shadow memory for [start, end) and initialise it.
 *
 * The range is widened to page boundaries for the page-table walk, the local
 * TLB is flushed, and then exactly the bytes in [start, end) are filled with
 * KASAN_SHADOW_INIT.
 */
static void __init kasan_populate(void *start, void *end)
{
	kasan_populate_pgd((unsigned long)start & PAGE_MASK,
			   PAGE_ALIGN((unsigned long)end));

	local_flush_tlb_all();
	memset(start, KASAN_SHADOW_INIT, end - start);
}
|
|
|
|
/*
 * Give every PGD slot in [vaddr, end) that still references the shared
 * kasan_early_shadow_pmd table a private, page-sized next-level table.
 *
 * @vaddr: first address of the range.
 * @end:   end of the range (exclusive); the loop stops when vaddr == end.
 *
 * Only the PGD level is touched; nothing below the new table is populated
 * here. A failed allocation panics rather than installing __pa(NULL) in the
 * page directory.
 */
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
	unsigned long next;
	void *p;
	pgd_t *pgd_k = pgd_offset_k(vaddr);

	do {
		next = pgd_addr_end(vaddr, end);
		if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
			p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
			if (!p)
				panic("%s: failed to allocate page table\n", __func__);
			set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
		}
	} while (pgd_k++, vaddr = next, vaddr != end);
}
|
|
|
|
/*
 * Shallow-populate the shadow range [start, end): widen it to page
 * boundaries, let kasan_shallow_populate_pgd() install the PGD-level tables,
 * then flush the local TLB.
 */
static void __init kasan_shallow_populate(void *start, void *end)
{
	kasan_shallow_populate_pgd((unsigned long)start & PAGE_MASK,
				   PAGE_ALIGN((unsigned long)end));
	local_flush_tlb_all();
}
|
|
|
|
/*
 * Build the final KASAN shadow mapping.
 *
 * Steps, in order:
 *  1. Cover the shadow from KASAN_SHADOW_START up to the shadow of
 *     VMEMMAP_END with the early shadow page.
 *  2. Handle the vmalloc shadow: shallow-populate it when
 *     CONFIG_KASAN_VMALLOC is enabled, otherwise use the early shadow page.
 *  3. Allocate real shadow for every memblock memory range (linear mapping).
 *  4. Allocate real shadow for the 2G window starting at MODULES_VADDR
 *     (kernel, BPF and modules mapping).
 *  5. Remap the early shadow page without _PAGE_WRITE, refill it with
 *     KASAN_SHADOW_INIT and reset init_task.kasan_depth to 0.
 *
 * Note: the locals are deliberately not named _start/_end, since
 * MODULES_VADDR expands to an expression that references the _end symbol.
 */
void __init kasan_init(void)
{
	phys_addr_t p_start, p_end;
	void *vmalloc_shadow_start, *vmalloc_shadow_end;
	pte_t ro_shadow_pte;
	u64 i;

	/*
	 * Start by backing the kernel virtual address space with
	 * kasan_early_shadow_page; the linear mapping and the
	 * modules/kernel/BPF mapping get real shadow memory further down.
	 */
	kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
				    (void *)kasan_mem_to_shadow((void *)VMEMMAP_END));

	vmalloc_shadow_start = kasan_mem_to_shadow((void *)VMALLOC_START);
	vmalloc_shadow_end = kasan_mem_to_shadow((void *)VMALLOC_END);
	if (!IS_ENABLED(CONFIG_KASAN_VMALLOC))
		kasan_populate_early_shadow(vmalloc_shadow_start, vmalloc_shadow_end);
	else
		kasan_shallow_populate(vmalloc_shadow_start, vmalloc_shadow_end);

	/* Real shadow for the linear mapping, one memory range at a time */
	for_each_mem_range(i, &p_start, &p_end) {
		void *lm_start = (void *)__va(p_start);
		void *lm_end = (void *)__va(p_end);

		if (lm_start >= lm_end)
			break;

		kasan_populate(kasan_mem_to_shadow(lm_start), kasan_mem_to_shadow(lm_end));
	}

	/* Real shadow for the kernel, BPF and modules mapping */
	kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
		       kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G));

	/* Re-install the early shadow page in every early PTE, minus write access */
	ro_shadow_pte = mk_pte(virt_to_page(kasan_early_shadow_page),
			       __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED));
	for (i = 0; i < PTRS_PER_PTE; i++)
		set_pte(&kasan_early_shadow_pte[i], ro_shadow_pte);

	memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
	init_task.kasan_depth = 0;
}
|