From dc002bb62f10c5905420f8b8a7d5ec0da567fc82 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Fri, 23 Nov 2018 23:18:03 +0100
Subject: [PATCH 1/2] bpf: add __weak hook for allocating executable memory

By default, BPF uses module_alloc() to allocate executable memory, but
this is not necessary on all arches and potentially undesirable on some
of them.

So break out the module_alloc() and module_memfree() calls into __weak
functions to allow them to be overridden in arch code.

Signed-off-by: Ard Biesheuvel
Signed-off-by: Daniel Borkmann
---
 kernel/bpf/core.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f93ed667546f..86817ab204e8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -623,6 +623,16 @@ static void bpf_jit_uncharge_modmem(u32 pages)
 	atomic_long_sub(pages, &bpf_jit_current);
 }
 
+void *__weak bpf_jit_alloc_exec(unsigned long size)
+{
+	return module_alloc(size);
+}
+
+void __weak bpf_jit_free_exec(void *addr)
+{
+	module_memfree(addr);
+}
+
 struct bpf_binary_header *
 bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     unsigned int alignment,
@@ -640,7 +650,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	if (bpf_jit_charge_modmem(pages))
 		return NULL;
 
-	hdr = module_alloc(size);
+	hdr = bpf_jit_alloc_exec(size);
 	if (!hdr) {
 		bpf_jit_uncharge_modmem(pages);
 		return NULL;
@@ -664,7 +674,7 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 {
 	u32 pages = hdr->pages;
 
-	module_memfree(hdr);
+	bpf_jit_free_exec(hdr);
 	bpf_jit_uncharge_modmem(pages);
 }
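A __weak default is overridden simply by providing a strong definition of
the same symbol in arch code; the linker picks the strong definition at
build time. As a minimal sketch of such an override (hypothetical, not
part of this series; patch 2/2 below is the real arm64 version, which
additionally confines allocations to a dedicated region), an arch could
do something like:

#include <linux/vmalloc.h>

void *bpf_jit_alloc_exec(unsigned long size)
{
	/* Plain vmalloc-backed executable memory. Note that __vmalloc()
	 * took a pgprot argument in kernels of this era (pre-5.8).
	 */
	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL_EXEC);
}

void bpf_jit_free_exec(void *addr)
{
	vfree(addr);
}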
From 91fc957c9b1d6c55168df15a397cbd1af16bc00f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Fri, 23 Nov 2018 23:18:04 +0100
Subject: [PATCH 2/2] arm64/bpf: don't allocate BPF JIT programs in module memory

The arm64 module region is a 128 MB region that is kept close to the
core kernel, in order to ensure that relative branches are always in
range. So using the same region for programs that do not have this
restriction is wasteful, and preferably avoided.

Now that the core BPF JIT code permits the alloc/free routines to be
overridden, implement them by vmalloc()/vfree() calls from a dedicated
128 MB region set aside for BPF programs. This ensures that BPF
programs are still in branching range of each other, which is something
the JIT currently depends upon (and is not guaranteed when using
module_alloc() on KASLR kernels like we do currently). It also ensures
that placement of BPF programs does not correlate with the placement of
the core kernel or modules, making it less likely that leaking the
former will reveal the latter.

This also solves an issue under KASAN, where shadow memory is needlessly
allocated for all BPF programs (which don't require KASAN shadow pages
since they are not KASAN instrumented).

Signed-off-by: Ard Biesheuvel
Acked-by: Will Deacon
Signed-off-by: Daniel Borkmann
---
 arch/arm64/include/asm/memory.h |  5 ++++-
 arch/arm64/net/bpf_jit_comp.c   | 13 +++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b96442960aea..ee20fc63899c 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -62,8 +62,11 @@
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) - \
 	(UL(1) << (VA_BITS - 1)) + 1)
 #define KIMAGE_VADDR		(MODULES_END)
+#define BPF_JIT_REGION_START	(VA_START + KASAN_SHADOW_SIZE)
+#define BPF_JIT_REGION_SIZE	(SZ_128M)
+#define BPF_JIT_REGION_END	(BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
-#define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VADDR		(BPF_JIT_REGION_END)
 #define MODULES_VSIZE		(SZ_128M)
 #define VMEMMAP_START		(PAGE_OFFSET - VMEMMAP_SIZE)
 #define PCI_IO_END		(VMEMMAP_START - SZ_2M)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index b87285924023..0a7371a86139 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -943,3 +943,16 @@ out:
 					   tmp : orig_prog);
 	return prog;
 }
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+	return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+				    BPF_JIT_REGION_END, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+	return vfree(addr);
+}
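The 128 MB size of BPF_JIT_REGION_SIZE mirrors the reach of AArch64
direct branches: B and BL encode a signed 26-bit word offset, i.e.
+/-128 MB, so any two addresses inside one 128 MB window are always
mutually reachable with a single BL. A hypothetical helper (not part of
the patch) expressing that range check:

#include <linux/sizes.h>
#include <linux/types.h>

/* Offsets an AArch64 BL can encode: imm26 * 4 => [-SZ_128M, SZ_128M) */
static bool bl_offset_in_range(unsigned long pc, unsigned long target)
{
	long off = (long)target - (long)pc;

	return off >= -(long)SZ_128M && off < (long)SZ_128M;
}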