riscv: Introduce CONFIG_RELOCATABLE

This config allows to compile 64b kernel as PIE and to relocate it at
any virtual address at runtime: this paves the way to KASLR.
Runtime relocation is possible since relocation metadata are embedded into
the kernel.

Note that relocating at runtime introduces an overhead even if the
kernel is loaded at the same address it was linked at and that the compiler
options are those used in arm64 which uses the same RELA relocation
format.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20230329045329.64565-4-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
Alexandre Ghiti 2023-03-29 06:53:26 +02:00 committed by Palmer Dabbelt
parent 69a90d2fe1
commit 39b3307294
No known key found for this signature in database
GPG Key ID: 2E1319F35FBB1889
5 changed files with 91 additions and 5 deletions

View File

@ -561,6 +561,20 @@ config COMPAT
If you want to execute 32-bit userspace applications, say Y. If you want to execute 32-bit userspace applications, say Y.
config RELOCATABLE
bool "Build a relocatable kernel"
depends on MMU && 64BIT && !XIP_KERNEL
help
This builds a kernel as a Position Independent Executable (PIE),
which retains all relocation metadata required to relocate the
kernel binary at runtime to a different virtual address than the
address it was linked at.
Since RISCV uses the RELA relocation format, this requires a
relocation pass at runtime even if the kernel is loaded at the
same address it was linked at.
If unsure, say N.
endmenu # "Kernel features" endmenu # "Kernel features"
menu "Boot options" menu "Boot options"

View File

@ -7,9 +7,12 @@
# #
OBJCOPYFLAGS := -O binary OBJCOPYFLAGS := -O binary
LDFLAGS_vmlinux := ifeq ($(CONFIG_RELOCATABLE),y)
LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
KBUILD_CFLAGS += -fPIE
endif
ifeq ($(CONFIG_DYNAMIC_FTRACE),y) ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux := --no-relax LDFLAGS_vmlinux += --no-relax
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
ifeq ($(CONFIG_RISCV_ISA_C),y) ifeq ($(CONFIG_RISCV_ISA_C),y)
CC_FLAGS_FTRACE := -fpatchable-function-entry=4 CC_FLAGS_FTRACE := -fpatchable-function-entry=4

View File

@ -122,10 +122,23 @@ SECTIONS
*(.sdata*) *(.sdata*)
} }
.rela.dyn : { .rela.dyn : ALIGN(8) {
*(.rela*) __rela_dyn_start = .;
*(.rela .rela*)
__rela_dyn_end = .;
} }
#ifdef CONFIG_RELOCATABLE
.data.rel : { *(.data.rel*) }
.got : { *(.got*) }
.plt : { *(.plt) }
.dynamic : { *(.dynamic) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
#endif
#ifdef CONFIG_EFI #ifdef CONFIG_EFI
.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end); __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);

View File

@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
CFLAGS_init.o := -mcmodel=medany CFLAGS_init.o := -mcmodel=medany
ifdef CONFIG_RELOCATABLE
CFLAGS_init.o += -fno-pie
endif
ifdef CONFIG_FTRACE ifdef CONFIG_FTRACE
CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)

View File

@ -20,6 +20,9 @@
#include <linux/dma-map-ops.h> #include <linux/dma-map-ops.h>
#include <linux/crash_dump.h> #include <linux/crash_dump.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#ifdef CONFIG_RELOCATABLE
#include <linux/elf.h>
#endif
#include <asm/fixmap.h> #include <asm/fixmap.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
@ -146,7 +149,7 @@ static void __init print_vm_layout(void)
print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif #endif
print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR, print_ml("kernel", (unsigned long)kernel_map.virt_addr,
(unsigned long)ADDRESS_SPACE_END); (unsigned long)ADDRESS_SPACE_END);
} }
} }
@ -820,6 +823,44 @@ retry:
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif #endif
#ifdef CONFIG_RELOCATABLE
extern unsigned long __rela_dyn_start, __rela_dyn_end;
static void __init relocate_kernel(void)
{
Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
/*
* This holds the offset between the linked virtual address and the
* relocated virtual address.
*/
uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
/*
* This holds the offset between kernel linked virtual address and
* physical address.
*/
uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
Elf64_Addr relocated_addr = rela->r_addend;
if (rela->r_info != R_RISCV_RELATIVE)
continue;
/*
* Make sure to not relocate vdso symbols like rt_sigreturn
* which are linked from the address 0 in vmlinux since
* vdso symbol addresses are actually used as an offset from
* mm->context.vdso in VDSO_OFFSET macro.
*/
if (relocated_addr >= KERNEL_LINK_ADDR)
relocated_addr += reloc_offset;
*(Elf64_Addr *)addr = relocated_addr;
}
}
#endif /* CONFIG_RELOCATABLE */
#ifdef CONFIG_XIP_KERNEL #ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir, static void __init create_kernel_page_table(pgd_t *pgdir,
__always_unused bool early) __always_unused bool early)
@ -1007,6 +1048,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif #endif
#ifdef CONFIG_RELOCATABLE
/*
* Early page table uses only one PUD, which makes it possible
* to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
* makes the kernel cross over a PUD_SIZE boundary, raise a bug
* since a part of the kernel would not get mapped.
*/
BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
relocate_kernel();
#endif
apply_early_boot_alternatives(); apply_early_boot_alternatives();
pt_ops_set_early(); pt_ops_set_early();