linux/arch/riscv/kernel/elf_kexec.c

483 lines
12 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Load ELF vmlinux file for the kexec_file_load syscall.
*
* Copyright (C) 2021 Huawei Technologies Co, Ltd.
*
* Author: Liao Chang (liaochang1@huawei.com)
*
* Based on kexec-tools' kexec-elf-riscv.c, heavily modified
* for kernel.
*/
#define pr_fmt(fmt) "kexec_image: " fmt
#include <linux/elf.h>
#include <linux/kexec.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/libfdt.h>
#include <linux/types.h>
#include <linux/memblock.h>
fix missing vmalloc.h includes Patch series "Memory allocation profiling", v6. Overview: Low overhead [1] per-callsite memory allocation profiling. Not just for debug kernels, overhead low enough to be deployed in production. Example output: root@moria-kvm:~# sort -rn /proc/allocinfo 127664128 31168 mm/page_ext.c:270 func:alloc_page_ext 56373248 4737 mm/slub.c:2259 func:alloc_slab_page 14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded 14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash 13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs 11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio 9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node 4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable 4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start 3940352 962 mm/memory.c:4214 func:alloc_anon_folio 2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node ... Usage: kconfig options: - CONFIG_MEM_ALLOC_PROFILING - CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT - CONFIG_MEM_ALLOC_PROFILING_DEBUG adds warnings for allocations that weren't accounted because of a missing annotation sysctl: /proc/sys/vm/mem_profiling Runtime info: /proc/allocinfo Notes: [1]: Overhead To measure the overhead we are comparing the following configurations: (1) Baseline with CONFIG_MEMCG_KMEM=n (2) Disabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n) (3) Enabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=y) (4) Enabled at runtime (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n && /proc/sys/vm/mem_profiling=1) (5) Baseline with CONFIG_MEMCG_KMEM=y && allocating with __GFP_ACCOUNT (6) Disabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=n) && CONFIG_MEMCG_KMEM=y (7) Enabled by default (CONFIG_MEM_ALLOC_PROFILING=y && CONFIG_MEM_ALLOC_PROFILING_BY_DEFAULT=y) && CONFIG_MEMCG_KMEM=y Performance overhead: To evaluate performance we implemented an in-kernel test executing multiple get_free_page/free_page and kmalloc/kfree calls with allocation sizes growing from 8 to 240 bytes with CPU frequency set to max and CPU affinity set to a specific CPU to minimize the noise. Below are results from running the test on Ubuntu 22.04.2 LTS with 6.8.0-rc1 kernel on 56 core Intel Xeon: kmalloc pgalloc (1 baseline) 6.764s 16.902s (2 default disabled) 6.793s (+0.43%) 17.007s (+0.62%) (3 default enabled) 7.197s (+6.40%) 23.666s (+40.02%) (4 runtime enabled) 7.405s (+9.48%) 23.901s (+41.41%) (5 memcg) 13.388s (+97.94%) 48.460s (+186.71%) (6 def disabled+memcg) 13.332s (+97.10%) 48.105s (+184.61%) (7 def enabled+memcg) 13.446s (+98.78%) 54.963s (+225.18%) Memory overhead: Kernel size: text data bss dec diff (1) 26515311 18890222 17018880 62424413 (2) 26524728 19423818 16740352 62688898 264485 (3) 26524724 19423818 16740352 62688894 264481 (4) 26524728 19423818 16740352 62688898 264485 (5) 26541782 18964374 16957440 62463596 39183 Memory consumption on a 56 core Intel CPU with 125GB of memory: Code tags: 192 kB PageExts: 262144 kB (256MB) SlabExts: 9876 kB (9.6MB) PcpuExts: 512 kB (0.5MB) Total overhead is 0.2% of total memory. Benchmarks: Hackbench tests run 100 times: hackbench -s 512 -l 200 -g 15 -f 25 -P baseline disabled profiling enabled profiling avg 0.3543 0.3559 (+0.0016) 0.3566 (+0.0023) stdev 0.0137 0.0188 0.0077 hackbench -l 10000 baseline disabled profiling enabled profiling avg 6.4218 6.4306 (+0.0088) 6.5077 (+0.0859) stdev 0.0933 0.0286 0.0489 stress-ng tests: stress-ng --class memory --seq 4 -t 60 stress-ng --class cpu --seq 4 -t 60 Results posted at: https://evilpiepirate.org/~kent/memalloc_prof_v4_stress-ng/ [2] https://lore.kernel.org/all/20240306182440.2003814-1-surenb@google.com/ This patch (of 37): The next patch drops vmalloc.h from a system header in order to fix a circular dependency; this adds it to all the files that were pulling it in implicitly. [kent.overstreet@linux.dev: fix arch/alpha/lib/memcpy.c] Link: https://lkml.kernel.org/r/20240327002152.3339937-1-kent.overstreet@linux.dev [surenb@google.com: fix arch/x86/mm/numa_32.c] Link: https://lkml.kernel.org/r/20240402180933.1663992-1-surenb@google.com [kent.overstreet@linux.dev: a few places were depending on sizes.h] Link: https://lkml.kernel.org/r/20240404034744.1664840-1-kent.overstreet@linux.dev [arnd@arndb.de: fix mm/kasan/hw_tags.c] Link: https://lkml.kernel.org/r/20240404124435.3121534-1-arnd@kernel.org [surenb@google.com: fix arc build] Link: https://lkml.kernel.org/r/20240405225115.431056-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-1-surenb@google.com Link: https://lkml.kernel.org/r/20240321163705.3067592-2-surenb@google.com Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> Signed-off-by: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com> Tested-by: Kees Cook <keescook@chromium.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alex Gaynor <alex.gaynor@gmail.com> Cc: Alice Ryhl <aliceryhl@google.com> Cc: Andreas Hindborg <a.hindborg@samsung.com> Cc: Benno Lossin <benno.lossin@proton.me> Cc: "Björn Roy Baron" <bjorn3_gh@protonmail.com> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Christoph Lameter <cl@linux.com> Cc: Dennis Zhou <dennis@kernel.org> Cc: Gary Guo <gary@garyguo.net> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Wedson Almeida Filho <wedsonaf@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-03-21 16:36:23 +00:00
#include <linux/vmalloc.h>
#include <asm/setup.h>
RISC-V: kexec: Fix memory leak of fdt buffer This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store fdt. While it's not freed back to system when kexec kernel is reloaded or unloaded. Then memory leak is caused. Fix it by introducing riscv specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Liao Chang <liaochang1@huawei.com> Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-11-04 09:56:57 +00:00
int arch_kimage_file_post_load_cleanup(struct kimage *image)
{
kvfree(image->arch.fdt);
image->arch.fdt = NULL;
vfree(image->elf_headers);
image->elf_headers = NULL;
image->elf_headers_sz = 0;
RISC-V: kexec: Fix memory leak of fdt buffer This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store fdt. While it's not freed back to system when kexec kernel is reloaded or unloaded. Then memory leak is caused. Fix it by introducing riscv specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Liao Chang <liaochang1@huawei.com> Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-11-04 09:56:57 +00:00
return kexec_image_post_load_cleanup_default(image);
}
static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
struct kexec_elf_info *elf_info, unsigned long old_pbase,
unsigned long new_pbase)
{
int i;
int ret = 0;
size_t size;
struct kexec_buf kbuf;
const struct elf_phdr *phdr;
kbuf.image = image;
for (i = 0; i < ehdr->e_phnum; i++) {
phdr = &elf_info->proghdrs[i];
if (phdr->p_type != PT_LOAD)
continue;
size = phdr->p_filesz;
if (size > phdr->p_memsz)
size = phdr->p_memsz;
kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
kbuf.bufsz = size;
kbuf.buf_align = phdr->p_align;
kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
kbuf.memsz = phdr->p_memsz;
kbuf.top_down = false;
ret = kexec_add_buffer(&kbuf);
if (ret)
break;
}
return ret;
}
/*
* Go through the available phsyical memory regions and find one that hold
* an image of the specified size.
*/
static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
unsigned long *old_pbase, unsigned long *new_pbase)
{
int i;
int ret;
struct kexec_buf kbuf;
const struct elf_phdr *phdr;
unsigned long lowest_paddr = ULONG_MAX;
unsigned long lowest_vaddr = ULONG_MAX;
for (i = 0; i < ehdr->e_phnum; i++) {
phdr = &elf_info->proghdrs[i];
if (phdr->p_type != PT_LOAD)
continue;
if (lowest_paddr > phdr->p_paddr)
lowest_paddr = phdr->p_paddr;
if (lowest_vaddr > phdr->p_vaddr)
lowest_vaddr = phdr->p_vaddr;
}
kbuf.image = image;
kbuf.buf_min = lowest_paddr;
kbuf.buf_max = ULONG_MAX;
/*
* Current riscv boot protocol requires 2MB alignment for
* RV64 and 4MB alignment for RV32
*
*/
kbuf.buf_align = PMD_SIZE;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
kbuf.top_down = false;
ret = arch_kexec_locate_mem_hole(&kbuf);
if (!ret) {
*old_pbase = lowest_paddr;
*new_pbase = kbuf.mem;
image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
}
return ret;
}
#ifdef CONFIG_CRASH_DUMP
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
(*nr_ranges)++;
return 0;
}
static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
struct crash_mem *cmem = arg;
cmem->ranges[cmem->nr_ranges].start = res->start;
cmem->ranges[cmem->nr_ranges].end = res->end;
cmem->nr_ranges++;
return 0;
}
static int prepare_elf_headers(void **addr, unsigned long *sz)
{
struct crash_mem *cmem;
unsigned int nr_ranges;
int ret;
nr_ranges = 1; /* For exclusion of crashkernel region */
walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
if (!cmem)
return -ENOMEM;
cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0;
ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
if (ret)
goto out;
/* Exclude crashkernel region */
ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
if (!ret)
ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
out:
kfree(cmem);
return ret;
}
static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
unsigned long cmdline_len)
{
int elfcorehdr_strlen;
char *cmdline_ptr;
cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
if (!cmdline_ptr)
return NULL;
elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
image->elf_load_addr);
if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
kfree(cmdline_ptr);
return NULL;
}
memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
/* Ensure it's nul terminated */
cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
return cmdline_ptr;
}
#endif
static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
unsigned long kernel_len, char *initrd,
unsigned long initrd_len, char *cmdline,
unsigned long cmdline_len)
{
int ret;
void *fdt;
unsigned long old_kernel_pbase = ULONG_MAX;
unsigned long new_kernel_pbase = 0UL;
unsigned long initrd_pbase = 0UL;
unsigned long kernel_start;
struct elfhdr ehdr;
struct kexec_buf kbuf;
struct kexec_elf_info elf_info;
char *modified_cmdline = NULL;
ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
return ERR_PTR(ret);
ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info,
&old_kernel_pbase, &new_kernel_pbase);
if (ret)
goto out;
kernel_start = image->start;
/* Add the kernel binary to the image */
ret = riscv_kexec_elf_load(image, &ehdr, &elf_info,
old_kernel_pbase, new_kernel_pbase);
if (ret)
goto out;
kbuf.image = image;
kbuf.buf_min = new_kernel_pbase + kernel_len;
kbuf.buf_max = ULONG_MAX;
#ifdef CONFIG_CRASH_DUMP
/* Add elfcorehdr */
if (image->type == KEXEC_TYPE_CRASH) {
void *headers;
unsigned long headers_sz;
ret = prepare_elf_headers(&headers, &headers_sz);
if (ret) {
pr_err("Preparing elf core header failed\n");
goto out;
}
kbuf.buffer = headers;
kbuf.bufsz = headers_sz;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
kbuf.memsz = headers_sz;
kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
kbuf.top_down = true;
ret = kexec_add_buffer(&kbuf);
if (ret) {
vfree(headers);
goto out;
}
image->elf_headers = headers;
image->elf_load_addr = kbuf.mem;
image->elf_headers_sz = headers_sz;
kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
/* Setup cmdline for kdump kernel case */
modified_cmdline = setup_kdump_cmdline(image, cmdline,
cmdline_len);
if (!modified_cmdline) {
pr_err("Setting up cmdline for kdump kernel failed\n");
ret = -EINVAL;
goto out;
}
cmdline = modified_cmdline;
}
#endif
#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
/* Add purgatory to the image */
kbuf.top_down = true;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_load_purgatory(image, &kbuf);
if (ret) {
pr_err("Error loading purgatory ret=%d\n", ret);
goto out;
}
kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry",
&kernel_start,
sizeof(kernel_start), 0);
if (ret)
pr_err("Error update purgatory ret=%d\n", ret);
#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
/* Add the initrd to the image */
if (initrd != NULL) {
kbuf.buffer = initrd;
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
kbuf.top_down = true;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
goto out;
initrd_pbase = kbuf.mem;
kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
}
/* Add the DTB to the image */
fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase,
initrd_len, cmdline, 0);
if (!fdt) {
pr_err("Error setting up the new device tree.\n");
ret = -EINVAL;
goto out;
}
fdt_pack(fdt);
kbuf.buffer = fdt;
kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
kbuf.buf_align = PAGE_SIZE;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
kbuf.top_down = true;
ret = kexec_add_buffer(&kbuf);
if (ret) {
pr_err("Error add DTB kbuf ret=%d\n", ret);
goto out_free_fdt;
}
RISC-V: kexec: Fix memory leak of fdt buffer This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store fdt. While it's not freed back to system when kexec kernel is reloaded or unloaded. Then memory leak is caused. Fix it by introducing riscv specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Liao Chang <liaochang1@huawei.com> Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2022-11-04 09:56:57 +00:00
/* Cache the fdt buffer address for memory cleanup */
image->arch.fdt = fdt;
kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
goto out;
out_free_fdt:
kvfree(fdt);
out:
kfree(modified_cmdline);
kexec_free_elf_info(&elf_info);
return ret ? ERR_PTR(ret) : NULL;
}
#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
#define RISCV_IMM_BITS 12
#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
#define RISCV_CONST_HIGH_PART(x) \
(((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))
#define ENCODE_ITYPE_IMM(x) \
(RV_X(x, 0, 12) << 20)
#define ENCODE_BTYPE_IMM(x) \
((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
(RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
#define ENCODE_UTYPE_IMM(x) \
(RV_X(x, 12, 20) << 12)
#define ENCODE_JTYPE_IMM(x) \
((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
(RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
#define ENCODE_CBTYPE_IMM(x) \
((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
(RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
#define ENCODE_CJTYPE_IMM(x) \
((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
(RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
(RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
#define ENCODE_UJTYPE_IMM(x) \
(ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
(ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
#define ENCODE_UITYPE_IMM(x) \
(ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))
#define CLEAN_IMM(type, x) \
((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
Elf_Shdr *section,
const Elf_Shdr *relsec,
const Elf_Shdr *symtab)
{
const char *strtab, *name, *shstrtab;
const Elf_Shdr *sechdrs;
Elf64_Rela *relas;
int i, r_type;
/* String & section header string table */
sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
relas = (void *)pi->ehdr + relsec->sh_offset;
for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
const Elf_Sym *sym; /* symbol to relocate */
unsigned long addr; /* final location after relocation */
unsigned long val; /* relocated symbol value */
unsigned long sec_base; /* relocated symbol value */
void *loc; /* tmp location to modify */
sym = (void *)pi->ehdr + symtab->sh_offset;
sym += ELF64_R_SYM(relas[i].r_info);
if (sym->st_name)
name = strtab + sym->st_name;
else
name = shstrtab + sechdrs[sym->st_shndx].sh_name;
loc = pi->purgatory_buf;
loc += section->sh_offset;
loc += relas[i].r_offset;
if (sym->st_shndx == SHN_ABS)
sec_base = 0;
else if (sym->st_shndx >= pi->ehdr->e_shnum) {
pr_err("Invalid section %d for symbol %s\n",
sym->st_shndx, name);
return -ENOEXEC;
} else
sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
val = sym->st_value;
val += sec_base;
val += relas[i].r_addend;
addr = section->sh_addr + relas[i].r_offset;
r_type = ELF64_R_TYPE(relas[i].r_info);
switch (r_type) {
case R_RISCV_BRANCH:
*(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
ENCODE_BTYPE_IMM(val - addr);
break;
case R_RISCV_JAL:
*(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
ENCODE_JTYPE_IMM(val - addr);
break;
/*
* With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
* sym is expected to be next to R_RISCV_PCREL_HI20
* in purgatory relsec. Handle it like R_RISCV_CALL
* sym, instead of searching the whole relsec.
*/
case R_RISCV_PCREL_HI20:
case R_RISCV_CALL_PLT:
case R_RISCV_CALL:
*(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
ENCODE_UJTYPE_IMM(val - addr);
break;
case R_RISCV_RVC_BRANCH:
*(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
ENCODE_CBTYPE_IMM(val - addr);
break;
case R_RISCV_RVC_JUMP:
*(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
ENCODE_CJTYPE_IMM(val - addr);
break;
case R_RISCV_ADD16:
*(u16 *)loc += val;
break;
case R_RISCV_SUB16:
*(u16 *)loc -= val;
break;
case R_RISCV_ADD32:
*(u32 *)loc += val;
break;
case R_RISCV_SUB32:
*(u32 *)loc -= val;
break;
/* It has been applied by R_RISCV_PCREL_HI20 sym */
case R_RISCV_PCREL_LO12_I:
case R_RISCV_ALIGN:
case R_RISCV_RELAX:
break;
default:
pr_err("Unknown rela relocation: %d\n", r_type);
return -ENOEXEC;
}
}
return 0;
}
const struct kexec_file_ops elf_kexec_ops = {
.probe = kexec_elf_probe,
.load = elf_kexec_load,
};