From a1d2a6b4cee858a2f27eebce731fbf1dfd72cb4e Mon Sep 17 00:00:00 2001
From: Alan Kao
Date: Tue, 13 Feb 2018 13:13:16 +0800
Subject: [PATCH 01/21] riscv/ftrace: Add RECORD_MCOUNT support

Now recordmcount.pl recognizes RISC-V object files. For the mechanism
to work, we have to disable linker relaxation.

Cc: Greentime Hu
Signed-off-by: Alan Kao
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig      | 1 +
 arch/riscv/Makefile     | 3 +++
 scripts/recordmcount.pl | 5 +++++
 3 files changed, 9 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 04807c7f64cc..c9ccc6ea4371 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -114,6 +114,7 @@ config ARCH_RV64I
 	select 64BIT
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
 
 endchoice

diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6719dd30ec5b..899226e0da7d 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -11,6 +11,9 @@
 LDFLAGS :=
 OBJCOPYFLAGS := -O binary
 LDFLAGS_vmlinux :=
+ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+	LDFLAGS_vmlinux := --no-relax
+endif
 KBUILD_AFLAGS_MODULE += -fPIC
 KBUILD_CFLAGS_MODULE += -fPIC

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 2033af758173..d44d55db7c06 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -376,6 +376,11 @@ if ($arch eq "x86_64") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
     $type = ".quad";
     $alignment = 8;
+} elsif ($arch eq "riscv") {
+    $function_regex = "^([0-9a-fA-F]+)\\s+<([^.0-9][0-9a-zA-Z_\\.]+)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$";
+    $type = ".quad";
+    $alignment = 2;
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }

From c15ac4fd60d5ffdb151bb2c7805f377fd7f90363 Mon Sep 17 00:00:00 2001
From: Alan Kao
Date: Tue, 13 Feb 2018 13:13:17 +0800
Subject: [PATCH 02/21] riscv/ftrace: Add dynamic function tracer support

We now have dynamic ftrace with the following added items:

* ftrace_make_call, ftrace_make_nop (in kernel/ftrace.c)
  The two functions turn each recorded call site of a filtered function
  into either a call to ftrace_caller or nops.

* ftrace_update_ftrace_func (in kernel/ftrace.c)
  Turns the nops at ftrace_call into a call to a generic entry for
  function tracers.

* ftrace_caller (in kernel/mcount-dyn.S)
  The entry that each patched _mcount call site jumps to once the
  function is filtered for tracing.

Also, this patch fixes the semantic problems in mcount.S, which will be
treated as only a reference implementation once we have dynamic ftrace.
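(For reference, the patching described above boils down to writing either
two NOPs or an auipc+jalr pair over the 8-byte _mcount call site. The
stand-alone C sketch below mirrors the encoding performed by the
make_call()/to_auipc_insn()/to_jalr_insn() helpers this patch adds to
asm/ftrace.h; it is a user-space illustration only, and encode_call() is a
made-up name, not part of the patch.)

#include <stdint.h>
#include <stdio.h>

#define AUIPC_RA	0x00000097u	/* auipc ra, 0 */
#define JALR_RA		0x000080e7u	/* jalr  ra, 0(ra) */
#define NOP4		0x00000013u	/* addi  zero, zero, 0 */

/*
 * Split a 32-bit pc-relative offset into the auipc/jalr immediates; the
 * auipc part is bumped by 0x1000 whenever the low 12 bits will be
 * sign-extended by jalr.
 */
static void encode_call(uint32_t offset, uint32_t insn[2])
{
	uint32_t hi = offset & 0xfffff000u;
	uint32_t lo = offset & 0x00000fffu;

	if (offset & 0x800)
		hi += 0x1000;
	insn[0] = hi | AUIPC_RA;
	insn[1] = (lo << 20) | JALR_RA;
}

int main(void)
{
	uint32_t call[2];

	/* e.g. a callee located 0x1834 bytes after the call site */
	encode_call(0x1834, call);
	printf("auipc: %08x  jalr: %08x\n",
	       (unsigned int)call[0], (unsigned int)call[1]);
	return 0;
}

(Disabling a site simply writes NOP4 twice over the same 8 bytes.)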
Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/ftrace.h | 54 +++++++++++++++++ arch/riscv/kernel/Makefile | 5 +- arch/riscv/kernel/ftrace.c | 103 +++++++++++++++++++++++++++++++- arch/riscv/kernel/mcount-dyn.S | 50 ++++++++++++++++ arch/riscv/kernel/mcount.S | 22 ++++--- 6 files changed, 223 insertions(+), 12 deletions(-) create mode 100644 arch/riscv/kernel/mcount-dyn.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c9ccc6ea4371..1e9d878c1ac4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -115,6 +115,7 @@ config ARCH_RV64I select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_DYNAMIC_FTRACE endchoice diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 66d4175eb13e..078743aacfd3 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -8,3 +8,57 @@ #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER) #define HAVE_FUNCTION_GRAPH_FP_TEST #endif + +#ifndef __ASSEMBLY__ +void _mcount(void); +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * A general call in RISC-V is a pair of insts: + * 1) auipc: setting high-20 pc-related bits to ra register + * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to + * return address (original pc + 4) + * + * Dynamic ftrace generates probes to call sites, so we must deal with + * both auipc and jalr at the same time. + */ + +#define MCOUNT_ADDR ((unsigned long)_mcount) +#define JALR_SIGN_MASK (0x00000800) +#define JALR_OFFSET_MASK (0x00000fff) +#define AUIPC_OFFSET_MASK (0xfffff000) +#define AUIPC_PAD (0x00001000) +#define JALR_SHIFT 20 +#define JALR_BASIC (0x000080e7) +#define AUIPC_BASIC (0x00000097) +#define NOP4 (0x00000013) + +#define make_call(caller, callee, call) \ +do { \ + call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \ + (unsigned long)caller)); \ + call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \ + (unsigned long)caller)); \ +} while (0) + +#define to_jalr_insn(offset) \ + (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC) + +#define to_auipc_insn(offset) \ + ((offset & JALR_SIGN_MASK) ? \ + (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \ + ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC)) + +/* + * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. 
+ */ +#define MCOUNT_INSN_SIZE 8 +#endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 196f62ffc428..d7bdf888f1ca 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -34,7 +34,8 @@ CFLAGS_setup.o := -mcmodel=medany obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_FUNCTION_TRACER) += mcount.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o + +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o +obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o clean: diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index d0de68d144cb..be4b24332d97 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -6,9 +6,109 @@ */ #include +#include +#include +#ifdef CONFIG_DYNAMIC_FTRACE +static int ftrace_check_current_call(unsigned long hook_pos, + unsigned int *expected) +{ + unsigned int replaced[2]; + unsigned int nops[2] = {NOP4, NOP4}; + + /* we expect nops at the hook position */ + if (!expected) + expected = nops; + + /* + * Read the text we want to modify; + * return must be -EFAULT on read error + */ + if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* + * Make sure it is what we expect it to be; + * return must be -EINVAL on failed comparison + */ + if (memcmp(expected, replaced, sizeof(replaced))) { + pr_err("%p: expected (%08x %08x) but get (%08x %08x)", + (void *)hook_pos, expected[0], expected[1], replaced[0], + replaced[1]); + return -EINVAL; + } + + return 0; +} + +static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, + bool enable) +{ + unsigned int call[2]; + unsigned int nops[2] = {NOP4, NOP4}; + int ret = 0; + + make_call(hook_pos, target, call); + + /* replace the auipc-jalr pair at once */ + ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + MCOUNT_INSN_SIZE); + /* return must be -EPERM on write error */ + if (ret) + return -EPERM; + + smp_mb(); + flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE); + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + int ret = ftrace_check_current_call(rec->ip, NULL); + + if (ret) + return ret; + + return __ftrace_modify_call(rec->ip, addr, true); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned int call[2]; + int ret; + + make_call(rec->ip, addr, call); + ret = ftrace_check_current_call(rec->ip, call); + + if (ret) + return ret; + + return __ftrace_modify_call(rec->ip, addr, false); +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + int ret = __ftrace_modify_call((unsigned long)&ftrace_call, + (unsigned long)func, true); + if (!ret) { + ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, + (unsigned long)func, true); + } + + return ret; +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* - * Most of this file is copied from arm64. + * Most of this function is copied from arm64. 
*/ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) @@ -39,3 +139,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; *parent = return_hooker; } +#endif diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S new file mode 100644 index 000000000000..a3ebeadbe698 --- /dev/null +++ b/arch/riscv/kernel/mcount-dyn.S @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + .text + + .macro SAVE_ABI_STATE + addi sp, sp, -16 + sd s0, 0(sp) + sd ra, 8(sp) + addi s0, sp, 16 + .endm + + .macro RESTORE_ABI_STATE + ld ra, 8(sp) + ld s0, 0(sp) + addi sp, sp, 16 + .endm + +ENTRY(ftrace_caller) + /* + * a0: the address in the caller when calling ftrace_caller + * a1: the caller's return address + */ + ld a1, -8(s0) + addi a0, ra, -MCOUNT_INSN_SIZE + SAVE_ABI_STATE +ftrace_call: + .global ftrace_call + /* + * For the dynamic ftrace to work, here we should reserve at least + * 8 bytes for a functional auipc-jalr pair. The following call + * serves this purpose. + * + * Calling ftrace_update_ftrace_func would overwrite the nops below. + * Check ftrace_modify_all_code for details. + */ + call ftrace_stub + RESTORE_ABI_STATE + ret +ENDPROC(ftrace_caller) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index c46a778627be..ce9bdc57a2a1 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -32,13 +32,13 @@ addi s0, sp, 32 .endm - .macro STORE_ABI_STATE + .macro RESTORE_ABI_STATE ld ra, 8(sp) ld s0, 0(sp) addi sp, sp, 16 .endm - .macro STORE_RET_ABI_STATE + .macro RESTORE_RET_ABI_STATE ld ra, 24(sp) ld s0, 16(sp) ld a0, 8(sp) @@ -46,6 +46,10 @@ .endm ENTRY(ftrace_stub) +#ifdef CONFIG_DYNAMIC_FTRACE + .global _mcount + .set _mcount, ftrace_stub +#endif ret ENDPROC(ftrace_stub) @@ -66,15 +70,15 @@ ENTRY(return_to_handler) #ifdef HAVE_FUNCTION_GRAPH_FP_TEST mv a0, t6 #endif - la t0, ftrace_return_to_handler - jalr t0 + call ftrace_return_to_handler mv a1, a0 - STORE_RET_ABI_STATE + RESTORE_RET_ABI_STATE jalr a1 ENDPROC(return_to_handler) EXPORT_SYMBOL(return_to_handler) #endif +#ifndef CONFIG_DYNAMIC_FTRACE ENTRY(_mcount) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -104,9 +108,8 @@ do_ftrace_graph_caller: ld a2, -16(s0) #endif SAVE_ABI_STATE - la t0, prepare_ftrace_return - jalr t0 - STORE_ABI_STATE + call prepare_ftrace_return + RESTORE_ABI_STATE ret #endif @@ -120,7 +123,8 @@ do_trace: SAVE_ABI_STATE jalr t5 - STORE_ABI_STATE + RESTORE_ABI_STATE ret ENDPROC(_mcount) EXPORT_SYMBOL(_mcount) +#endif From bc1a4c3a842556852bb02039887d73899d513532 Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:18 +0800 Subject: [PATCH 03/21] riscv/ftrace: Add dynamic function graph tracer support Once the function_graph tracer is enabled, a filtered function has the following call sequence: * ftracer_caller ==> on/off by ftrace_make_call/ftrace_make_nop * ftrace_graph_caller * ftrace_graph_call ==> on/off by ftrace_en/disable_ftrace_graph_caller * prepare_ftrace_return Considering the following DYNAMIC_FTRACE_WITH_REGS feature, it would be more extendable to have a ftrace_graph_caller function, instead of calling prepare_ftrace_return directly in ftrace_caller. 
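(At the end of the sequence above, prepare_ftrace_return() diverts the
traced function's return address so that the exit can be recorded and the
original address restored by return_to_handler. The toy user-space model
below shows only that diversion; hook_return() and return_hooker() are
made-up names, and the real kernel keeps the saved address on a per-task
return stack rather than in a single variable.)

#include <stdio.h>

static unsigned long saved_ra;		/* kernel: per-task return stack */

static void return_hooker(void)		/* stands in for return_to_handler */
{
	printf("traced function returned; real return address = %#lx\n",
	       saved_ra);
}

/* stands in for prepare_ftrace_return(parent, self_addr, frame_pointer) */
static void hook_return(unsigned long *parent)
{
	saved_ra = *parent;			/* remember the real return address */
	*parent = (unsigned long)return_hooker;	/* divert the return */
}

int main(void)
{
	unsigned long frame_ra = 0x12345678;	/* pretend ra slot in a stack frame */

	hook_return(&frame_ra);
	/* simulate the function "returning" through the patched slot */
	((void (*)(void))frame_ra)();
	return 0;
}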
Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ftrace.c | 55 ++++++++++++++++++++++++++++- arch/riscv/kernel/mcount-dyn.S | 64 ++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index be4b24332d97..5bbe1afd9463 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -139,4 +139,57 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; *parent = return_hooker; } -#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned int call[2]; + static int init_graph = 1; + int ret; + + make_call(&ftrace_graph_call, &ftrace_stub, call); + + /* + * When enabling graph tracer for the first time, ftrace_graph_call + * should contains a call to ftrace_stub. Once it has been disabled, + * the 8-bytes at the position becomes NOPs. + */ + if (init_graph) { + ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call, + call); + init_graph = 0; + } else { + ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call, + NULL); + } + + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, + (unsigned long)&prepare_ftrace_return, true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned int call[2]; + int ret; + + make_call(&ftrace_graph_call, &prepare_ftrace_return, call); + + /* + * This is to make sure that ftrace_enable_ftrace_graph_caller + * did the right thing. + */ + ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call, + call); + + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_call, + (unsigned long)&prepare_ftrace_return, false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index a3ebeadbe698..739e07a6fd85 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -14,18 +14,62 @@ .text .macro SAVE_ABI_STATE +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + addi sp, sp, -48 + sd s0, 32(sp) + sd ra, 40(sp) + addi s0, sp, 48 + sd t0, 24(sp) + sd t1, 16(sp) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + sd t2, 8(sp) +#endif +#else addi sp, sp, -16 sd s0, 0(sp) sd ra, 8(sp) addi s0, sp, 16 +#endif .endm .macro RESTORE_ABI_STATE +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ld s0, 32(sp) + ld ra, 40(sp) + addi sp, sp, 48 +#else ld ra, 8(sp) ld s0, 0(sp) addi sp, sp, 16 +#endif .endm + .macro RESTORE_GRAPH_ARGS + ld a0, 24(sp) + ld a1, 16(sp) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld a2, 8(sp) +#endif + .endm + +ENTRY(ftrace_graph_caller) + addi sp, sp, -16 + sd s0, 0(sp) + sd ra, 8(sp) + addi s0, sp, 16 +ftrace_graph_call: + .global ftrace_graph_call + /* + * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the + * call below. Check ftrace_modify_all_code for details. + */ + call ftrace_stub + ld ra, 8(sp) + ld s0, 0(sp) + addi sp, sp, 16 + ret +ENDPROC(ftrace_graph_caller) + ENTRY(ftrace_caller) /* * a0: the address in the caller when calling ftrace_caller @@ -33,6 +77,20 @@ ENTRY(ftrace_caller) */ ld a1, -8(s0) addi a0, ra, -MCOUNT_INSN_SIZE + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* + * the graph tracer (specifically, prepare_ftrace_return) needs these + * arguments but for now the function tracer occupies the regs, so we + * save them in temporary regs to recover later. 
+ */ + addi t0, s0, -8 + mv t1, a0 +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld t2, -16(s0) +#endif +#endif + SAVE_ABI_STATE ftrace_call: .global ftrace_call @@ -45,6 +103,12 @@ ftrace_call: * Check ftrace_modify_all_code for details. */ call ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + RESTORE_GRAPH_ARGS + call ftrace_graph_caller +#endif + RESTORE_ABI_STATE ret ENDPROC(ftrace_caller) From 71e736a7d65551e49136c1efc4759e5902729cc2 Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:19 +0800 Subject: [PATCH 04/21] riscv/ftrace: Add ARCH_SUPPORTS_FTRACE_OPS support Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 1 + arch/riscv/kernel/mcount-dyn.S | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 078743aacfd3..fedadc40e358 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -9,6 +9,7 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST #endif +#define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 739e07a6fd85..6bbc3f88fcb3 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -74,9 +74,12 @@ ENTRY(ftrace_caller) /* * a0: the address in the caller when calling ftrace_caller * a1: the caller's return address + * a2: the address of global variable function_trace_op */ ld a1, -8(s0) addi a0, ra, -MCOUNT_INSN_SIZE + la t5, function_trace_op + ld a2, 0(t5) #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* From aea4c671fb985e6a9ffc365c43ea6f5e0d737fea Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:20 +0800 Subject: [PATCH 05/21] riscv/ftrace: Add DYNAMIC_FTRACE_WITH_REGS support Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/ftrace.h | 1 + arch/riscv/kernel/ftrace.c | 17 +++++ arch/riscv/kernel/mcount-dyn.S | 122 ++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1e9d878c1ac4..61dd82709898 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -116,6 +116,7 @@ config ARCH_RV64I select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS endchoice diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index fedadc40e358..c6dcc5291f97 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -8,6 +8,7 @@ #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER) #define HAVE_FUNCTION_GRAPH_FP_TEST #endif +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 5bbe1afd9463..48b5353691c3 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -106,6 +106,23 @@ int __init ftrace_dyn_arch_init(void) } #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned int call[2]; + int ret; + + make_call(rec->ip, old_addr, call); + ret = ftrace_check_current_call(rec->ip, call); + + if (ret) + return ret; + + return __ftrace_modify_call(rec->ip, addr, true); +} +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* 
* Most of this function is copied from arm64. diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 6bbc3f88fcb3..35a6ed76cb8b 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -115,3 +115,125 @@ ftrace_call: RESTORE_ABI_STATE ret ENDPROC(ftrace_caller) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + .macro SAVE_ALL + addi sp, sp, -(PT_SIZE_ON_STACK+16) + sd s0, (PT_SIZE_ON_STACK)(sp) + sd ra, (PT_SIZE_ON_STACK+8)(sp) + addi s0, sp, (PT_SIZE_ON_STACK+16) + + sd x1, PT_RA(sp) + sd x2, PT_SP(sp) + sd x3, PT_GP(sp) + sd x4, PT_TP(sp) + sd x5, PT_T0(sp) + sd x6, PT_T1(sp) + sd x7, PT_T2(sp) + sd x8, PT_S0(sp) + sd x9, PT_S1(sp) + sd x10, PT_A0(sp) + sd x11, PT_A1(sp) + sd x12, PT_A2(sp) + sd x13, PT_A3(sp) + sd x14, PT_A4(sp) + sd x15, PT_A5(sp) + sd x16, PT_A6(sp) + sd x17, PT_A7(sp) + sd x18, PT_S2(sp) + sd x19, PT_S3(sp) + sd x20, PT_S4(sp) + sd x21, PT_S5(sp) + sd x22, PT_S6(sp) + sd x23, PT_S7(sp) + sd x24, PT_S8(sp) + sd x25, PT_S9(sp) + sd x26, PT_S10(sp) + sd x27, PT_S11(sp) + sd x28, PT_T3(sp) + sd x29, PT_T4(sp) + sd x30, PT_T5(sp) + sd x31, PT_T6(sp) + .endm + + .macro RESTORE_ALL + ld x1, PT_RA(sp) + ld x2, PT_SP(sp) + ld x3, PT_GP(sp) + ld x4, PT_TP(sp) + ld x5, PT_T0(sp) + ld x6, PT_T1(sp) + ld x7, PT_T2(sp) + ld x8, PT_S0(sp) + ld x9, PT_S1(sp) + ld x10, PT_A0(sp) + ld x11, PT_A1(sp) + ld x12, PT_A2(sp) + ld x13, PT_A3(sp) + ld x14, PT_A4(sp) + ld x15, PT_A5(sp) + ld x16, PT_A6(sp) + ld x17, PT_A7(sp) + ld x18, PT_S2(sp) + ld x19, PT_S3(sp) + ld x20, PT_S4(sp) + ld x21, PT_S5(sp) + ld x22, PT_S6(sp) + ld x23, PT_S7(sp) + ld x24, PT_S8(sp) + ld x25, PT_S9(sp) + ld x26, PT_S10(sp) + ld x27, PT_S11(sp) + ld x28, PT_T3(sp) + ld x29, PT_T4(sp) + ld x30, PT_T5(sp) + ld x31, PT_T6(sp) + + ld s0, (PT_SIZE_ON_STACK)(sp) + ld ra, (PT_SIZE_ON_STACK+8)(sp) + addi sp, sp, (PT_SIZE_ON_STACK+16) + .endm + + .macro RESTORE_GRAPH_REG_ARGS + ld a0, PT_T0(sp) + ld a1, PT_T1(sp) +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld a2, PT_T2(sp) +#endif + .endm + +/* + * Most of the contents are the same as ftrace_caller. + */ +ENTRY(ftrace_regs_caller) + /* + * a3: the address of all registers in the stack + */ + ld a1, -8(s0) + addi a0, ra, -MCOUNT_INSN_SIZE + la t5, function_trace_op + ld a2, 0(t5) + addi a3, sp, -(PT_SIZE_ON_STACK+16) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + addi t0, s0, -8 + mv t1, a0 +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld t2, -16(s0) +#endif +#endif + SAVE_ALL + +ftrace_regs_call: + .global ftrace_regs_call + call ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + RESTORE_GRAPH_REG_ARGS + call ftrace_graph_caller +#endif + + RESTORE_ALL + ret +ENDPROC(ftrace_regs_caller) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ From b785ec129bd9498ecd6e221e45820b00c6ed4adf Mon Sep 17 00:00:00 2001 From: Alan Kao Date: Tue, 13 Feb 2018 13:13:21 +0800 Subject: [PATCH 06/21] riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support In walk_stackframe, the pc now receives the address from calling ftrace_graph_ret_addr instead of manual calculation. Note that the original calculation, pc = frame->ra - 4 is buggy when the instruction at the return address happened to be a compressed inst. But since it is not a critical part of ftrace, it is ignored for now to ease the review process. 
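(The reason ra - 4 is only an approximation: with the C extension the call
may have been a 2-byte compressed instruction, in which case the call site
is actually ra - 2. One possible heuristic, sketched below and not part of
this series, is to check whether the word before the return address really
decodes as a 4-byte jal/jalr with rd == ra; call_site_from_ra() is a
made-up name.)

/* jal opcode = 0x6f, jalr opcode = 0x67, rd field = bits [11:7], ra = x1 */
static unsigned long call_site_from_ra(unsigned long ra)
{
	unsigned int insn = *(unsigned int *)(ra - 4);	/* assumes readable text */
	unsigned int opcode = insn & 0x7f;
	unsigned int rd = (insn >> 7) & 0x1f;

	if ((opcode == 0x6f || opcode == 0x67) && rd == 1)
		return ra - 4;		/* a full-size call really starts here */

	return ra - 2;			/* otherwise assume a compressed call */
}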
Cc: Greentime Hu Signed-off-by: Alan Kao Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ftrace.c | 2 +- arch/riscv/kernel/stacktrace.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 48b5353691c3..1157b6b52d25 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -151,7 +151,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); + frame_pointer, parent); if (err == -EBUSY) return; *parent = return_hooker; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 559aae781154..a4b1d94371a0 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_FRAME_POINTER @@ -63,7 +64,12 @@ static void notrace walk_stackframe(struct task_struct *task, frame = (struct stackframe *)fp - 1; sp = fp; fp = frame->fp; +#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + (unsigned long *)(fp - 8)); +#else pc = frame->ra - 0x4; +#endif } } From 8d235b174af5d0af35ff206c15041fc2b02a0993 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 27 Feb 2018 03:24:11 +0100 Subject: [PATCH 07/21] riscv/barrier: Define __smp_{store_release,load_acquire} Introduce __smp_{store_release,load_acquire}, and rely on the generic definitions for smp_{store_release,load_acquire}. This avoids the use of full ("rw,rw") fences on SMP. Signed-off-by: Andrea Parri Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 5510366d169a..d4628e4b3a5e 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,6 +38,21 @@ #define __smp_rmb() RISCV_FENCE(r,r) #define __smp_wmb() RISCV_FENCE(w,w) +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + RISCV_FENCE(rw,w); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + RISCV_FENCE(r,rw); \ + ___p1; \ +}) + /* * This is a very specific barrier: it's currently only used in two places in * the kernel, both in the scheduler. See include/linux/spinlock.h for the two From 0123f4d76ca63b7b895f40089be0ce4809e392d8 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Fri, 9 Mar 2018 13:13:20 +0100 Subject: [PATCH 08/21] riscv/spinlock: Strengthen implementations with fences Current implementations map locking operations using .rl and .aq annotations. However, this mapping is unsound w.r.t. the kernel memory consistency model (LKMM) [1]: Referring to the "unlock-lock-read-ordering" test reported below, Daniel wrote: "I think an RCpc interpretation of .aq and .rl would in fact allow the two normal loads in P1 to be reordered [...] The intuition would be that the amoswap.w.aq can forward from the amoswap.w.rl while that's still in the store buffer, and then the lw x3,0(x4) can also perform while the amoswap.w.rl is still in the store buffer, all before the l1 x1,0(x2) executes. That's not forbidden unless the amoswaps are RCsc, unless I'm missing something. Likewise even if the unlock()/lock() is between two stores. 
A control dependency might originate from the load part of the amoswap.w.aq, but there still would have to be something to ensure that this load part in fact performs after the store part of the amoswap.w.rl performs globally, and that's not automatic under RCpc." Simulation of the RISC-V memory consistency model confirmed this expectation. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the locking operations by replacing .rl and .aq with the use of ("lightweigth") fences, resp., "fence rw, w" and "fence r , rw". C unlock-lock-read-ordering {} /* s initially owned by P1 */ P0(int *x, int *y) { WRITE_ONCE(*x, 1); smp_wmb(); WRITE_ONCE(*y, 1); } P1(int *x, int *y, spinlock_t *s) { int r0; int r1; r0 = READ_ONCE(*y); spin_unlock(s); spin_lock(s); r1 = READ_ONCE(*x); } exists (1:r0=1 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Signed-off-by: Andrea Parri Cc: Palmer Dabbelt Cc: Albert Ou Cc: Daniel Lustig Cc: Alan Stern Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: "Paul E. McKenney" Cc: Akira Yokosawa Cc: Ingo Molnar Cc: Linus Torvalds Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fence.h | 12 ++++++++++++ arch/riscv/include/asm/spinlock.h | 29 +++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 arch/riscv/include/asm/fence.h diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h new file mode 100644 index 000000000000..2b443a3a487f --- /dev/null +++ b/arch/riscv/include/asm/fence.h @@ -0,0 +1,12 @@ +#ifndef _ASM_RISCV_FENCE_H +#define _ASM_RISCV_FENCE_H + +#ifdef CONFIG_SMP +#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n" +#define RISCV_RELEASE_BARRIER "\tfence rw, w\n" +#else +#define RISCV_ACQUIRE_BARRIER +#define RISCV_RELEASE_BARRIER +#endif + +#endif /* _ASM_RISCV_FENCE_H */ diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index 2fd27e8ef1fd..8eb26d1ede81 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -17,6 +17,7 @@ #include #include +#include /* * Simple spin lock operations. These provide no fairness guarantees. 
@@ -28,10 +29,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { - __asm__ __volatile__ ( - "amoswap.w.rl x0, x0, %0" - : "=A" (lock->lock) - :: "memory"); + smp_store_release(&lock->lock, 0); } static inline int arch_spin_trylock(arch_spinlock_t *lock) @@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) int tmp = 1, busy; __asm__ __volatile__ ( - "amoswap.w.aq %0, %2, %1" + " amoswap.w %0, %2, %1\n" + RISCV_ACQUIRE_BARRIER : "=r" (busy), "+A" (lock->lock) : "r" (tmp) : "memory"); @@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bltz %1, 1b\n" " addi %1, %1, 1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER : "+A" (lock->lock), "=&r" (tmp) :: "memory"); } @@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bnez %1, 1b\n" " li %1, -1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER : "+A" (lock->lock), "=&r" (tmp) :: "memory"); } @@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bltz %1, 1f\n" " addi %1, %1, 1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER "1:\n" : "+A" (lock->lock), "=&r" (busy) :: "memory"); @@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) "1: lr.w %1, %0\n" " bnez %1, 1f\n" " li %1, -1\n" - " sc.w.aq %1, %1, %0\n" + " sc.w %1, %1, %0\n" " bnez %1, 1b\n" + RISCV_ACQUIRE_BARRIER "1:\n" : "+A" (lock->lock), "=&r" (busy) :: "memory"); @@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) static inline void arch_read_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( - "amoadd.w.rl x0, %1, %0" + RISCV_RELEASE_BARRIER + " amoadd.w x0, %1, %0\n" : "+A" (lock->lock) : "r" (-1) : "memory"); @@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) static inline void arch_write_unlock(arch_rwlock_t *lock) { - __asm__ __volatile__ ( - "amoswap.w.rl x0, x0, %0" - : "=A" (lock->lock) - :: "memory"); + smp_store_release(&lock->lock, 0); } #endif /* _ASM_RISCV_SPINLOCK_H */ From 5ce6c1f3535fa8d2134468547377b7b737042834 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Fri, 9 Mar 2018 13:13:40 +0100 Subject: [PATCH 09/21] riscv/atomic: Strengthen implementations with fences Atomics present the same issue with locking: release and acquire variants need to be strengthened to meet the constraints defined by the Linux-kernel memory consistency model [1]. Atomics present a further issue: implementations of atomics such as atomic_cmpxchg() and atomic_add_unless() rely on LR/SC pairs, which do not give full-ordering with .aqrl; for example, current implementations allow the "lr-sc-aqrl-pair-vs-full-barrier" test below to end up with the state indicated in the "exists" clause. In order to "synchronize" LKMM and RISC-V's implementation, this commit strengthens the implementations of the atomics operations by replacing .rl and .aq with the use of ("lightweigth") fences, and by replacing .aqrl LR/SC pairs in sequences such as: 0: lr.w.aqrl %0, %addr bne %0, %old, 1f ... sc.w.aqrl %1, %new, %addr bnez %1, 0b 1: with sequences of the form: 0: lr.w %0, %addr bne %0, %old, 1f ... sc.w.rl %1, %new, %addr /* SC-release */ bnez %1, 0b fence rw, rw /* "full" fence */ 1: following Daniel's suggestion. These modifications were validated with simulation of the RISC-V memory consistency model. 
C lr-sc-aqrl-pair-vs-full-barrier {} P0(int *x, int *y, atomic_t *u) { int r0; int r1; WRITE_ONCE(*x, 1); r0 = atomic_cmpxchg(u, 0, 1); r1 = READ_ONCE(*y); } P1(int *x, int *y, atomic_t *v) { int r0; int r1; WRITE_ONCE(*y, 1); r0 = atomic_cmpxchg(v, 0, 1); r1 = READ_ONCE(*x); } exists (u=1 /\ v=1 /\ 0:r1=0 /\ 1:r1=0) [1] https://marc.info/?l=linux-kernel&m=151930201102853&w=2 https://groups.google.com/a/groups.riscv.org/forum/#!topic/isa-dev/hKywNHBkAXM https://marc.info/?l=linux-kernel&m=151633436614259&w=2 Suggested-by: Daniel Lustig Signed-off-by: Andrea Parri Cc: Palmer Dabbelt Cc: Albert Ou Cc: Daniel Lustig Cc: Alan Stern Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Nicholas Piggin Cc: David Howells Cc: Jade Alglave Cc: Luc Maranget Cc: "Paul E. McKenney" Cc: Akira Yokosawa Cc: Ingo Molnar Cc: Linus Torvalds Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/atomic.h | 417 ++++++++++++++++++++----------- arch/riscv/include/asm/cmpxchg.h | 387 +++++++++++++++++++++++----- 2 files changed, 586 insertions(+), 218 deletions(-) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index e65d1cd89e28..855115ace98c 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -24,6 +24,20 @@ #include #define ATOMIC_INIT(i) { (i) } + +#define __atomic_op_acquire(op, args...) \ +({ \ + typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ + __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory"); \ + __ret; \ +}) + +#define __atomic_op_release(op, args...) \ +({ \ + __asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory"); \ + op##_relaxed(args); \ +}) + static __always_inline int atomic_read(const atomic_t *v) { return READ_ONCE(v->counter); @@ -50,22 +64,23 @@ static __always_inline void atomic64_set(atomic64_t *v, long i) * have the AQ or RL bits set. These don't return anything, so there's only * one version to worry about. */ -#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type " zero, %1, %0" \ - : "+A" (v->counter) \ - : "r" (I) \ - : "memory"); \ -} +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline \ +void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + __asm__ __volatile__ ( \ + " amo" #asm_op "." #asm_type " zero, %1, %0" \ + : "+A" (v->counter) \ + : "r" (I) \ + : "memory"); \ +} \ #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, I) \ +#define ATOMIC_OPS(op, asm_op, I) \ ATOMIC_OP (op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_OP (op, asm_op, I, w, int, ) \ +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) \ ATOMIC_OP (op, asm_op, I, d, long, 64) #endif @@ -79,75 +94,115 @@ ATOMIC_OPS(xor, xor, i) #undef ATOMIC_OPS /* - * Atomic ops that have ordered, relaxed, acquire, and relese variants. + * Atomic ops that have ordered, relaxed, acquire, and release variants. * There's two flavors of these: the arithmatic ops have both fetch and return * versions, while the logical ops only have fetch versions. */ -#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ -{ \ - register c_type ret; \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." 
#asm_type #asm_or " %1, %2, %0" \ - : "+A" (v->counter), "=r" (ret) \ - : "r" (I) \ - : "memory"); \ - return ret; \ +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op##_relaxed(c_type i, \ + atomic##prefix##_t *v) \ +{ \ + register c_type ret; \ + __asm__ __volatile__ ( \ + " amo" #asm_op "." #asm_type " %1, %2, %0" \ + : "+A" (v->counter), "=r" (ret) \ + : "r" (I) \ + : "memory"); \ + return ret; \ +} \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + register c_type ret; \ + __asm__ __volatile__ ( \ + " amo" #asm_op "." #asm_type ".aqrl %1, %2, %0" \ + : "+A" (v->counter), "=r" (ret) \ + : "r" (I) \ + : "memory"); \ + return ret; \ } -#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_fetch_##op##c_or(i, v) c_op I; \ +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_##op##_return_relaxed(c_type i, \ + atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I; \ +} \ +static __always_inline \ +c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op(i, v) c_op I; \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64) #endif -ATOMIC_OPS(add, add, +, i, , _relaxed) -ATOMIC_OPS(add, add, +, i, .aq , _acquire) -ATOMIC_OPS(add, add, +, i, .rl , _release) -ATOMIC_OPS(add, add, +, i, .aqrl, ) +ATOMIC_OPS(add, add, +, i) +ATOMIC_OPS(sub, add, +, -i) -ATOMIC_OPS(sub, add, +, -i, , _relaxed) -ATOMIC_OPS(sub, add, +, -i, .aq , _acquire) -ATOMIC_OPS(sub, add, +, -i, .rl , _release) -ATOMIC_OPS(sub, add, +, -i, .aqrl, ) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_add_return atomic_add_return +#define atomic_sub_return atomic_sub_return + +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_add atomic_fetch_add +#define atomic_fetch_sub atomic_fetch_sub + +#ifndef CONFIG_GENERIC_ATOMIC64 +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_add_return atomic64_add_return +#define atomic64_sub_return atomic64_sub_return + +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed 
atomic64_fetch_sub_relaxed +#define atomic64_fetch_add atomic64_fetch_add +#define atomic64_fetch_sub atomic64_fetch_sub +#endif #undef ATOMIC_OPS #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64) #endif -ATOMIC_OPS(and, and, i, , _relaxed) -ATOMIC_OPS(and, and, i, .aq , _acquire) -ATOMIC_OPS(and, and, i, .rl , _release) -ATOMIC_OPS(and, and, i, .aqrl, ) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) -ATOMIC_OPS( or, or, i, , _relaxed) -ATOMIC_OPS( or, or, i, .aq , _acquire) -ATOMIC_OPS( or, or, i, .rl , _release) -ATOMIC_OPS( or, or, i, .aqrl, ) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_and atomic_fetch_and +#define atomic_fetch_or atomic_fetch_or +#define atomic_fetch_xor atomic_fetch_xor -ATOMIC_OPS(xor, xor, i, , _relaxed) -ATOMIC_OPS(xor, xor, i, .aq , _acquire) -ATOMIC_OPS(xor, xor, i, .rl , _release) -ATOMIC_OPS(xor, xor, i, .aqrl, ) +#ifndef CONFIG_GENERIC_ATOMIC64 +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_and atomic64_fetch_and +#define atomic64_fetch_or atomic64_fetch_or +#define atomic64_fetch_xor atomic64_fetch_xor +#endif #undef ATOMIC_OPS @@ -157,22 +212,24 @@ ATOMIC_OPS(xor, xor, i, .aqrl, ) /* * The extra atomic operations that are constructed from one of the core * AMO-based operations above (aside from sub, which is easier to fit above). - * These are required to perform a barrier, but they're OK this way because - * atomic_*_return is also required to perform a barrier. + * These are required to perform a full barrier, but they're OK this way + * because atomic_*_return is also required to perform a full barrier. 
+ * */ -#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \ -static __always_inline bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_##func_op##_return(i, v) comp_op I; \ +#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix) \ +static __always_inline \ +bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_##func_op##_return(i, v) comp_op I; \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, func_op, comp_op, I) \ - ATOMIC_OP (op, func_op, comp_op, I, int, ) +#define ATOMIC_OPS(op, func_op, comp_op, I) \ + ATOMIC_OP(op, func_op, comp_op, I, int, ) #else -#define ATOMIC_OPS(op, func_op, comp_op, I) \ - ATOMIC_OP (op, func_op, comp_op, I, int, ) \ - ATOMIC_OP (op, func_op, comp_op, I, long, 64) +#define ATOMIC_OPS(op, func_op, comp_op, I) \ + ATOMIC_OP(op, func_op, comp_op, I, int, ) \ + ATOMIC_OP(op, func_op, comp_op, I, long, 64) #endif ATOMIC_OPS(add_and_test, add, ==, 0) @@ -182,51 +239,87 @@ ATOMIC_OPS(add_negative, add, <, 0) #undef ATOMIC_OP #undef ATOMIC_OPS -#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ -{ \ - atomic##prefix##_##func_op(I, v); \ +#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ +static __always_inline \ +void atomic##prefix##_##op(atomic##prefix##_t *v) \ +{ \ + atomic##prefix##_##func_op(I, v); \ } -#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_fetch_##func_op(I, v); \ +#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##func_op##_relaxed(I, v); \ +} \ +static __always_inline \ +c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##func_op(I, v); \ } -#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \ -static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_fetch_##op(v) c_op I; \ +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix) \ +static __always_inline \ +c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op##_relaxed(v) c_op I; \ +} \ +static __always_inline \ +c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_fetch_##op(v) c_op I; \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_OP( op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ - ATOMIC_OP (op, asm_op, I, long, 64) \ - ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_OP( op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ + ATOMIC_OP( op, asm_op, I, long, 64) \ + ATOMIC_FETCH_OP( op, asm_op, I, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) #endif ATOMIC_OPS(inc, add, +, 1) ATOMIC_OPS(dec, add, +, -1) +#define 
atomic_inc_return_relaxed atomic_inc_return_relaxed +#define atomic_dec_return_relaxed atomic_dec_return_relaxed +#define atomic_inc_return atomic_inc_return +#define atomic_dec_return atomic_dec_return + +#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed +#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed +#define atomic_fetch_inc atomic_fetch_inc +#define atomic_fetch_dec atomic_fetch_dec + +#ifndef CONFIG_GENERIC_ATOMIC64 +#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed +#define atomic64_inc_return atomic64_inc_return +#define atomic64_dec_return atomic64_dec_return + +#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed +#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed +#define atomic64_fetch_inc atomic64_fetch_inc +#define atomic64_fetch_dec atomic64_fetch_dec +#endif + #undef ATOMIC_OPS #undef ATOMIC_OP #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \ -static __always_inline bool atomic##prefix##_##op(atomic##prefix##_t *v) \ -{ \ - return atomic##prefix##_##func_op##_return(v) comp_op I; \ +#define ATOMIC_OP(op, func_op, comp_op, I, prefix) \ +static __always_inline \ +bool atomic##prefix##_##op(atomic##prefix##_t *v) \ +{ \ + return atomic##prefix##_##func_op##_return(v) comp_op I; \ } ATOMIC_OP(inc_and_test, inc, ==, 0, ) @@ -238,19 +331,19 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64) #undef ATOMIC_OP -/* This is required to provide a barrier on success. */ +/* This is required to provide a full barrier on success. */ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) { int prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.w.aqrl %[p], %[c]\n\t" - "beq %[p], %[u], 1f\n\t" - "add %[rc], %[p], %[a]\n\t" - "sc.w.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.w %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add %[rc], %[p], %[a]\n" + " sc.w.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) : "memory"); @@ -263,13 +356,13 @@ static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u) long prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.d.aqrl %[p], %[c]\n\t" - "beq %[p], %[u], 1f\n\t" - "add %[rc], %[p], %[a]\n\t" - "sc.d.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.d %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add %[rc], %[p], %[a]\n" + " sc.d.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) : "memory"); @@ -300,37 +393,63 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v) /* * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a barrier. + * {cmp,}xchg and the operations that return, so they need a full barrier. */ -/* - * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by - * assigning the same barrier to both the LR and SC operations, but that might - * not make any sense. We're waiting on a memory model specification to - * determine exactly what the right thing to do is here. 
- */ -#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ -static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ -{ \ - return __cmpxchg(&(v->counter), o, n, size, asm_or, asm_or); \ -} \ -static __always_inline c_t atomic##prefix##_xchg##c_or(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg(n, &(v->counter), size, asm_or); \ +#define ATOMIC_OP(c_t, prefix, size) \ +static __always_inline \ +c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg_relaxed(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg_acquire(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg_release(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \ +{ \ + return __xchg(&(v->counter), n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \ + c_t o, c_t n) \ +{ \ + return __cmpxchg_relaxed(&(v->counter), o, n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \ + c_t o, c_t n) \ +{ \ + return __cmpxchg_acquire(&(v->counter), o, n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \ + c_t o, c_t n) \ +{ \ + return __cmpxchg_release(&(v->counter), o, n, size); \ +} \ +static __always_inline \ +c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \ +{ \ + return __cmpxchg(&(v->counter), o, n, size); \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(c_or, asm_or) \ - ATOMIC_OP( int, , c_or, 4, asm_or) +#define ATOMIC_OPS() \ + ATOMIC_OP( int, , 4) #else -#define ATOMIC_OPS(c_or, asm_or) \ - ATOMIC_OP( int, , c_or, 4, asm_or) \ - ATOMIC_OP(long, 64, c_or, 8, asm_or) +#define ATOMIC_OPS() \ + ATOMIC_OP( int, , 4) \ + ATOMIC_OP(long, 64, 8) #endif -ATOMIC_OPS( , .aqrl) -ATOMIC_OPS(_acquire, .aq) -ATOMIC_OPS(_release, .rl) -ATOMIC_OPS(_relaxed, ) +ATOMIC_OPS() #undef ATOMIC_OPS #undef ATOMIC_OP @@ -340,13 +459,13 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset) int prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.w.aqrl %[p], %[c]\n\t" - "sub %[rc], %[p], %[o]\n\t" - "bltz %[rc], 1f\n\t" - "sc.w.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.w %[p], %[c]\n" + " sub %[rc], %[p], %[o]\n" + " bltz %[rc], 1f\n" + " sc.w.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [o]"r" (offset) : "memory"); @@ -361,13 +480,13 @@ static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset) long prev, rc; __asm__ __volatile__ ( - "0:\n\t" - "lr.d.aqrl %[p], %[c]\n\t" - "sub %[rc], %[p], %[o]\n\t" - "bltz %[rc], 1f\n\t" - "sc.d.aqrl %[rc], %[rc], %[c]\n\t" - "bnez %[rc], 0b\n\t" - "1:" + "0: lr.d %[p], %[c]\n" + " sub %[rc], %[p], %[o]\n" + " bltz %[rc], 1f\n" + " sc.d.rl %[rc], %[rc], %[c]\n" + " bnez %[rc], 0b\n" + " fence rw, rw\n" + "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [o]"r" (offset) : "memory"); diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index db249dbc7b97..c12833f7b6bd 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -17,45 +17,153 @@ #include #include +#include -#define __xchg(new, ptr, 
size, asm_or) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "amoswap.w" #asm_or " %0, %2, %1" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "amoswap.d" #asm_or " %0, %2, %1" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ +#define __xchg_relaxed(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + " amoswap.w %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + " amoswap.d %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ }) -#define xchg(ptr, x) (__xchg((x), (ptr), sizeof(*(ptr)), .aqrl)) - -#define xchg32(ptr, x) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - xchg((ptr), (x)); \ +#define xchg_relaxed(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ + _x_, sizeof(*(ptr))); \ }) -#define xchg64(ptr, x) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - xchg((ptr), (x)); \ +#define __xchg_acquire(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + " amoswap.w %0, %2, %1\n" \ + RISCV_ACQUIRE_BARRIER \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + " amoswap.d %0, %2, %1\n" \ + RISCV_ACQUIRE_BARRIER \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg_acquire(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_acquire((ptr), \ + _x_, sizeof(*(ptr))); \ +}) + +#define __xchg_release(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + " amoswap.w %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + " amoswap.d %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg_release(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg_release((ptr), \ + _x_, sizeof(*(ptr))); \ +}) + +#define __xchg(ptr, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(new) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + " amoswap.w.aqrl %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + " amoswap.d.aqrl %0, %2, %1\n" \ + : "=r" (__ret), "+A" (*__ptr) \ + : "r" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr))); \ +}) + +#define xchg32(ptr, x) \ +({ \ + 
BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ + xchg((ptr), (x)); \ +}) + +#define xchg64(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + xchg((ptr), (x)); \ }) /* @@ -63,7 +171,7 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ -#define __cmpxchg(ptr, old, new, size, lrb, scb) \ +#define __cmpxchg_relaxed(ptr, old, new, size) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(ptr)) __old = (old); \ @@ -73,24 +181,22 @@ switch (size) { \ case 4: \ __asm__ __volatile__ ( \ - "0:" \ - "lr.w" #scb " %0, %2\n" \ - "bne %0, %z3, 1f\n" \ - "sc.w" #lrb " %1, %z4, %2\n" \ - "bnez %1, 0b\n" \ - "1:" \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" (__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ __asm__ __volatile__ ( \ - "0:" \ - "lr.d" #scb " %0, %2\n" \ - "bne %0, %z3, 1f\n" \ - "sc.d" #lrb " %1, %z4, %2\n" \ - "bnez %1, 0b\n" \ - "1:" \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" (__old), "rJ" (__new) \ : "memory"); \ @@ -101,34 +207,177 @@ __ret; \ }) -#define cmpxchg(ptr, o, n) \ - (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), .aqrl, .aqrl)) - -#define cmpxchg_local(ptr, o, n) \ - (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), , )) - -#define cmpxchg32(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - cmpxchg((ptr), (o), (n)); \ +#define cmpxchg_relaxed(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ }) -#define cmpxchg32_local(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - cmpxchg_local((ptr), (o), (n)); \ +#define __cmpxchg_acquire(ptr, old, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + register unsigned int __rc; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + RISCV_ACQUIRE_BARRIER \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + RISCV_ACQUIRE_BARRIER \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ }) -#define cmpxchg64(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg((ptr), (o), (n)); \ +#define cmpxchg_acquire(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ }) -#define cmpxchg64_local(ptr, o, n) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ +#define __cmpxchg_release(ptr, old, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + register unsigned int __rc; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w %1, %z4, %2\n" \ + " bnez %1, 
0b\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + RISCV_RELEASE_BARRIER \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define cmpxchg_release(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg_release((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ +}) + +#define __cmpxchg(ptr, old, new, size) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __ret; \ + register unsigned int __rc; \ + switch (size) { \ + case 4: \ + __asm__ __volatile__ ( \ + "0: lr.w %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.w.rl %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__ ( \ + "0: lr.d %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc.d.rl %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ + : "rJ" (__old), "rJ" (__new) \ + : "memory"); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + __ret; \ +}) + +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), \ + _o_, _n_, sizeof(*(ptr))); \ +}) + +#define cmpxchg_local(ptr, o, n) \ + (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) + +#define cmpxchg32(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ + cmpxchg((ptr), (o), (n)); \ +}) + +#define cmpxchg32_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ + cmpxchg_relaxed((ptr), (o), (n)) \ +}) + +#define cmpxchg64(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ +}) + +#define cmpxchg64_local(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_relaxed((ptr), (o), (n)); \ }) #endif /* _ASM_RISCV_CMPXCHG_H */ From ab1ef68e54019937cf859f2c86c9ead6f3e62f19 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:41 +0800 Subject: [PATCH 10/21] RISC-V: Add sections of PLT and GOT for kernel module The address of external symbols will locate more than 32-bit offset in 64-bit kernel with sv39 or sv48 virtual addressing. Module loader emits the GOT and PLT entries for data symbols and function symbols respectively. The PLT entry is a trampoline code for jumping to the 64-bit real address. The GOT entry is just the data symbol address. 
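For illustration only (needs_plt() is a hypothetical helper, and the
layout simply mirrors struct plt_entry introduced by this patch): a
call site is redirected to a PLT slot whenever the real target cannot
be reached by a PC-relative auipc+jalr pair, i.e. when the offset does
not fit in a signed 32-bit value, and the slot then bounces through
the full 64-bit address:

  struct plt_slot {              /* same layout as struct plt_entry */
          u32 insn_auipc;        /* auipc t0, 0x0       */
          u32 insn_ld;           /* ld    t1, 0x10(t0)  */
          u32 insn_jr;           /* jr    t1            */
          u64 symbol_addr;       /* real 64-bit target  */
  };

  static bool needs_plt(u64 target, u64 call_site)
  {
          s64 offset = target - call_site;

          /* true when the offset overflows a signed 32-bit immediate pair */
          return offset != (s64)(s32)offset;
  }

Because each slot lives in the module's own .plt section, it is always
within auipc+jalr range of the call sites that use it.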
Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 5 + arch/riscv/Makefile | 5 + arch/riscv/include/asm/module.h | 103 +++++++++++++++++++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/module-sections.c | 139 ++++++++++++++++++++++++++++ arch/riscv/kernel/module.lds | 7 ++ 6 files changed, 260 insertions(+) create mode 100644 arch/riscv/include/asm/module.h create mode 100644 arch/riscv/kernel/module-sections.c create mode 100644 arch/riscv/kernel/module.lds diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 04807c7f64cc..90ff52059794 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -131,6 +131,10 @@ choice bool "medium any code model" endchoice +config MODULE_SECTIONS + bool + select HAVE_MOD_ARCH_SPECIFIC + choice prompt "Maximum Physical Memory" default MAXPHYSMEM_2GB if 32BIT @@ -141,6 +145,7 @@ choice bool "2GiB" config MAXPHYSMEM_128GB depends on 64BIT && CMODEL_MEDANY + select MODULE_SECTIONS if MODULES bool "128GiB" endchoice diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6719dd30ec5b..c72d408c05c0 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -56,6 +56,11 @@ endif ifeq ($(CONFIG_CMODEL_MEDANY),y) KBUILD_CFLAGS += -mcmodel=medany endif +ifeq ($(CONFIG_MODULE_SECTIONS),y) + KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds +endif + +KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h new file mode 100644 index 000000000000..e61d73f82d4d --- /dev/null +++ b/arch/riscv/include/asm/module.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#ifndef _ASM_RISCV_MODULE_H +#define _ASM_RISCV_MODULE_H + +#include + +#define MODULE_ARCH_VERMAGIC "riscv" + +u64 module_emit_got_entry(struct module *mod, u64 val); +u64 module_emit_plt_entry(struct module *mod, u64 val); + +#ifdef CONFIG_MODULE_SECTIONS +struct mod_section { + struct elf64_shdr *shdr; + int num_entries; + int max_entries; +}; + +struct mod_arch_specific { + struct mod_section got; + struct mod_section plt; +}; + +struct got_entry { + u64 symbol_addr; /* the real variable address */ +}; + +static inline struct got_entry emit_got_entry(u64 val) +{ + return (struct got_entry) {val}; +} + +static inline struct got_entry *get_got_entry(u64 val, + const struct mod_section *sec) +{ + struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; + int i; + for (i = 0; i < sec->num_entries; i++) { + if (got[i].symbol_addr == val) + return &got[i]; + } + return NULL; +} + +struct plt_entry { + /* + * Trampoline code to real target address. The return address + * should be the original (pc+4) before entring plt entry. + * For 8 byte alignment of symbol_addr, + * don't pack structure to remove the padding. 
+ */ + u32 insn_auipc; /* auipc t0, 0x0 */ + u32 insn_ld; /* ld t1, 0x10(t0) */ + u32 insn_jr; /* jr t1 */ + u64 symbol_addr; /* the real jump target address */ +}; + +#define OPC_AUIPC 0x0017 +#define OPC_LD 0x3003 +#define OPC_JALR 0x0067 +#define REG_T0 0x5 +#define REG_T1 0x6 +#define IMM_OFFSET 0x10 + +static inline struct plt_entry emit_plt_entry(u64 val) +{ + /* + * U-Type encoding: + * +------------+----------+----------+ + * | imm[31:12] | rd[11:7] | opc[6:0] | + * +------------+----------+----------+ + * + * I-Type encoding: + * +------------+------------+--------+----------+----------+ + * | imm[31:20] | rs1[19:15] | funct3 | rd[11:7] | opc[6:0] | + * +------------+------------+--------+----------+----------+ + * + */ + return (struct plt_entry) { + OPC_AUIPC | (REG_T0 << 7), + OPC_LD | (IMM_OFFSET << 20) | (REG_T0 << 15) | (REG_T1 << 7), + OPC_JALR | (REG_T1 << 15), + val + }; +} + +static inline struct plt_entry *get_plt_entry(u64 val, + const struct mod_section *sec) +{ + struct plt_entry *plt = (struct plt_entry *)sec->shdr->sh_addr; + int i; + for (i = 0; i < sec->num_entries; i++) { + if (plt[i].symbol_addr == val) + return &plt[i]; + } + return NULL; +} + +#endif /* CONFIG_MODULE_SECTIONS */ + +#endif /* _ASM_RISCV_MODULE_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 196f62ffc428..d355e3c18278 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -34,6 +34,7 @@ CFLAGS_setup.o := -mcmodel=medany obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c new file mode 100644 index 000000000000..94ba1551eac3 --- /dev/null +++ b/arch/riscv/kernel/module-sections.c @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2014-2017 Linaro Ltd. 
+ * + * Copyright (C) 2018 Andes Technology Corporation + */ + +#include +#include +#include + +u64 module_emit_got_entry(struct module *mod, u64 val) +{ + struct mod_section *got_sec = &mod->arch.got; + int i = got_sec->num_entries; + struct got_entry *got = get_got_entry(val, got_sec); + + if (got) + return (u64)got; + + /* There is no duplicate entry, create a new one */ + got = (struct got_entry *)got_sec->shdr->sh_addr; + got[i] = emit_got_entry(val); + + got_sec->num_entries++; + BUG_ON(got_sec->num_entries > got_sec->max_entries); + + return (u64)&got[i]; +} + +u64 module_emit_plt_entry(struct module *mod, u64 val) +{ + struct mod_section *plt_sec = &mod->arch.plt; + struct plt_entry *plt = get_plt_entry(val, plt_sec); + int i = plt_sec->num_entries; + + if (plt) + return (u64)plt; + + /* There is no duplicate entry, create a new one */ + plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt[i] = emit_plt_entry(val); + + plt_sec->num_entries++; + BUG_ON(plt_sec->num_entries > plt_sec->max_entries); + + return (u64)&plt[i]; +} + +static int is_rela_equal(const Elf64_Rela *x, const Elf64_Rela *y) +{ + return x->r_info == y->r_info && x->r_addend == y->r_addend; +} + +static bool duplicate_rela(const Elf64_Rela *rela, int idx) +{ + int i; + for (i = 0; i < idx; i++) { + if (is_rela_equal(&rela[i], &rela[idx])) + return true; + } + return false; +} + +static void count_max_entries(Elf64_Rela *relas, int num, + unsigned int *plts, unsigned int *gots) +{ + unsigned int type, i; + + for (i = 0; i < num; i++) { + type = ELF64_R_TYPE(relas[i].r_info); + if (type == R_RISCV_CALL_PLT) { + if (!duplicate_rela(relas, i)) + (*plts)++; + } else if (type == R_RISCV_GOT_HI20) { + if (!duplicate_rela(relas, i)) + (*gots)++; + } + } +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned int num_plts = 0; + unsigned int num_gots = 0; + int i; + + /* + * Find the empty .got and .plt sections. 
+ */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + mod->arch.plt.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) + mod->arch.got.shdr = sechdrs + i; + } + + if (!mod->arch.plt.shdr) { + pr_err("%s: module PLT section(s) missing\n", mod->name); + return -ENOEXEC; + } + if (!mod->arch.got.shdr) { + pr_err("%s: module GOT section(s) missing\n", mod->name); + return -ENOEXEC; + } + + /* Calculate the maxinum number of entries */ + for (i = 0; i < ehdr->e_shnum; i++) { + Elf64_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset; + int num_rela = sechdrs[i].sh_size / sizeof(Elf64_Rela); + Elf64_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info; + + if (sechdrs[i].sh_type != SHT_RELA) + continue; + + /* ignore relocations that operate on non-exec sections */ + if (!(dst_sec->sh_flags & SHF_EXECINSTR)) + continue; + + count_max_entries(relas, num_rela, &num_plts, &num_gots); + } + + mod->arch.plt.shdr->sh_type = SHT_NOBITS; + mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry); + mod->arch.plt.num_entries = 0; + mod->arch.plt.max_entries = num_plts; + + mod->arch.got.shdr->sh_type = SHT_NOBITS; + mod->arch.got.shdr->sh_flags = SHF_ALLOC; + mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + mod->arch.got.num_entries = 0; + mod->arch.got.max_entries = num_gots; + + return 0; +} diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds new file mode 100644 index 000000000000..7ef580e62883 --- /dev/null +++ b/arch/riscv/kernel/module.lds @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +SECTIONS { + .plt (NOLOAD) : { BYTE(0) } + .got (NOLOAD) : { BYTE(0) } +} From b8bde0ef12bd43f013d879973a1900930bfb95ee Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:42 +0800 Subject: [PATCH 11/21] RISC-V: Add section of GOT.PLT for kernel module Separate the function symbol address from .plt to .got.plt section. The original plt entry has trampoline code with symbol address, there is a 32-bit padding bwtween jar instruction and symbol address. Extract the symbol address to .got.plt to reduce the module size. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/module.h | 40 ++++++++++++++++++----------- arch/riscv/kernel/module-sections.c | 21 +++++++++++++-- arch/riscv/kernel/module.lds | 1 + 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h index e61d73f82d4d..349df33808c4 100644 --- a/arch/riscv/include/asm/module.h +++ b/arch/riscv/include/asm/module.h @@ -21,6 +21,7 @@ struct mod_section { struct mod_arch_specific { struct mod_section got; struct mod_section plt; + struct mod_section got_plt; }; struct got_entry { @@ -48,13 +49,10 @@ struct plt_entry { /* * Trampoline code to real target address. The return address * should be the original (pc+4) before entring plt entry. - * For 8 byte alignment of symbol_addr, - * don't pack structure to remove the padding. 
*/ u32 insn_auipc; /* auipc t0, 0x0 */ u32 insn_ld; /* ld t1, 0x10(t0) */ u32 insn_jr; /* jr t1 */ - u64 symbol_addr; /* the real jump target address */ }; #define OPC_AUIPC 0x0017 @@ -62,9 +60,8 @@ struct plt_entry { #define OPC_JALR 0x0067 #define REG_T0 0x5 #define REG_T1 0x6 -#define IMM_OFFSET 0x10 -static inline struct plt_entry emit_plt_entry(u64 val) +static inline struct plt_entry emit_plt_entry(u64 val, u64 plt, u64 got_plt) { /* * U-Type encoding: @@ -78,24 +75,37 @@ static inline struct plt_entry emit_plt_entry(u64 val) * +------------+------------+--------+----------+----------+ * */ + u64 offset = got_plt - plt; + u32 hi20 = (offset + 0x800) & 0xfffff000; + u32 lo12 = (offset - hi20); return (struct plt_entry) { - OPC_AUIPC | (REG_T0 << 7), - OPC_LD | (IMM_OFFSET << 20) | (REG_T0 << 15) | (REG_T1 << 7), - OPC_JALR | (REG_T1 << 15), - val + OPC_AUIPC | (REG_T0 << 7) | hi20, + OPC_LD | (lo12 << 20) | (REG_T0 << 15) | (REG_T1 << 7), + OPC_JALR | (REG_T1 << 15) }; } -static inline struct plt_entry *get_plt_entry(u64 val, - const struct mod_section *sec) +static inline int get_got_plt_idx(u64 val, const struct mod_section *sec) { - struct plt_entry *plt = (struct plt_entry *)sec->shdr->sh_addr; + struct got_entry *got_plt = (struct got_entry *)sec->shdr->sh_addr; int i; for (i = 0; i < sec->num_entries; i++) { - if (plt[i].symbol_addr == val) - return &plt[i]; + if (got_plt[i].symbol_addr == val) + return i; } - return NULL; + return -1; +} + +static inline struct plt_entry *get_plt_entry(u64 val, + const struct mod_section *sec_plt, + const struct mod_section *sec_got_plt) +{ + struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr; + int got_plt_idx = get_got_plt_idx(val, sec_got_plt); + if (got_plt_idx >= 0) + return plt + got_plt_idx; + else + return NULL; } #endif /* CONFIG_MODULE_SECTIONS */ diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 94ba1551eac3..bbbd26e19bfd 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -30,18 +30,23 @@ u64 module_emit_got_entry(struct module *mod, u64 val) u64 module_emit_plt_entry(struct module *mod, u64 val) { + struct mod_section *got_plt_sec = &mod->arch.got_plt; + struct got_entry *got_plt; struct mod_section *plt_sec = &mod->arch.plt; - struct plt_entry *plt = get_plt_entry(val, plt_sec); + struct plt_entry *plt = get_plt_entry(val, plt_sec, got_plt_sec); int i = plt_sec->num_entries; if (plt) return (u64)plt; /* There is no duplicate entry, create a new one */ + got_plt = (struct got_entry *)got_plt_sec->shdr->sh_addr; + got_plt[i] = emit_got_entry(val); plt = (struct plt_entry *)plt_sec->shdr->sh_addr; - plt[i] = emit_plt_entry(val); + plt[i] = emit_plt_entry(val, (u64)&plt[i], (u64)&got_plt[i]); plt_sec->num_entries++; + got_plt_sec->num_entries++; BUG_ON(plt_sec->num_entries > plt_sec->max_entries); return (u64)&plt[i]; @@ -94,6 +99,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt.shdr = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) mod->arch.got.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got.plt")) + mod->arch.got_plt.shdr = sechdrs + i; } if (!mod->arch.plt.shdr) { @@ -104,6 +111,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, pr_err("%s: module GOT section(s) missing\n", mod->name); return -ENOEXEC; } + if (!mod->arch.got_plt.shdr) { + pr_err("%s: module GOT.PLT section(s) missing\n", mod->name); + return -ENOEXEC; + } /* 
Calculate the maxinum number of entries */ for (i = 0; i < ehdr->e_shnum; i++) { @@ -135,5 +146,11 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.got.num_entries = 0; mod->arch.got.max_entries = num_gots; + mod->arch.got_plt.shdr->sh_type = SHT_NOBITS; + mod->arch.got_plt.shdr->sh_flags = SHF_ALLOC; + mod->arch.got_plt.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got_plt.shdr->sh_size = (num_plts + 1) * sizeof(struct got_entry); + mod->arch.got_plt.num_entries = 0; + mod->arch.got_plt.max_entries = num_plts; return 0; } diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds index 7ef580e62883..295ecfb341a2 100644 --- a/arch/riscv/kernel/module.lds +++ b/arch/riscv/kernel/module.lds @@ -4,4 +4,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .got (NOLOAD) : { BYTE(0) } + .got.plt (NOLOAD) : { BYTE(0) } } From da975dd4818cf42a181910789c096eb6997ed663 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:43 +0800 Subject: [PATCH 12/21] RISC-V: Support GOT_HI20/CALL_PLT relocation type in kernel module For CALL_PLT, emit the plt entry only when offset is more than 32-bit. For PCREL_LO12, it uses the location of corresponding HI20 to get the address of external symbol. It should check the HI20 type is the PCREL_HI20 or GOT_HI20, because sometime the location will have two or more relocation types. For example: 0: 00000797 auipc a5,0x0 0: R_RISCV_ALIGN *ABS* 0: R_RISCV_GOT_HI20 SYMBOL 4: 0007b783 ld a5,0(a5) # 0 4: R_RISCV_PCREL_LO12_I .L0 4: R_RISCV_RELAX *ABS* Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 62 ++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index e0f05034fc21..be717bd7cea7 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -92,6 +92,28 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + s64 offset = (void *)v - (void *)location; + s32 hi20; + + /* Always emit the got entry */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = module_emit_got_entry(me, v); + offset = (void *)offset - (void *)location; + } else { + pr_err( + "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", + me->name, v, location); + return -EINVAL; + } + + hi20 = (offset + 0x800) & 0xfffff000; + *location = (*location & 0xfff) | hi20; + return 0; +} + static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -100,10 +122,16 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, u32 hi20, lo12; if (offset != fill_v) { - pr_err( - "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); - return -EINVAL; + /* Only emit the plt entry if offset over 32-bit range */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = module_emit_plt_entry(me, v); + offset = (void *)offset - (void *)location; + } else { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, v, location); + return -EINVAL; + } } hi20 = (offset + 0x800) & 0xfffff000; @@ -127,6 +155,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, [R_RISCV_PCREL_LO12_S] = 
apply_r_riscv_pcrel_lo12_s_rela, + [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, [R_RISCV_RELAX] = apply_r_riscv_relax_rela, }; @@ -184,25 +213,38 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, u64 hi20_loc = sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[j].r_offset; - /* Find the corresponding HI20 PC-relative relocation entry */ - if (hi20_loc == sym->st_value) { + u32 hi20_type = ELF_RISCV_R_TYPE(rel[j].r_info); + + /* Find the corresponding HI20 relocation entry */ + if (hi20_loc == sym->st_value + && (hi20_type == R_RISCV_PCREL_HI20 + || hi20_type == R_RISCV_GOT_HI20)) { + s32 hi20, lo12; Elf_Sym *hi20_sym = (Elf_Sym *)sechdrs[symindex].sh_addr + ELF_RISCV_R_SYM(rel[j].r_info); u64 hi20_sym_val = hi20_sym->st_value + rel[j].r_addend; + /* Calculate lo12 */ - s64 offset = hi20_sym_val - hi20_loc; - s32 hi20 = (offset + 0x800) & 0xfffff000; - s32 lo12 = offset - hi20; + u64 offset = hi20_sym_val - hi20_loc; + if (IS_ENABLED(CONFIG_MODULE_SECTIONS) + && hi20_type == R_RISCV_GOT_HI20) { + offset = module_emit_got_entry( + me, hi20_sym_val); + offset = offset - hi20_loc; + } + hi20 = (offset + 0x800) & 0xfffff000; + lo12 = offset - hi20; v = lo12; + break; } } if (j == sechdrs[relsec].sh_size / sizeof(*rel)) { pr_err( - "%s: Can not find HI20 PC-relative relocation information\n", + "%s: Can not find HI20 relocation information\n", me->name); return -EINVAL; } From e1910c72bdc405b5028510ccc3ed42f0ed25cc6c Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:44 +0800 Subject: [PATCH 13/21] RISC-V: Support CALL relocation type in kernel module Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index be717bd7cea7..3f2730840c25 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -141,6 +141,27 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_call_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + s64 offset = (void *)v - (void *)location; + s32 fill_v = offset; + u32 hi20, lo12; + + if (offset != fill_v) { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, v, location); + return -EINVAL; + } + + hi20 = (offset + 0x800) & 0xfffff000; + lo12 = (offset - hi20) & 0xfff; + *location = (*location & 0xfff) | hi20; + *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); + return 0; +} + static int apply_r_riscv_relax_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -157,6 +178,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, + [R_RISCV_CALL] = apply_r_riscv_call_rela, [R_RISCV_RELAX] = apply_r_riscv_relax_rela, }; From e7456e696bff09ef2345081bf21e5ce741e2a3ce Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:45 +0800 Subject: [PATCH 14/21] RISC-V: Support HI20/LO12_I/LO12_S relocation type in kernel module HI20 and LO12_I/LO12_S relocate the absolute address, the range of offset must in 32-bit. 
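As a rough sketch (split_hi20_lo12() is a hypothetical helper; the
relocation handlers below open-code the same arithmetic), the absolute
value is split so that the sign-extended low 12 bits recombine with
the upper 20 bits back to the original address:

  static void split_hi20_lo12(s32 v, s32 *hi20, s32 *lo12)
  {
          *hi20 = (v + 0x800) & 0xfffff000;  /* round up when lo12 < 0  */
          *lo12 = v - *hi20;                 /* always in [-2048, 2047] */
  }

  /* e.g. v = 0x12345fff  ->  hi20 = 0x12346000, lo12 = -1 */

The +0x800 compensates for the hardware sign-extending the low 12
bits; a value outside the signed 32-bit range cannot be expressed by a
single HI20/LO12 pair, which is why these relocation types are limited
to a 32-bit range.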
Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 3f2730840c25..f1bd6b1a4520 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -92,6 +92,45 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + s32 hi20; + + if (IS_ENABLED(CMODEL_MEDLOW)) { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, v, location); + return -EINVAL; + } + + hi20 = ((s32)v + 0x800) & 0xfffff000; + *location = (*location & 0xfff) | hi20; + return 0; +} + +static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + /* Skip medlow checking because of filtering by HI20 already */ + s32 hi20 = ((s32)v + 0x800) & 0xfffff000; + s32 lo12 = ((s32)v - hi20); + *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20); + return 0; +} + +static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + /* Skip medlow checking because of filtering by HI20 already */ + s32 hi20 = ((s32)v + 0x800) & 0xfffff000; + s32 lo12 = ((s32)v - hi20); + u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11); + u32 imm4_0 = (lo12 & 0x1f) << (11 - 4); + *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; + return 0; +} + static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -176,6 +215,9 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, + [R_RISCV_HI20] = apply_r_riscv_hi20_rela, + [R_RISCV_LO12_I] = apply_r_riscv_lo12_i_rela, + [R_RISCV_LO12_S] = apply_r_riscv_lo12_s_rela, [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, [R_RISCV_CALL] = apply_r_riscv_call_rela, From 56ea45ae239206d79d776a43bf32cbd8fa4e069d Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:46 +0800 Subject: [PATCH 15/21] RISC-V: Support RVC_BRANCH/JUMP relocation type in kernel modulewq Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index f1bd6b1a4520..7ab6a9b72384 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -49,6 +49,39 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + s64 offset = (void *)v - (void *)location; + u16 imm8 = (offset & 0x100) << (12 - 8); + u16 imm7_6 = (offset & 0xc0) >> (6 - 5); + u16 imm5 = (offset & 0x20) >> (5 - 2); + u16 imm4_3 = (offset & 0x18) << (12 - 5); + u16 imm2_1 = (offset & 0x6) << (12 - 10); + + *(u16 *)location = (*(u16 *)location & 0xe383) | + imm8 | imm7_6 | imm5 | imm4_3 | imm2_1; + return 0; +} + +static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + s64 offset = (void *)v - (void *)location; + u16 imm11 = (offset & 0x800) << (12 - 11); + u16 imm10 = (offset & 0x400) >> (10 - 8); + u16 imm9_8 = (offset & 0x300) << (12 - 11); + u16 imm7 = (offset & 
0x80) >> (7 - 6); + u16 imm6 = (offset & 0x40) << (12 - 11); + u16 imm5 = (offset & 0x20) >> (5 - 2); + u16 imm4 = (offset & 0x10) << (12 - 5); + u16 imm3_1 = (offset & 0xe) << (12 - 10); + + *(u16 *)location = (*(u16 *)location & 0xe003) | + imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1; + return 0; +} + static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -212,6 +245,8 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_64] = apply_r_riscv_64_rela, [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, [R_RISCV_JAL] = apply_r_riscv_jal_rela, + [R_RISCV_RVC_BRANCH] = apply_r_riscv_rcv_branch_rela, + [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, From 29e405cd88c373ddcb3399687311aa869a7c8ee7 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:47 +0800 Subject: [PATCH 16/21] RISC-V: Support ALIGN relocation type in kernel module Just fail on align type. Kernel modules loader didn't do relax like linker, it is difficult to remove or migrate the code, but the remnant nop instructions harm the performaace of module. We expect the building module with the no-relax option. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 7ab6a9b72384..957933e669b1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -240,6 +240,15 @@ static int apply_r_riscv_relax_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_align_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + pr_err( + "%s: The unexpected relocation type 'R_RISCV_ALIGN' from PC = %p\n", + me->name, location); + return -EINVAL; +} + static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, Elf_Addr v) = { [R_RISCV_64] = apply_r_riscv_64_rela, @@ -257,6 +266,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, [R_RISCV_CALL] = apply_r_riscv_call_rela, [R_RISCV_RELAX] = apply_r_riscv_relax_rela, + [R_RISCV_ALIGN] = apply_r_riscv_align_rela, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, From 8e691b16769d233f8a8b668dc7fad783459e4573 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:48 +0800 Subject: [PATCH 17/21] RISC-V: Support ADD32 relocation type in kernel module Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 957933e669b1..73ea36c73d3b 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -249,6 +249,13 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, return -EINVAL; } +static int apply_r_riscv_add32_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u32 *)location += (*(u32 *)v); + return 0; +} + static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, Elf_Addr v) = { [R_RISCV_64] = apply_r_riscv_64_rela, @@ -267,6 +274,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_CALL] = apply_r_riscv_call_rela, [R_RISCV_RELAX] = apply_r_riscv_relax_rela, [R_RISCV_ALIGN] = apply_r_riscv_align_rela, + [R_RISCV_ADD32] 
= apply_r_riscv_add32_rela, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, From 4aad074c9c1d1302c504e1a26fe434878fe08cd5 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:49 +0800 Subject: [PATCH 18/21] RISC-V: Support SUB32 relocation type in kernel module Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 73ea36c73d3b..5dddba301d0a 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -256,6 +256,13 @@ static int apply_r_riscv_add32_rela(struct module *me, u32 *location, return 0; } +static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u32 *)location -= (*(u32 *)v); + return 0; +} + static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, Elf_Addr v) = { [R_RISCV_64] = apply_r_riscv_64_rela, @@ -275,6 +282,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_RELAX] = apply_r_riscv_relax_rela, [R_RISCV_ALIGN] = apply_r_riscv_align_rela, [R_RISCV_ADD32] = apply_r_riscv_add32_rela, + [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, From 4a632cec8884b3eca70fb644bcf56a985dc31ebf Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:50 +0800 Subject: [PATCH 19/21] RISC-V: Enable module support in defconfig Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 47dacf06c679..bca0eee733b0 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -73,3 +73,5 @@ CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y # CONFIG_RCU_TRACE is not set CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y From e21d54219c7a698b10d5f1e6a1023ebde284cd7b Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 15 Mar 2018 16:50:51 +0800 Subject: [PATCH 20/21] RISC-V: Add definition of relocation types Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/elf.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index a510edfa8226..5cae4c30cd8e 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -79,5 +79,12 @@ typedef union __riscv_fp_state elf_fpregset_t; #define R_RISCV_TPREL_I 49 #define R_RISCV_TPREL_S 50 #define R_RISCV_RELAX 51 +#define R_RISCV_SUB6 52 +#define R_RISCV_SET6 53 +#define R_RISCV_SET8 54 +#define R_RISCV_SET16 55 +#define R_RISCV_SET32 56 +#define R_RISCV_32_PCREL 57 + #endif /* _UAPI_ASM_ELF_H */ From f6a11d9febad1f308fe4119a54b92e335e7c8032 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 13 Mar 2018 13:31:20 -0700 Subject: [PATCH 21/21] RISC-V: Rename CONFIG_CMDLINE_OVERRIDE to CONFIG_CMDLINE_FORCE The device tree code looks for CONFIG_CMDLINE_FORCE, but we were using CONFIG_CMDLINE_OVERRIDE. It looks like this was just a hold over from before our device tree conversion -- in fact, we'd already removed the support for CONFIG_CMDLINE_OVERRIDE from our arch-specific code so it didn't even work any more. Thanks to Mortiz and Trung for finding the original bug, and for Michael for suggeting a better fix. 
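For reference, the relevant logic in drivers/of/fdt.c
(early_init_dt_scan_chosen) boils down to roughly the following; this
is a paraphrase rather than the verbatim code, with IS_ENABLED()
standing in for the actual #ifdef blocks:

  if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !cmdline[0])
          strlcpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);

Only CONFIG_CMDLINE_FORCE makes the built-in string win over a
non-empty /chosen/bootargs; a symbol named CMDLINE_OVERRIDE was never
consulted there.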
CC: Trung Tran CC: Michael J Clark Reviewed-by: Moritz Fischer Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1df6e264edd3..e8379daf9334 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -289,7 +289,7 @@ config CMDLINE_BOOL in CONFIG_CMDLINE. The built-in options will be concatenated to the default command - line if CMDLINE_OVERRIDE is set to 'N'. Otherwise, the default + line if CMDLINE_FORCE is set to 'N'. Otherwise, the default command line will be ignored and replaced by the built-in string. config CMDLINE @@ -299,7 +299,7 @@ config CMDLINE help Supply command-line options at build time by entering them here. -config CMDLINE_OVERRIDE +config CMDLINE_FORCE bool "Built-in command line overrides bootloader arguments" depends on CMDLINE_BOOL help