From c54a1a06daa78613519b4d24495b0d175b8af63f Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 21:50:28 +0900 Subject: [PATCH 01/22] tracing: Fix function timing profiler to initialize hashtable Since the new fgraph requires to initialize fgraph_ops.ops.func_hash before calling register_ftrace_graph(), initialize it with default (tracing all functions) parameter. Cc: stable@vger.kernel.org Fixes: 5fccc7552ccb ("ftrace: Add subops logic to allow one ops to manage many") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4c28dd177ca6..d2dd71d04b8a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -883,6 +883,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, } static struct fgraph_ops fprofiler_ops = { + .ops = { + .flags = FTRACE_OPS_FL_INITIALIZED, + INIT_OPS_HASH(fprofiler_ops.ops) + }, .entryfunc = &profile_graph_entry, .retfunc = &profile_graph_return, }; From a370b72ec7165ebe1230d0225cbe66f6526e68ef Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 18 Aug 2024 21:48:13 +0900 Subject: [PATCH 02/22] tracing: Add a comment about ftrace_regs definition To clarify what will be expected on ftrace_regs, add a comment to the architecture independent definition of the ftrace_regs. Signed-off-by: Masami Hiramatsu (Google) Acked-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fd5e84d0ec47..42106b3de396 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -117,6 +117,32 @@ extern int ftrace_enabled; #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +/** + * ftrace_regs - ftrace partial/optimal register set + * + * ftrace_regs represents a group of registers which is used at the + * function entry and exit. There are three types of registers. + * + * - Registers for passing the parameters to callee, including the stack + * pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64) + * - Registers for passing the return values to caller. + * (e.g. rax and rdx on x86_64) + * - Registers for hooking the function call and return including the + * frame pointer (the frame pointer is architecture/config dependent) + * (e.g. rip, rbp and rsp for x86_64) + * + * Also, architecture dependent fields can be used for internal process. + * (e.g. orig_ax on x86_64) + * + * On the function entry, those registers will be restored except for + * the stack pointer, so that user can change the function parameters + * and instruction pointer (e.g. live patching.) + * On the function exit, only registers which is used for return values + * are restored. + * + * NOTE: user *must not* access regs directly, only do it via APIs, because + * the member can be changed according to the architecture. + */ struct ftrace_regs { struct pt_regs regs; }; From a312a0f7834e605e7c41570f0e9525d0fc4a70a4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:06 -0400 Subject: [PATCH 03/22] fgraph: Use fgraph data to store subtime for profiler Instead of having the "subtime" for the function profiler in the infrastructure ftrace_ret_stack structure, have it use the fgraph data reserve and retrieve functions. This will keep the limited shadow stack from wasting 8 bytes for something that is seldom used. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214826.780323141@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 4 +-- kernel/trace/fgraph.c | 64 ++++++++++++++++++++++++++++++++---------- kernel/trace/ftrace.c | 23 +++++++-------- 3 files changed, 62 insertions(+), 29 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 42106b3de396..aabd348cad4a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1081,6 +1081,7 @@ struct fgraph_ops { void *fgraph_reserve_data(int idx, int size_bytes); void *fgraph_retrieve_data(int idx, int *size_bytes); +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); /* * Stack of return addresses for functions @@ -1091,9 +1092,6 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long long subtime; -#endif #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..095ceb752b28 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -390,21 +390,7 @@ void *fgraph_reserve_data(int idx, int size_bytes) */ void *fgraph_retrieve_data(int idx, int *size_bytes) { - int offset = current->curr_ret_stack - 1; - unsigned long val; - - val = get_fgraph_entry(current, offset); - while (__get_type(val) == FGRAPH_TYPE_DATA) { - if (__get_data_index(val) == idx) - goto found; - offset -= __get_data_size(val) + 1; - val = get_fgraph_entry(current, offset); - } - return NULL; -found: - if (size_bytes) - *size_bytes = __get_data_size(val) * sizeof(long); - return get_data_type_data(current, offset); + return fgraph_retrieve_parent_data(idx, size_bytes, 0); } /** @@ -460,6 +446,54 @@ get_ret_stack(struct task_struct *t, int offset, int *frame_offset) return RET_STACK(t, offset); } +/** + * fgraph_retrieve_parent_data - get data from a parent function + * @idx: The index into the fgraph_array (fgraph_ops::idx) + * @size_bytes: A pointer to retrieved data size + * @depth: The depth to find the parent (0 is the current function) + * + * This is similar to fgraph_retrieve_data() but can be used to retrieve + * data from a parent caller function. + * + * Return: a pointer to the specified parent data or NULL if not found + */ +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth) +{ + struct ftrace_ret_stack *ret_stack = NULL; + int offset = current->curr_ret_stack; + unsigned long val; + + if (offset <= 0) + return NULL; + + for (;;) { + int next_offset; + + ret_stack = get_ret_stack(current, offset, &next_offset); + if (!ret_stack || --depth < 0) + break; + offset = next_offset; + } + + if (!ret_stack) + return NULL; + + offset--; + + val = get_fgraph_entry(current, offset); + while (__get_type(val) == FGRAPH_TYPE_DATA) { + if (__get_data_index(val) == idx) + goto found; + offset -= __get_data_size(val) + 1; + val = get_fgraph_entry(current, offset); + } + return NULL; +found: + if (size_bytes) + *size_bytes = __get_data_size(val) * sizeof(long); + return get_data_type_data(current, offset); +} + /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d2dd71d04b8a..bac1f2ee1983 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -823,7 +823,7 @@ void ftrace_graph_graph_time_control(bool enable) static int profile_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { - struct ftrace_ret_stack *ret_stack; + unsigned long long *subtime; function_profile_call(trace->func, 0, NULL, NULL); @@ -831,9 +831,9 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, if (!current->ret_stack) return 0; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack) - ret_stack->subtime = 0; + subtime = fgraph_reserve_data(gops->idx, sizeof(*subtime)); + if (subtime) + *subtime = 0; return 1; } @@ -841,11 +841,12 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { - struct ftrace_ret_stack *ret_stack; struct ftrace_profile_stat *stat; unsigned long long calltime; + unsigned long long *subtime; struct ftrace_profile *rec; unsigned long flags; + int size; local_irq_save(flags); stat = this_cpu_ptr(&ftrace_profile_stats); @@ -861,13 +862,13 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, if (!fgraph_graph_time) { /* Append this call time to the parent time to subtract */ - ret_stack = ftrace_graph_get_ret_stack(current, 1); - if (ret_stack) - ret_stack->subtime += calltime; + subtime = fgraph_retrieve_parent_data(gops->idx, &size, 1); + if (subtime) + *subtime += calltime; - ret_stack = ftrace_graph_get_ret_stack(current, 0); - if (ret_stack && ret_stack->subtime < calltime) - calltime -= ret_stack->subtime; + subtime = fgraph_retrieve_data(gops->idx, &size); + if (subtime && *subtime && *subtime < calltime) + calltime -= *subtime; else calltime = 0; } From 3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:07 -0400 Subject: [PATCH 04/22] ftrace: Use a running sleeptime instead of saving on shadow stack The fgraph "sleep-time" option tells the function graph tracer and the profiler whether to include the time a function "sleeps" (is scheduled off the CPU) in its duration for the function. By default it is true, which means the duration of a function is calculated by the timestamp of when the function was entered to the timestamp of when it exits. If the "sleep-time" option is disabled, it needs to remove the time that the task was not running on the CPU during the function. Currently it is done in a sched_switch tracepoint probe where it moves the "calltime" (time of entry of the function) forward by the sleep time calculated. It updates all the calltime in the shadow stack. This is time consuming for those users of the function graph tracer that does not care about the sleep time. Instead, add a "ftrace_sleeptime" to the task_struct that gets the sleep time added each time the task wakes up. Then have the function entry save the current "ftrace_sleeptime" and on function exit, move the calltime forward by the difference of the current "ftrace_sleeptime" from the saved sleeptime. This removes one dependency of "calltime" needed to be on the shadow stack. It also simplifies the code that removes the sleep time of functions. TODO: Only enable the sched_switch tracepoint when this is needed. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214826.938908568@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 1 + kernel/trace/fgraph.c | 16 ++---------- kernel/trace/ftrace.c | 39 ++++++++++++++++++++-------- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 28 ++++++++++++++++++++ 5 files changed, 60 insertions(+), 25 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..c08f3bdb11a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1441,6 +1441,7 @@ struct task_struct { /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; + unsigned long long ftrace_sleeptime; /* * Number of functions that haven't been traced diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 095ceb752b28..b2e95bf82211 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -495,7 +495,7 @@ found: } /* Both enabled by default (can be cleared by function_graph tracer flags */ -static bool fgraph_sleep_time = true; +bool fgraph_sleep_time = true; #ifdef CONFIG_DYNAMIC_FTRACE /* @@ -1046,9 +1046,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *next, unsigned int prev_state) { - struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; - int offset; /* * Does the user want to count the time a function was asleep. @@ -1065,17 +1063,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, if (!next->ftrace_timestamp) return; - /* - * Update all the counters in next to make up for the - * time next was sleeping. - */ - timestamp -= next->ftrace_timestamp; - - for (offset = next->curr_ret_stack; offset > 0; ) { - ret_stack = get_ret_stack(next, offset, &offset); - if (ret_stack) - ret_stack->calltime += timestamp; - } + next->ftrace_sleeptime += timestamp - next->ftrace_timestamp; } static DEFINE_PER_CPU(unsigned long *, idle_ret_stack); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bac1f2ee1983..90b3975d5315 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -820,10 +820,15 @@ void ftrace_graph_graph_time_control(bool enable) fgraph_graph_time = enable; } +struct profile_fgraph_data { + unsigned long long subtime; + unsigned long long sleeptime; +}; + static int profile_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { - unsigned long long *subtime; + struct profile_fgraph_data *profile_data; function_profile_call(trace->func, 0, NULL, NULL); @@ -831,9 +836,12 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, if (!current->ret_stack) return 0; - subtime = fgraph_reserve_data(gops->idx, sizeof(*subtime)); - if (subtime) - *subtime = 0; + profile_data = fgraph_reserve_data(gops->idx, sizeof(*profile_data)); + if (!profile_data) + return 0; + + profile_data->subtime = 0; + profile_data->sleeptime = current->ftrace_sleeptime; return 1; } @@ -841,9 +849,10 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { + struct profile_fgraph_data *profile_data; + struct profile_fgraph_data *parent_data; struct ftrace_profile_stat *stat; unsigned long long calltime; - unsigned long long *subtime; struct ftrace_profile *rec; unsigned long flags; int size; @@ -859,16 +868,24 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, calltime = trace->rettime - trace->calltime; + if (!fgraph_sleep_time) { + profile_data = fgraph_retrieve_data(gops->idx, &size); + if (profile_data && current->ftrace_sleeptime) + calltime -= current->ftrace_sleeptime - profile_data->sleeptime; + } + if (!fgraph_graph_time) { /* Append this call time to the parent time to subtract */ - subtime = fgraph_retrieve_parent_data(gops->idx, &size, 1); - if (subtime) - *subtime += calltime; + parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1); + if (parent_data) + parent_data->subtime += calltime; - subtime = fgraph_retrieve_data(gops->idx, &size); - if (subtime && *subtime && *subtime < calltime) - calltime -= *subtime; + if (!profile_data) + profile_data = fgraph_retrieve_data(gops->idx, &size); + + if (profile_data && profile_data->subtime && profile_data->subtime < calltime) + calltime -= profile_data->subtime; else calltime = 0; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c866991b9c78..2f8017f8d34d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1048,6 +1048,7 @@ static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftra #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; +extern bool fgraph_sleep_time; static inline bool ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a569daaac4c4..bbd898f5a73c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -133,6 +133,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; + unsigned long *sleeptime; unsigned long flags; unsigned int trace_ctx; long disabled; @@ -167,6 +168,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, if (ftrace_graph_ignore_irqs()) return 0; + /* save the current sleep time if we are to ignore it */ + if (!fgraph_sleep_time) { + sleeptime = fgraph_reserve_data(gops->idx, sizeof(*sleeptime)); + if (sleeptime) + *sleeptime = current->ftrace_sleeptime; + } + /* * Stop here if tracing_threshold is set. We only write function return * events to the ring buffer. @@ -238,6 +246,22 @@ void __trace_graph_return(struct trace_array *tr, trace_buffer_unlock_commit_nostack(buffer, event); } +static void handle_nosleeptime(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) +{ + unsigned long long *sleeptime; + int size; + + if (fgraph_sleep_time) + return; + + sleeptime = fgraph_retrieve_data(gops->idx, &size); + if (!sleeptime) + return; + + trace->calltime += current->ftrace_sleeptime - *sleeptime; +} + void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { @@ -256,6 +280,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace, return; } + handle_nosleeptime(trace, gops); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -278,6 +304,8 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, return; } + handle_nosleeptime(trace, gops); + if (tracing_thresh && (trace->rettime - trace->calltime < tracing_thresh)) return; From f1f36e22bee967db5e812a65e24389e54c46f3c2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 14 Sep 2024 17:48:08 -0400 Subject: [PATCH 05/22] ftrace: Have calltime be saved in the fgraph storage The calltime field in the shadow stack frame is only used by the function graph tracer and profiler. But now that there's other users of the function graph infrastructure, this adds overhead and wastes space on the shadow stack. Move the calltime to the fgraph data storage, where the function graph and profiler entry functions will save it in its own graph storage and retrieve it in its exit functions. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Jiri Olsa Link: https://lore.kernel.org/20240914214827.096968730@goodmis.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 - kernel/trace/fgraph.c | 5 --- kernel/trace/ftrace.c | 19 ++++----- kernel/trace/trace_functions_graph.c | 60 +++++++++++++++++++--------- 4 files changed, 51 insertions(+), 34 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aabd348cad4a..e684addf6508 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1091,7 +1091,6 @@ void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); struct ftrace_ret_stack { unsigned long ret; unsigned long func; - unsigned long long calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index b2e95bf82211..58a28ec35dab 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -558,7 +558,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int fgraph_idx) { struct ftrace_ret_stack *ret_stack; - unsigned long long calltime; unsigned long val; int offset; @@ -588,8 +587,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, return -EBUSY; } - calltime = trace_clock_local(); - offset = READ_ONCE(current->curr_ret_stack); ret_stack = RET_STACK(current, offset); offset += FGRAPH_FRAME_OFFSET; @@ -623,7 +620,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, ret_stack->ret = ret; ret_stack->func = func; - ret_stack->calltime = calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST ret_stack->fp = frame_pointer; #endif @@ -757,7 +753,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, *offset += FGRAPH_FRAME_OFFSET; *ret = ret_stack->ret; trace->func = ret_stack->func; - trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = current->curr_ret_depth; /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 90b3975d5315..cae388122ca8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -821,6 +821,7 @@ void ftrace_graph_graph_time_control(bool enable) } struct profile_fgraph_data { + unsigned long long calltime; unsigned long long subtime; unsigned long long sleeptime; }; @@ -842,6 +843,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace, profile_data->subtime = 0; profile_data->sleeptime = current->ftrace_sleeptime; + profile_data->calltime = trace_clock_local(); return 1; } @@ -850,9 +852,9 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { struct profile_fgraph_data *profile_data; - struct profile_fgraph_data *parent_data; struct ftrace_profile_stat *stat; unsigned long long calltime; + unsigned long long rettime = trace_clock_local(); struct ftrace_profile *rec; unsigned long flags; int size; @@ -862,29 +864,28 @@ static void profile_graph_return(struct ftrace_graph_ret *trace, if (!stat->hash || !ftrace_profile_enabled) goto out; + profile_data = fgraph_retrieve_data(gops->idx, &size); + /* If the calltime was zero'd ignore it */ - if (!trace->calltime) + if (!profile_data || !profile_data->calltime) goto out; - calltime = trace->rettime - trace->calltime; + calltime = rettime - profile_data->calltime; if (!fgraph_sleep_time) { - profile_data = fgraph_retrieve_data(gops->idx, &size); - if (profile_data && current->ftrace_sleeptime) + if (current->ftrace_sleeptime) calltime -= current->ftrace_sleeptime - profile_data->sleeptime; } if (!fgraph_graph_time) { + struct profile_fgraph_data *parent_data; /* Append this call time to the parent time to subtract */ parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1); if (parent_data) parent_data->subtime += calltime; - if (!profile_data) - profile_data = fgraph_retrieve_data(gops->idx, &size); - - if (profile_data && profile_data->subtime && profile_data->subtime < calltime) + if (profile_data->subtime && profile_data->subtime < calltime) calltime -= profile_data->subtime; else calltime = 0; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index bbd898f5a73c..5c1b150fbba3 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -127,13 +127,18 @@ static inline int ftrace_graph_ignore_irqs(void) return in_hardirq(); } +struct fgraph_times { + unsigned long long calltime; + unsigned long long sleeptime; /* may be optional! */ +}; + int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; - unsigned long *sleeptime; + struct fgraph_times *ftimes; unsigned long flags; unsigned int trace_ctx; long disabled; @@ -168,12 +173,18 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, if (ftrace_graph_ignore_irqs()) return 0; - /* save the current sleep time if we are to ignore it */ - if (!fgraph_sleep_time) { - sleeptime = fgraph_reserve_data(gops->idx, sizeof(*sleeptime)); - if (sleeptime) - *sleeptime = current->ftrace_sleeptime; + if (fgraph_sleep_time) { + /* Only need to record the calltime */ + ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime)); + } else { + ftimes = fgraph_reserve_data(gops->idx, sizeof(*ftimes)); + if (ftimes) + ftimes->sleeptime = current->ftrace_sleeptime; } + if (!ftimes) + return 0; + + ftimes->calltime = trace_clock_local(); /* * Stop here if tracing_threshold is set. We only write function return @@ -247,19 +258,13 @@ void __trace_graph_return(struct trace_array *tr, } static void handle_nosleeptime(struct ftrace_graph_ret *trace, - struct fgraph_ops *gops) + struct fgraph_times *ftimes, + int size) { - unsigned long long *sleeptime; - int size; - - if (fgraph_sleep_time) + if (fgraph_sleep_time || size < sizeof(*ftimes)) return; - sleeptime = fgraph_retrieve_data(gops->idx, &size); - if (!sleeptime) - return; - - trace->calltime += current->ftrace_sleeptime - *sleeptime; + ftimes->calltime += current->ftrace_sleeptime - ftimes->sleeptime; } void trace_graph_return(struct ftrace_graph_ret *trace, @@ -268,9 +273,11 @@ void trace_graph_return(struct ftrace_graph_ret *trace, unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; + struct fgraph_times *ftimes; unsigned long flags; unsigned int trace_ctx; long disabled; + int size; int cpu; ftrace_graph_addr_finish(gops, trace); @@ -280,7 +287,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace, return; } - handle_nosleeptime(trace, gops); + ftimes = fgraph_retrieve_data(gops->idx, &size); + if (!ftimes) + return; + + handle_nosleeptime(trace, ftimes, size); + + trace->calltime = ftimes->calltime; local_irq_save(flags); cpu = raw_smp_processor_id(); @@ -297,6 +310,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace, static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { + struct fgraph_times *ftimes; + int size; + ftrace_graph_addr_finish(gops, trace); if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { @@ -304,10 +320,16 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, return; } - handle_nosleeptime(trace, gops); + ftimes = fgraph_retrieve_data(gops->idx, &size); + if (!ftimes) + return; + + handle_nosleeptime(trace, ftimes, size); + + trace->calltime = ftimes->calltime; if (tracing_thresh && - (trace->rettime - trace->calltime < tracing_thresh)) + (trace->rettime - ftimes->calltime < tracing_thresh)) return; else trace_graph_return(trace, gops); From 21e92806d39c68af2accd1fb238c2daecfcf9fbd Mon Sep 17 00:00:00 2001 From: Donglin Peng Date: Sat, 14 Sep 2024 20:29:12 -0700 Subject: [PATCH 06/22] function_graph: Support recording and printing the function return address When using function_graph tracer to analyze the flow of kernel function execution, it is often necessary to quickly locate the exact line of code where the call occurs. While this may be easy at times, it can be more time-consuming when some functions are inlined or the flow is too long. This feature aims to simplify the process by recording the return address of traced funcions and printing it when outputing trace logs. To enhance human readability, the prefix 'ret=' is used for the kernel return value, while '<-' serves as the prefix for the return address in trace logs to make it look more like the function tracer. A new trace option named 'funcgraph-retaddr' has been introduced, and the existing option 'sym-addr' can be used to control the format of the return address. See below logs with both funcgraph-retval and funcgraph-retaddr enabled. 0) | load_elf_binary() { /* <-bprm_execve+0x249/0x600 */ 0) | load_elf_phdrs() { /* <-load_elf_binary+0x84/0x1730 */ 0) | __kmalloc_noprof() { /* <-load_elf_phdrs+0x4a/0xb0 */ 0) 3.657 us | __cond_resched(); /* <-__kmalloc_noprof+0x28c/0x390 ret=0x0 */ 0) + 24.335 us | } /* __kmalloc_noprof ret=0xffff8882007f3000 */ 0) | kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */ 0) | rw_verify_area() { /* <-kernel_read+0x2b/0x50 */ 0) | security_file_permission() { /* <-kernel_read+0x2b/0x50 */ 0) | selinux_file_permission() { /* <-security_file_permission+0x26/0x40 */ 0) | __inode_security_revalidate() { /* <-selinux_file_permission+0x6d/0x140 */ 0) 2.034 us | __cond_resched(); /* <-__inode_security_revalidate+0x5f/0x80 ret=0x0 */ 0) 6.602 us | } /* __inode_security_revalidate ret=0x0 */ 0) 2.214 us | avc_policy_seqno(); /* <-selinux_file_permission+0x107/0x140 ret=0x0 */ 0) + 16.670 us | } /* selinux_file_permission ret=0x0 */ 0) + 20.809 us | } /* security_file_permission ret=0x0 */ 0) + 25.217 us | } /* rw_verify_area ret=0x0 */ 0) | __kernel_read() { /* <-load_elf_phdrs+0x6c/0xb0 */ 0) | ext4_file_read_iter() { /* <-__kernel_read+0x160/0x2e0 */ Then, we can use the faddr2line to locate the source code, for example: $ ./scripts/faddr2line ./vmlinux load_elf_phdrs+0x6c/0xb0 load_elf_phdrs+0x6c/0xb0: elf_read at fs/binfmt_elf.c:471 (inlined by) load_elf_phdrs at fs/binfmt_elf.c:531 Link: https://lore.kernel.org/20240915032912.1118397-1-dolinux.peng@gmail.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409150605.HgUmU8ea-lkp@intel.com/ Signed-off-by: Donglin Peng [ Rebased to handle text_delta offsets ] Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 27 ++- kernel/trace/Kconfig | 10 + kernel/trace/fgraph.c | 22 +- kernel/trace/ftrace.c | 3 +- kernel/trace/trace.h | 11 +- kernel/trace/trace_entries.h | 29 ++- kernel/trace/trace_functions_graph.c | 220 ++++++++++++++---- kernel/trace/trace_irqsoff.c | 3 +- kernel/trace/trace_sched_wakeup.c | 3 +- kernel/trace/trace_selftest.c | 9 +- .../ftrace/test.d/ftrace/fgraph-retval.tc | 2 +- 11 files changed, 276 insertions(+), 63 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e684addf6508..2ac3b3b53cd0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1040,6 +1040,17 @@ struct ftrace_graph_ent { int depth; } __packed; +/* + * Structure that defines an entry function trace with retaddr. + * It's already packed but the attribute "packed" is needed + * to remove extra padding at the end. + */ +struct fgraph_retaddr_ent { + unsigned long func; /* Current function */ + int depth; + unsigned long retaddr; /* Return address */ +} __packed; + /* * Structure that defines a return function trace. * It's already packed but the attribute "packed" is needed @@ -1057,19 +1068,29 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; +struct fgraph_extras; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, struct fgraph_ops *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *); /* entry */ + struct fgraph_ops *, + struct fgraph_extras *); /* entry */ -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops, + struct fgraph_extras *extras); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* Used to convey some extra datas when creating a graph entry */ +struct fgraph_extras { + u32 flags; + unsigned long retaddr; +}; + struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; @@ -1115,6 +1136,8 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); +u32 graph_tracer_flags_get(u32 flags); + /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 721c3b221048..74c2b1d43bb9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -242,6 +242,16 @@ config FUNCTION_GRAPH_RETVAL enable it via the trace option funcgraph-retval. See Documentation/trace/ftrace.rst +config FUNCTION_GRAPH_RETADDR + bool "Kernel Function Graph Return Address" + depends on FUNCTION_GRAPH_TRACER + default n + help + Support recording and printing the function return address when + using function graph tracer. It can be helpful to locate code line that + the function is called. This feature is off by default, and you can + enable it via the trace option funcgraph-retaddr. + config DYNAMIC_FTRACE bool "enable/disable function tracing dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 58a28ec35dab..875aefe60a13 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -290,7 +290,8 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset) } /* ftrace_graph_entry set to this to tell some archs to run function graph */ -static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops, + struct fgraph_extras *extras) { return 0; } @@ -518,7 +519,8 @@ int __weak ftrace_disable_ftrace_graph_caller(void) #endif int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { return 0; } @@ -646,13 +648,20 @@ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; + struct fgraph_extras extras; unsigned long bitmap = 0; int offset; int i; + int idx = 0; trace.func = func; trace.depth = ++current->curr_ret_depth; + extras.flags = graph_tracer_flags_get(TRACE_GRAPH_PRINT_RETADDR); + if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) + && extras.flags & TRACE_GRAPH_PRINT_RETADDR) + extras.retaddr = ftrace_graph_ret_addr(current, &idx, ret, retp); + offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); if (offset < 0) goto out; @@ -661,7 +670,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (static_branch_likely(&fgraph_do_direct)) { int save_curr_ret_stack = current->curr_ret_stack; - if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) + if (static_call(fgraph_func)(&trace, fgraph_direct_gops, &extras)) bitmap |= BIT(fgraph_direct_gops->idx); else /* Clear out any saved storage */ @@ -679,7 +688,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, save_curr_ret_stack = current->curr_ret_stack; if (ftrace_ops_test(&gops->ops, func, NULL) && - gops->entryfunc(&trace, gops)) + gops->entryfunc(&trace, gops, &extras)) bitmap |= BIT(i); else /* Clear out any saved storage */ @@ -1136,7 +1145,8 @@ void ftrace_graph_exit_task(struct task_struct *t) #ifdef CONFIG_DYNAMIC_FTRACE static int fgraph_pid_func(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct trace_array *tr = gops->ops.private; int pid; @@ -1150,7 +1160,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace, return 0; } - return gops->saved_func(trace, gops); + return gops->saved_func(trace, gops, NULL); } void fgraph_update_pid_func(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cae388122ca8..5d87dac83b80 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -827,7 +827,8 @@ struct profile_fgraph_data { }; static int profile_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct profile_fgraph_data *profile_data; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2f8017f8d34d..13f08f257c0b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -46,6 +46,7 @@ enum trace_type { TRACE_BRANCH, TRACE_GRAPH_RET, TRACE_GRAPH_ENT, + TRACE_GRAPH_RETADDR_ENT, TRACE_USER_STACK, TRACE_BLK, TRACE_BPUTS, @@ -512,6 +513,8 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct fgraph_retaddr_ent_entry,\ + TRACE_GRAPH_RETADDR_ENT); \ IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct func_repeats_entry, \ @@ -692,7 +695,8 @@ void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); -int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, + struct fgraph_extras *extras); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -879,6 +883,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash) #define TRACE_GRAPH_GRAPH_TIME 0x400 #define TRACE_GRAPH_PRINT_RETVAL 0x800 #define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000 +#define TRACE_GRAPH_PRINT_RETADDR 0x2000 #define TRACE_GRAPH_PRINT_FILL_SHIFT 28 #define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT) @@ -900,6 +905,10 @@ extern void graph_trace_close(struct trace_iterator *iter); extern int __trace_graph_entry(struct trace_array *tr, struct ftrace_graph_ent *trace, unsigned int trace_ctx); +extern int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr); extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx); diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index c47422b20908..82fd174ebbe0 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -85,9 +85,35 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry, F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth) ); -/* Function return entry */ +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + +/* Function call entry with a return address */ +FTRACE_ENTRY_PACKED(fgraph_retaddr_entry, fgraph_retaddr_ent_entry, + + TRACE_GRAPH_RETADDR_ENT, + + F_STRUCT( + __field_struct( struct fgraph_retaddr_ent, graph_ent ) + __field_packed( unsigned long, graph_ent, func ) + __field_packed( int, graph_ent, depth ) + __field_packed( unsigned long, graph_ent, retaddr ) + ), + + F_printk("--> %ps (%d) <- %ps", (void *)__entry->func, __entry->depth, + (void *)__entry->retaddr) +); + +#else + +#ifndef fgraph_retaddr_ent_entry +#define fgraph_retaddr_ent_entry ftrace_graph_ent_entry +#endif + +#endif + #ifdef CONFIG_FUNCTION_GRAPH_RETVAL +/* Function return entry */ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, @@ -110,6 +136,7 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, #else +/* Function return entry */ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry, TRACE_GRAPH_RET, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 5c1b150fbba3..3dd63ae2afe8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -31,7 +31,10 @@ struct fgraph_data { struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ - struct ftrace_graph_ent_entry ent; + union { + struct ftrace_graph_ent_entry ent; + struct fgraph_retaddr_ent_entry rent; + } ent; struct ftrace_graph_ret_entry ret; int failed; int cpu; @@ -63,6 +66,10 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) }, /* Display function return value in hexadecimal format ? */ { TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) }, +#endif +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + /* Display function return address ? */ + { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) }, #endif /* Include sleep time (scheduled out) between entry and return */ { TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) }, @@ -83,6 +90,11 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; +u32 graph_tracer_flags_get(u32 flags) +{ + return tracer_flags.val & flags; +} + /* * DURATION column is being also used to display IRQ signs, * following values are used by print_graph_irq and others @@ -119,6 +131,40 @@ int __trace_graph_entry(struct trace_array *tr, return 1; } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr) +{ + struct trace_event_call *call = &event_fgraph_retaddr_entry; + struct ring_buffer_event *event; + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct fgraph_retaddr_ent_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RETADDR_ENT, + sizeof(*entry), trace_ctx); + if (!event) + return 0; + entry = ring_buffer_event_data(event); + entry->graph_ent.func = trace->func; + entry->graph_ent.depth = trace->depth; + entry->graph_ent.retaddr = retaddr; + if (!call_filter_check_discard(call, entry, buffer, event)) + trace_buffer_unlock_commit_nostack(buffer, event); + + return 1; +} +#else +int __trace_graph_retaddr_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned int trace_ctx, + unsigned long retaddr) +{ + return 1; +} +#endif + static inline int ftrace_graph_ignore_irqs(void) { if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) @@ -133,7 +179,8 @@ struct fgraph_times { }; int trace_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; @@ -199,7 +246,12 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); - ret = __trace_graph_entry(tr, trace, trace_ctx); + if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && extras + && (extras->flags & TRACE_GRAPH_PRINT_RETADDR))) + ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, + extras->retaddr); + else + ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; } @@ -507,7 +559,7 @@ get_return_for_leaf(struct trace_iterator *iter, * then we just reuse the data from before. */ if (data && data->failed) { - curr = &data->ent; + curr = &data->ent.ent; next = &data->ret; } else { @@ -537,7 +589,10 @@ get_return_for_leaf(struct trace_iterator *iter, * Save current and next entries for later reference * if the output fails. */ - data->ent = *curr; + if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT)) + data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr; + else + data->ent.ent = *curr; /* * If the next event is not a return type, then * we only care about what type it is. Otherwise we can @@ -701,52 +756,96 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration, } #ifdef CONFIG_FUNCTION_GRAPH_RETVAL - #define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL +#else +#define __TRACE_GRAPH_PRINT_RETVAL 0 +#endif -static void print_graph_retval(struct trace_seq *s, unsigned long retval, - bool leaf, void *func, bool hex_format) +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR +static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_entry *entry, + u32 trace_flags, bool comment) +{ + if (comment) + trace_seq_puts(s, " /*"); + + trace_seq_puts(s, " <-"); + seq_print_ip_sym(s, entry->graph_ent.retaddr, trace_flags | TRACE_ITER_SYM_OFFSET); + + if (comment) + trace_seq_puts(s, " */"); +} +#else +#define __TRACE_GRAPH_PRINT_RETADDR 0 +#define print_graph_retaddr(_seq, _entry, _tflags, _comment) do { } while (0) +#endif + +#if defined(CONFIG_FUNCTION_GRAPH_RETVAL) || defined(CONFIG_FUNCTION_GRAPH_RETADDR) + +static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry, + struct ftrace_graph_ret *graph_ret, void *func, + u32 opt_flags, u32 trace_flags) { unsigned long err_code = 0; + unsigned long retval = 0; + bool print_retaddr = false; + bool print_retval = false; + bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX); - if (retval == 0 || hex_format) - goto done; +#ifdef CONFIG_FUNCTION_GRAPH_RETVAL + retval = graph_ret->retval; + print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL); +#endif - /* Check if the return value matches the negative format */ - if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && - (((u64)retval) >> 32) == 0) { - /* sign extension */ - err_code = (unsigned long)(s32)retval; - } else { - err_code = retval; +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR); +#endif + + if (print_retval && retval && !hex_format) { + /* Check if the return value matches the negative format */ + if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && + (((u64)retval) >> 32) == 0) { + err_code = sign_extend64(retval, 31); + } else { + err_code = retval; + } + + if (!IS_ERR_VALUE(err_code)) + err_code = 0; } - if (!IS_ERR_VALUE(err_code)) - err_code = 0; + if (entry) { + if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT) + print_retaddr = false; -done: - if (leaf) { - if (hex_format || (err_code == 0)) - trace_seq_printf(s, "%ps(); /* = 0x%lx */\n", - func, retval); + trace_seq_printf(s, "%ps();", func); + if (print_retval || print_retaddr) + trace_seq_puts(s, " /*"); else - trace_seq_printf(s, "%ps(); /* = %ld */\n", - func, err_code); + trace_seq_putc(s, '\n'); } else { - if (hex_format || (err_code == 0)) - trace_seq_printf(s, "} /* %ps = 0x%lx */\n", - func, retval); - else - trace_seq_printf(s, "} /* %ps = %ld */\n", - func, err_code); + print_retaddr = false; + trace_seq_printf(s, "} /* %ps", func); } + + if (print_retaddr) + print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, + trace_flags, false); + + if (print_retval) { + if (hex_format || (err_code == 0)) + trace_seq_printf(s, " ret=0x%lx", retval); + else + trace_seq_printf(s, " ret=%ld", err_code); + } + + if (!entry || print_retval || print_retaddr) + trace_seq_puts(s, " */\n"); } #else -#define __TRACE_GRAPH_PRINT_RETVAL 0 - -#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0) +#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0) #endif @@ -798,14 +897,15 @@ print_graph_entry_leaf(struct trace_iterator *iter, trace_seq_putc(s, ' '); /* - * Write out the function return value if the option function-retval is - * enabled. + * Write out the function return value or return address */ - if (flags & __TRACE_GRAPH_PRINT_RETVAL) - print_graph_retval(s, graph_ret->retval, true, (void *)func, - !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX)); - else + if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) { + print_graph_retval(s, entry, graph_ret, + (void *)graph_ret->func + iter->tr->text_delta, + flags, tr->trace_flags); + } else { trace_seq_printf(s, "%ps();\n", (void *)func); + } print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -846,7 +946,12 @@ print_graph_entry_nested(struct trace_iterator *iter, func = call->func + iter->tr->text_delta; - trace_seq_printf(s, "%ps() {\n", (void *)func); + trace_seq_printf(s, "%ps() {", (void *)func); + if (flags & __TRACE_GRAPH_PRINT_RETADDR && + entry->ent.type == TRACE_GRAPH_RETADDR_ENT) + print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry, + tr->trace_flags, true); + trace_seq_putc(s, '\n'); if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; @@ -1093,11 +1198,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, /* * Always write out the function name and its return value if the - * function-retval option is enabled. + * funcgraph-retval option is enabled. */ if (flags & __TRACE_GRAPH_PRINT_RETVAL) { - print_graph_retval(s, trace->retval, false, (void *)func, - !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX)); + print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags); } else { /* * If the return function does not have a matching entry, @@ -1212,7 +1316,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) * to print out the missing entry which would never go out. */ if (data && data->failed) { - field = &data->ent; + field = &data->ent.ent; iter->cpu = data->cpu; ret = print_graph_entry(field, s, iter, flags); if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { @@ -1236,6 +1340,16 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) saved = *field; return print_graph_entry(&saved, s, iter, flags); } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + case TRACE_GRAPH_RETADDR_ENT: { + struct fgraph_retaddr_ent_entry saved; + struct fgraph_retaddr_ent_entry *rfield; + + trace_assign_type(rfield, entry); + saved = *rfield; + return print_graph_entry((struct ftrace_graph_ent_entry *)&saved, s, iter, flags); + } +#endif case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); @@ -1430,6 +1544,13 @@ static struct trace_event graph_trace_entry_event = { .funcs = &graph_functions, }; +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR +static struct trace_event graph_trace_retaddr_entry_event = { + .type = TRACE_GRAPH_RETADDR_ENT, + .funcs = &graph_functions, +}; +#endif + static struct trace_event graph_trace_ret_event = { .type = TRACE_GRAPH_RET, .funcs = &graph_functions @@ -1516,6 +1637,13 @@ static __init int init_graph_trace(void) return 1; } +#ifdef CONFIG_FUNCTION_GRAPH_RETADDR + if (!register_trace_event(&graph_trace_retaddr_entry_event)) { + pr_warn("Warning: could not register graph trace retaddr events\n"); + return 1; + } +#endif + if (!register_trace_event(&graph_trace_ret_event)) { pr_warn("Warning: could not register graph trace events\n"); return 1; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index fce064e20570..eb3aa36cf10f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -176,7 +176,8 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) } static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ae2ace5e515a..155de2551507 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -113,7 +113,8 @@ static int wakeup_display_graph(struct trace_array *tr, int set) } static int wakeup_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index c4ad7cd7e778..fbb99f8c8062 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_PRINT: case TRACE_BRANCH: case TRACE_GRAPH_ENT: + case TRACE_GRAPH_RETADDR_ENT: case TRACE_GRAPH_RET: return 1; } @@ -773,7 +774,8 @@ struct fgraph_fixture { }; static __init int store_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops); const char *type = fixture->store_type_name; @@ -1024,7 +1026,8 @@ static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops) + struct fgraph_ops *gops, + struct fgraph_extras *extras) { /* This is harmlessly racy, we want to approximately detect a hang */ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { @@ -1038,7 +1041,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, return 0; } - return trace_graph_entry(trace, gops); + return trace_graph_entry(trace, gops, NULL); } static struct fgraph_ops fgraph_ops __initdata = { diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc index e34c0bdef3ed..e8e46378b88d 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc @@ -29,7 +29,7 @@ set -e : "Test printing the error code in signed decimal format" echo 0 > options/funcgraph-retval-hex -count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l` +count=`cat trace | grep 'proc_reg_write' | grep '=-5' | wc -l` if [ $count -eq 0 ]; then fail "Return value can not be printed in signed decimal format" fi From 474ec3e849686a02d00c5bd7a80c3042505b66bb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 16 Sep 2024 19:58:19 +0200 Subject: [PATCH 07/22] function_graph: Remove unnecessary initialization in ftrace_graph_ret_addr() After the commit 29c1c24a2707 ("function_graph: Fix up ftrace_graph_ret_addr()") ftrace_graph_ret_addr() doesn't need to initialize "int i" at the start. Cc: Masami Hiramatsu Cc: Puranjay Mohan Link: https://lore.kernel.org/20240916175818.GA28944@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 875aefe60a13..27e523f01ed2 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -930,7 +930,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, { struct ftrace_ret_stack *ret_stack; unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); - int i = task->curr_ret_stack; + int i; if (ret != return_handler) return ret; From 0a6c61bc9c636e9a32d9f5a4d6d3b031d08763ab Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Thu, 10 Oct 2024 23:59:09 +0900 Subject: [PATCH 08/22] fgraph: Simplify return address printing in function graph tracer Simplify return address printing in the function graph tracer by removing fgraph_extras. Since this feature is only used by the function graph tracer and the feature flags can directly accessible from the function graph tracer, fgraph_extras can be removed from the fgraph callback. Cc: Donglin Peng Link: https://lore.kernel.org/172857234900.270774.15378354017601069781.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 16 ++-------- kernel/trace/fgraph.c | 45 ++++++++++++++++++---------- kernel/trace/ftrace.c | 3 +- kernel/trace/trace.h | 3 +- kernel/trace/trace_functions_graph.c | 18 +++++------ kernel/trace/trace_irqsoff.c | 3 +- kernel/trace/trace_sched_wakeup.c | 3 +- kernel/trace/trace_selftest.c | 8 ++--- 8 files changed, 48 insertions(+), 51 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ac3b3b53cd0..4c7dd5e58c9f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1068,29 +1068,20 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; -struct fgraph_extras; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, struct fgraph_ops *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *, - struct fgraph_extras *); /* entry */ + struct fgraph_ops *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras); + struct fgraph_ops *gops); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* Used to convey some extra datas when creating a graph entry */ -struct fgraph_extras { - u32 flags; - unsigned long retaddr; -}; - struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; @@ -1131,13 +1122,12 @@ function_graph_enter(unsigned long ret, unsigned long func, struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); -u32 graph_tracer_flags_get(u32 flags); - /* * Sometimes we don't want to trace a function with the function * graph tracer but we want them to keep traced by the usual function diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 27e523f01ed2..ee829d65f301 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -290,8 +290,7 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset) } /* ftrace_graph_entry set to this to tell some archs to run function graph */ -static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops, - struct fgraph_extras *extras) +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) { return 0; } @@ -519,8 +518,7 @@ int __weak ftrace_disable_ftrace_graph_caller(void) #endif int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { return 0; } @@ -648,20 +646,13 @@ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; - struct fgraph_extras extras; unsigned long bitmap = 0; int offset; int i; - int idx = 0; trace.func = func; trace.depth = ++current->curr_ret_depth; - extras.flags = graph_tracer_flags_get(TRACE_GRAPH_PRINT_RETADDR); - if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) - && extras.flags & TRACE_GRAPH_PRINT_RETADDR) - extras.retaddr = ftrace_graph_ret_addr(current, &idx, ret, retp); - offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); if (offset < 0) goto out; @@ -670,7 +661,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (static_branch_likely(&fgraph_do_direct)) { int save_curr_ret_stack = current->curr_ret_stack; - if (static_call(fgraph_func)(&trace, fgraph_direct_gops, &extras)) + if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) bitmap |= BIT(fgraph_direct_gops->idx); else /* Clear out any saved storage */ @@ -688,7 +679,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, save_curr_ret_stack = current->curr_ret_stack; if (ftrace_ops_test(&gops->ops, func, NULL) && - gops->entryfunc(&trace, gops, &extras)) + gops->entryfunc(&trace, gops)) bitmap |= BIT(i); else /* Clear out any saved storage */ @@ -905,6 +896,29 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx) return ret_stack; } +/** + * ftrace_graph_top_ret_addr - return the top return address in the shadow stack + * @task: The task to read the shadow stack from. + * + * Return the first return address on the shadow stack of the @task, which is + * not the fgraph's return_to_handler. + */ +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task) +{ + unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); + struct ftrace_ret_stack *ret_stack = NULL; + int offset = task->curr_ret_stack; + + if (offset < 0) + return 0; + + do { + ret_stack = get_ret_stack(task, offset, &offset); + } while (ret_stack && ret_stack->ret == return_handler); + + return ret_stack ? ret_stack->ret : 0; +} + /** * ftrace_graph_ret_addr - return the original value of the return address * @task: The task the unwinder is being executed on @@ -1145,8 +1159,7 @@ void ftrace_graph_exit_task(struct task_struct *t) #ifdef CONFIG_DYNAMIC_FTRACE static int fgraph_pid_func(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct trace_array *tr = gops->ops.private; int pid; @@ -1160,7 +1173,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace, return 0; } - return gops->saved_func(trace, gops, NULL); + return gops->saved_func(trace, gops); } void fgraph_update_pid_func(void) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5d87dac83b80..cae388122ca8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -827,8 +827,7 @@ struct profile_fgraph_data { }; static int profile_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct profile_fgraph_data *profile_data; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 13f08f257c0b..6adf48ef4312 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -695,8 +695,7 @@ void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); -int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, - struct fgraph_extras *extras); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 3dd63ae2afe8..20d0c579d3b5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -90,9 +90,9 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; -u32 graph_tracer_flags_get(u32 flags) +static bool tracer_flags_is_set(u32 flags) { - return tracer_flags.val & flags; + return (tracer_flags.val & flags) == flags; } /* @@ -179,8 +179,7 @@ struct fgraph_times { }; int trace_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; @@ -246,11 +245,12 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { trace_ctx = tracing_gen_ctx_flags(flags); - if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && extras - && (extras->flags & TRACE_GRAPH_PRINT_RETADDR))) - ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, - extras->retaddr); - else + if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) && + tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) { + unsigned long retaddr = ftrace_graph_top_ret_addr(current); + + ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr); + } else ret = __trace_graph_entry(tr, trace, trace_ctx); } else { ret = 0; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index eb3aa36cf10f..fce064e20570 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -176,8 +176,7 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) } static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 155de2551507..ae2ace5e515a 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -113,8 +113,7 @@ static int wakeup_display_graph(struct trace_array *tr, int set) } static int wakeup_graph_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index fbb99f8c8062..d3a14ae47e26 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -774,8 +774,7 @@ struct fgraph_fixture { }; static __init int store_entry(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops); const char *type = fixture->store_type_name; @@ -1026,8 +1025,7 @@ static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops, - struct fgraph_extras *extras) + struct fgraph_ops *gops) { /* This is harmlessly racy, we want to approximately detect a hang */ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { @@ -1041,7 +1039,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, return 0; } - return trace_graph_entry(trace, gops, NULL); + return trace_graph_entry(trace, gops); } static struct fgraph_ops fgraph_ops __initdata = { From c73eb02a4781aee53ee4122132967356361e4f1a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Oct 2024 19:40:20 -0400 Subject: [PATCH 09/22] fgragh: No need to invoke the function call_filter_check_discard() The function call_filter_check_discard() has been removed in the commit 49e4154f4b16 ("tracing: Remove TRACE_EVENT_FL_FILTERED logic"), from another topic branch. But when merged together with commit 21e92806d39c6 ("function_graph: Support recording and printing the function return address") which added another call to call_filter_check_discard(), it caused the build to fail. Since the function call_filter_check_discard() is useless, it can simply be removed regardless of being merged with commit 49e4154f4b16 or not. Link: https://lore.kernel.org/all/20241010134649.43ed357c@canb.auug.org.au/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Donglin Peng Link: https://lore.kernel.org/20241010194020.46192b21@gandalf.local.home Reported-by: Stephen Rothwell Fixes: 21e92806d39c6 ("function_graph: Support recording and printing the function return address") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 20d0c579d3b5..03c5a0d300a5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -137,7 +137,6 @@ int __trace_graph_retaddr_entry(struct trace_array *tr, unsigned int trace_ctx, unsigned long retaddr) { - struct trace_event_call *call = &event_fgraph_retaddr_entry; struct ring_buffer_event *event; struct trace_buffer *buffer = tr->array_buffer.buffer; struct fgraph_retaddr_ent_entry *entry; @@ -150,8 +149,7 @@ int __trace_graph_retaddr_entry(struct trace_array *tr, entry->graph_ent.func = trace->func; entry->graph_ent.depth = trace->depth; entry->graph_ent.retaddr = retaddr; - if (!call_filter_check_discard(call, entry, buffer, event)) - trace_buffer_unlock_commit_nostack(buffer, event); + trace_buffer_unlock_commit_nostack(buffer, event); return 1; } From 7888af4166d4ab07ba51234be6ba332b7807e901 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Oct 2024 19:05:28 -0400 Subject: [PATCH 10/22] ftrace: Make ftrace_regs abstract from direct use ftrace_regs was created to hold registers that store information to save function parameters, return value and stack. Since it is a subset of pt_regs, it should only be used by its accessor functions. But because pt_regs can easily be taken from ftrace_regs (on most archs), it is tempting to use it directly. But when running on other architectures, it may fail to build or worse, build but crash the kernel! Instead, make struct ftrace_regs an empty structure and have the architectures define __arch_ftrace_regs and all the accessor functions will typecast to it to get to the actual fields. This will help avoid usage of ftrace_regs directly. Link: https://lore.kernel.org/all/20241007171027.629bdafd@gandalf.local.home/ Cc: "linux-arch@vger.kernel.org" Cc: "x86@kernel.org" Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Naveen N Rao Cc: Madhavan Srinivasan Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Link: https://lore.kernel.org/20241008230628.958778821@goodmis.org Acked-by: Catalin Marinas Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Acked-by: Heiko Carstens # s390 Signed-off-by: Steven Rostedt (Google) --- arch/arm64/include/asm/ftrace.h | 20 +++++++++-------- arch/arm64/kernel/asm-offsets.c | 22 +++++++++---------- arch/arm64/kernel/ftrace.c | 10 ++++----- arch/loongarch/include/asm/ftrace.h | 22 ++++++++++--------- arch/loongarch/kernel/ftrace_dyn.c | 2 +- arch/powerpc/include/asm/ftrace.h | 21 ++++++++++-------- arch/powerpc/kernel/trace/ftrace.c | 4 ++-- arch/powerpc/kernel/trace/ftrace_64_pg.c | 2 +- arch/riscv/include/asm/ftrace.h | 21 ++++++++++-------- arch/riscv/kernel/asm-offsets.c | 28 ++++++++++++------------ arch/riscv/kernel/ftrace.c | 2 +- arch/s390/include/asm/ftrace.h | 23 ++++++++++--------- arch/s390/kernel/asm-offsets.c | 4 ++-- arch/s390/kernel/ftrace.c | 2 +- arch/s390/lib/test_unwind.c | 4 ++-- arch/x86/include/asm/ftrace.h | 25 +++++++++++---------- arch/x86/kernel/ftrace.c | 2 +- include/linux/ftrace.h | 21 +++++++++++++++--- kernel/trace/ftrace.c | 2 +- 19 files changed, 134 insertions(+), 103 deletions(-) diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index dc9cf0bd2a4c..bbb69c7751b9 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -56,6 +56,8 @@ unsigned long ftrace_call_adjust(unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS struct dyn_ftrace; struct ftrace_ops; +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) #define arch_ftrace_get_regs(regs) NULL @@ -63,7 +65,7 @@ struct ftrace_ops; * Note: sizeof(struct ftrace_regs) must be a multiple of 16 to ensure correct * stack alignment */ -struct ftrace_regs { +struct __arch_ftrace_regs { /* x0 - x8 */ unsigned long regs[9]; @@ -83,47 +85,47 @@ struct ftrace_regs { static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->pc; + return arch_ftrace_regs(fregs)->pc; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long pc) { - fregs->pc = pc; + arch_ftrace_regs(fregs)->pc = pc; } static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) { - return fregs->sp; + return arch_ftrace_regs(fregs)->sp; } static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) { if (n < 8) - return fregs->regs[n]; + return arch_ftrace_regs(fregs)->regs[n]; return 0; } static __always_inline unsigned long ftrace_regs_get_return_value(const struct ftrace_regs *fregs) { - return fregs->regs[0]; + return arch_ftrace_regs(fregs)->regs[0]; } static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, unsigned long ret) { - fregs->regs[0] = ret; + arch_ftrace_regs(fregs)->regs[0] = ret; } static __always_inline void ftrace_override_function_with_return(struct ftrace_regs *fregs) { - fregs->pc = fregs->lr; + arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr; } int ftrace_regs_query_register_offset(const char *name); @@ -143,7 +145,7 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, * The ftrace trampoline will return to this address instead of the * instrumented function. */ - fregs->direct_tramp = addr; + arch_ftrace_regs(fregs)->direct_tramp = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 27de1dddb0ab..a5de57f68219 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -84,19 +84,19 @@ int main(void) DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0])); - DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2])); - DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4])); - DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6])); - DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8])); - DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp)); - DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr)); - DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); - DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc)); + DEFINE(FREGS_X0, offsetof(struct __arch_ftrace_regs, regs[0])); + DEFINE(FREGS_X2, offsetof(struct __arch_ftrace_regs, regs[2])); + DEFINE(FREGS_X4, offsetof(struct __arch_ftrace_regs, regs[4])); + DEFINE(FREGS_X6, offsetof(struct __arch_ftrace_regs, regs[6])); + DEFINE(FREGS_X8, offsetof(struct __arch_ftrace_regs, regs[8])); + DEFINE(FREGS_FP, offsetof(struct __arch_ftrace_regs, fp)); + DEFINE(FREGS_LR, offsetof(struct __arch_ftrace_regs, lr)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_PC, offsetof(struct __arch_ftrace_regs, pc)); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct ftrace_regs, direct_tramp)); + DEFINE(FREGS_DIRECT_TRAMP, offsetof(struct __arch_ftrace_regs, direct_tramp)); #endif - DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs)); + DEFINE(FREGS_SIZE, sizeof(struct __arch_ftrace_regs)); BLANK(); #endif #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index a650f5e11fc5..b2d947175cbe 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -23,10 +23,10 @@ struct fregs_offset { int offset; }; -#define FREGS_OFFSET(n, field) \ -{ \ - .name = n, \ - .offset = offsetof(struct ftrace_regs, field), \ +#define FREGS_OFFSET(n, field) \ +{ \ + .name = n, \ + .offset = offsetof(struct __arch_ftrace_regs, field), \ } static const struct fregs_offset fregs_offsets[] = { @@ -481,7 +481,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - prepare_ftrace_return(ip, &fregs->lr, fregs->fp); + prepare_ftrace_return(ip, &arch_ftrace_regs(fregs)->lr, arch_ftrace_regs(fregs)->fp); } #else /* diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index c0a682808e07..0e15d36ce251 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -43,38 +43,40 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_ops; +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) -struct ftrace_regs { +struct __arch_ftrace_regs { struct pt_regs regs; }; static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - return &fregs->regs; + return &arch_ftrace_regs(fregs)->regs; } static __always_inline unsigned long ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) { - return instruction_pointer(&fregs->regs); + return instruction_pointer(&arch_ftrace_regs(fregs)->regs); } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - instruction_pointer_set(&fregs->regs, ip); + instruction_pointer_set(&arch_ftrace_regs(fregs)->regs, ip); } #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) @@ -90,7 +92,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) } #define arch_ftrace_set_direct_caller(fregs, addr) \ - __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) + __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #endif diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c index bff058317062..18056229e22e 100644 --- a/arch/loongarch/kernel/ftrace_dyn.c +++ b/arch/loongarch/kernel/ftrace_dyn.c @@ -241,7 +241,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; unsigned long *parent = (unsigned long *)®s->regs[1]; prepare_ftrace_return(ip, (unsigned long *)parent); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 559560286e6d..e299fd47d201 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -32,39 +32,42 @@ struct dyn_arch_ftrace { int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { struct pt_regs regs; }; static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { /* We clear regs.msr in ftrace_call */ - return fregs->regs.msr ? &fregs->regs : NULL; + return arch_ftrace_regs(fregs)->regs.msr ? &arch_ftrace_regs(fregs)->regs : NULL; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - regs_set_return_ip(&fregs->regs, ip); + regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); } static __always_inline unsigned long ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) { - return instruction_pointer(&fregs->regs); + return instruction_pointer(&arch_ftrace_regs(fregs)->regs); } #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14..df41f4a7c738 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -421,7 +421,7 @@ int __init ftrace_dyn_arch_init(void) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long sp = fregs->regs.gpr[1]; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1]; int bit; if (unlikely(ftrace_graph_is_dead())) @@ -439,6 +439,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, ftrace_test_recursion_unlock(bit); out: - fregs->regs.link = parent_ip; + arch_ftrace_regs(fregs)->regs.link = parent_ip; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf..d3c5552e4984 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -829,7 +829,7 @@ out: void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); + arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, arch_ftrace_regs(fregs)->regs.gpr[1]); } #else unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 2cddd79ff21b..c6bcdff105b5 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -126,7 +126,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define arch_ftrace_get_regs(regs) NULL struct ftrace_ops; -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { unsigned long epc; unsigned long ra; unsigned long sp; @@ -150,42 +153,42 @@ struct ftrace_regs { static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->epc; + return arch_ftrace_regs(fregs)->epc; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long pc) { - fregs->epc = pc; + arch_ftrace_regs(fregs)->epc = pc; } static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs) { - return fregs->sp; + return arch_ftrace_regs(fregs)->sp; } static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n) { if (n < 8) - return fregs->args[n]; + return arch_ftrace_regs(fregs)->args[n]; return 0; } static __always_inline unsigned long ftrace_regs_get_return_value(const struct ftrace_regs *fregs) { - return fregs->a0; + return arch_ftrace_regs(fregs)->a0; } static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs, unsigned long ret) { - fregs->a0 = ret; + arch_ftrace_regs(fregs)->a0 = ret; } static __always_inline void ftrace_override_function_with_return(struct ftrace_regs *fregs) { - fregs->epc = fregs->ra; + arch_ftrace_regs(fregs)->epc = arch_ftrace_regs(fregs)->ra; } int ftrace_regs_query_register_offset(const char *name); @@ -196,7 +199,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { - fregs->t1 = addr; + arch_ftrace_regs(fregs)->t1 = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e94180ba432f..f6f5a277ba9d 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -498,19 +498,19 @@ void asm_offsets(void) OFFSET(STACKFRAME_RA, stackframe, ra); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS - DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct ftrace_regs), STACK_ALIGN)); - DEFINE(FREGS_EPC, offsetof(struct ftrace_regs, epc)); - DEFINE(FREGS_RA, offsetof(struct ftrace_regs, ra)); - DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp)); - DEFINE(FREGS_S0, offsetof(struct ftrace_regs, s0)); - DEFINE(FREGS_T1, offsetof(struct ftrace_regs, t1)); - DEFINE(FREGS_A0, offsetof(struct ftrace_regs, a0)); - DEFINE(FREGS_A1, offsetof(struct ftrace_regs, a1)); - DEFINE(FREGS_A2, offsetof(struct ftrace_regs, a2)); - DEFINE(FREGS_A3, offsetof(struct ftrace_regs, a3)); - DEFINE(FREGS_A4, offsetof(struct ftrace_regs, a4)); - DEFINE(FREGS_A5, offsetof(struct ftrace_regs, a5)); - DEFINE(FREGS_A6, offsetof(struct ftrace_regs, a6)); - DEFINE(FREGS_A7, offsetof(struct ftrace_regs, a7)); + DEFINE(FREGS_SIZE_ON_STACK, ALIGN(sizeof(struct __arch_ftrace_regs), STACK_ALIGN)); + DEFINE(FREGS_EPC, offsetof(struct __arch_ftrace_regs, epc)); + DEFINE(FREGS_RA, offsetof(struct __arch_ftrace_regs, ra)); + DEFINE(FREGS_SP, offsetof(struct __arch_ftrace_regs, sp)); + DEFINE(FREGS_S0, offsetof(struct __arch_ftrace_regs, s0)); + DEFINE(FREGS_T1, offsetof(struct __arch_ftrace_regs, t1)); + DEFINE(FREGS_A0, offsetof(struct __arch_ftrace_regs, a0)); + DEFINE(FREGS_A1, offsetof(struct __arch_ftrace_regs, a1)); + DEFINE(FREGS_A2, offsetof(struct __arch_ftrace_regs, a2)); + DEFINE(FREGS_A3, offsetof(struct __arch_ftrace_regs, a3)); + DEFINE(FREGS_A4, offsetof(struct __arch_ftrace_regs, a4)); + DEFINE(FREGS_A5, offsetof(struct __arch_ftrace_regs, a5)); + DEFINE(FREGS_A6, offsetof(struct __arch_ftrace_regs, a6)); + DEFINE(FREGS_A7, offsetof(struct __arch_ftrace_regs, a7)); #endif } diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 4b95c574fd04..5081ad886841 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -214,7 +214,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - prepare_ftrace_return(&fregs->ra, ip, fregs->s0); + prepare_ftrace_return(&arch_ftrace_regs(fregs)->ra, ip, arch_ftrace_regs(fregs)->s0); } #else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */ extern void ftrace_graph_call(void); diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 406746666eb7..1498d0a9c762 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -51,13 +51,16 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { struct pt_regs regs; }; static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; if (test_pt_regs_flag(regs, PIF_FTRACE_FULL_REGS)) return regs; @@ -84,26 +87,26 @@ static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { - return fregs->regs.psw.addr; + return arch_ftrace_regs(fregs)->regs.psw.addr; } static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { - fregs->regs.psw.addr = ip; + arch_ftrace_regs(fregs)->regs.psw.addr = ip; } #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) @@ -117,7 +120,7 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, */ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; regs->orig_gpr2 = addr; } #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5529248d84fb..db9659980175 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -184,8 +184,8 @@ int main(void) OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); #endif - OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); - DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs)); OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 0b6e62d1d8b8..51439a71e392 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -318,7 +318,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - kmsan_unpoison_memory(fregs, sizeof(*fregs)); + kmsan_unpoison_memory(fregs, ftrace_regs_size()); regs = ftrace_get_regs(fregs); p = get_kprobe((kprobe_opcode_t *)ip); if (!regs || unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 8b7f981e6f34..6e42100875e7 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -270,9 +270,9 @@ static void notrace __used test_unwind_ftrace_handler(unsigned long ip, struct ftrace_ops *fops, struct ftrace_regs *fregs) { - struct unwindme *u = (struct unwindme *)fregs->regs.gprs[2]; + struct unwindme *u = (struct unwindme *)arch_ftrace_regs(fregs)->regs.gprs[2]; - u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &fregs->regs : NULL, + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? &arch_ftrace_regs(fregs)->regs : NULL, (u->flags & UWM_SP) ? u->sp : 0); } diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 0152a81d9b4a..87943f7a299b 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -33,7 +33,10 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -struct ftrace_regs { +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct __arch_ftrace_regs { struct pt_regs regs; }; @@ -41,27 +44,27 @@ static __always_inline struct pt_regs * arch_ftrace_get_regs(struct ftrace_regs *fregs) { /* Only when FL_SAVE_REGS is set, cs will be non zero */ - if (!fregs->regs.cs) + if (!arch_ftrace_regs(fregs)->regs.cs) return NULL; - return &fregs->regs; + return &arch_ftrace_regs(fregs)->regs; } #define ftrace_regs_set_instruction_pointer(fregs, _ip) \ - do { (fregs)->regs.ip = (_ip); } while (0) + do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) #define ftrace_regs_get_instruction_pointer(fregs) \ - ((fregs)->regs.ip) + arch_ftrace_regs(fregs)->regs.ip) #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&(fregs)->regs, n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&(fregs)->regs) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(&(fregs)->regs) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&(fregs)->regs, ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&(fregs)->regs) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) @@ -88,7 +91,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) regs->orig_ax = addr; } #define arch_ftrace_set_direct_caller(fregs, addr) \ - __arch_ftrace_set_direct_caller(&(fregs)->regs, addr) + __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr) #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8da0e66ca22d..adb09f78edb2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -647,7 +647,7 @@ void prepare_ftrace_return(unsigned long ip, unsigned long *parent, void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - struct pt_regs *regs = &fregs->regs; + struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs; unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs); prepare_ftrace_return(ip, (unsigned long *)stack, 0); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4c7dd5e58c9f..66f10291a0b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,6 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val extern int ftrace_enabled; -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS - /** * ftrace_regs - ftrace partial/optimal register set * @@ -142,11 +140,28 @@ extern int ftrace_enabled; * * NOTE: user *must not* access regs directly, only do it via APIs, because * the member can be changed according to the architecture. + * This is why the structure is empty here, so that nothing accesses + * the ftrace_regs directly. */ struct ftrace_regs { + /* Nothing to see here, use the accessor functions! */ +}; + +#define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +struct __arch_ftrace_regs { struct pt_regs regs; }; -#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +struct ftrace_regs; +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &arch_ftrace_regs(fregs)->regs; +} /* * ftrace_regs_set_instruction_pointer() is to be defined by the architecture diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cae388122ca8..e9fd4fb2769e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7943,7 +7943,7 @@ out: void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - kmsan_unpoison_memory(fregs, sizeof(*fregs)); + kmsan_unpoison_memory(fregs, ftrace_regs_size()); __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } #else From e4cf33ca48128d580e25ebe779b7ba7b4b4cf733 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Oct 2024 20:21:14 -0400 Subject: [PATCH 11/22] ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs Most architectures use pt_regs within ftrace_regs making a lot of the accessor functions just calls to the pt_regs internally. Instead of duplication this effort, use a HAVE_ARCH_FTRACE_REGS for architectures that have their own ftrace_regs that is not based on pt_regs and will define all the accessor functions, and for the architectures that just use pt_regs, it will leave it undefined, and the default accessor functions will be used. Note, this will also make it easier to add new accessor functions to ftrace_regs as it will mean having to touch less architectures. Cc: Cc: "x86@kernel.org" Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Naveen N Rao Cc: Madhavan Srinivasan Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Link: https://lore.kernel.org/20241010202114.2289f6fd@gandalf.local.home Acked-by: Masami Hiramatsu (Google) Acked-by: Heiko Carstens # s390 Acked-by: Catalin Marinas Acked-by: Michael Ellerman # powerpc Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- arch/arm64/include/asm/ftrace.h | 1 + arch/loongarch/include/asm/ftrace.h | 25 +------------------- arch/powerpc/include/asm/ftrace.h | 26 +-------------------- arch/riscv/include/asm/ftrace.h | 1 + arch/s390/include/asm/ftrace.h | 26 +-------------------- arch/x86/include/asm/ftrace.h | 21 +---------------- include/linux/ftrace.h | 32 ++++++------------------- include/linux/ftrace_regs.h | 36 +++++++++++++++++++++++++++++ 8 files changed, 49 insertions(+), 119 deletions(-) create mode 100644 include/linux/ftrace_regs.h diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index bbb69c7751b9..5ccff4de7f09 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -54,6 +54,7 @@ extern void return_to_handler(void); unsigned long ftrace_call_adjust(unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define HAVE_ARCH_FTRACE_REGS struct dyn_ftrace; struct ftrace_ops; struct ftrace_regs; diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 0e15d36ce251..8f13eaeaa325 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -43,43 +43,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent); #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_ops; -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { return &arch_ftrace_regs(fregs)->regs; } -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) -{ - return instruction_pointer(&arch_ftrace_regs(fregs)->regs); -} - static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) { instruction_pointer_set(&arch_ftrace_regs(fregs)->regs, ip); } -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - #define ftrace_graph_func ftrace_graph_func void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index e299fd47d201..0edfb874eb02 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -32,12 +32,7 @@ struct dyn_arch_ftrace { int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #define ftrace_init_nop ftrace_init_nop -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { @@ -52,25 +47,6 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip); } -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs) -{ - return instruction_pointer(&arch_ftrace_regs(fregs)->regs); -} - -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c6bcdff105b5..3d66437a1029 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -125,6 +125,7 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define arch_ftrace_get_regs(regs) NULL +#define HAVE_ARCH_FTRACE_REGS struct ftrace_ops; struct ftrace_regs; #define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 1498d0a9c762..fc97d75dc752 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -51,12 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { @@ -84,12 +79,6 @@ static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -static __always_inline unsigned long -ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) -{ - return arch_ftrace_regs(fregs)->regs.psw.addr; -} - static __always_inline void ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip) @@ -97,19 +86,6 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, arch_ftrace_regs(fregs)->regs.psw.addr = ip; } -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * When an ftrace registered caller is tracing a function that is diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 87943f7a299b..8f02d28c571a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -33,12 +33,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) -struct __arch_ftrace_regs { - struct pt_regs regs; -}; +#include static __always_inline struct pt_regs * arch_ftrace_get_regs(struct ftrace_regs *fregs) @@ -52,21 +48,6 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_regs_set_instruction_pointer(fregs, _ip) \ do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0) -#define ftrace_regs_get_instruction_pointer(fregs) \ - arch_ftrace_regs(fregs)->regs.ip) - -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) struct ftrace_ops; #define ftrace_graph_func ftrace_graph_func diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 66f10291a0b2..aa9ddd1e4bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -113,6 +113,8 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER +#include + extern int ftrace_enabled; /** @@ -150,14 +152,11 @@ struct ftrace_regs { #define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS - -struct __arch_ftrace_regs { - struct pt_regs regs; -}; - -struct ftrace_regs; -#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) - +/* + * Architectures that define HAVE_DYNAMIC_FTRACE_WITH_ARGS must define their own + * arch_ftrace_get_regs() where it only returns pt_regs *if* it is fully + * populated. It should return NULL otherwise. + */ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) { return &arch_ftrace_regs(fregs)->regs; @@ -191,23 +190,6 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -#define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(ftrace_get_regs(fregs), n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(ftrace_get_regs(fregs)) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(ftrace_get_regs(fregs), ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(ftrace_get_regs(fregs)) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) -#endif - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h new file mode 100644 index 000000000000..dea6a0851b74 --- /dev/null +++ b/include/linux/ftrace_regs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FTRACE_REGS_H +#define _LINUX_FTRACE_REGS_H + +/* + * For archs that just copy pt_regs in ftrace regs, it can use this default. + * If an architecture does not use pt_regs, it must define all the below + * accessor functions. + */ +#ifndef HAVE_ARCH_FTRACE_REGS +struct __arch_ftrace_regs { + struct pt_regs regs; +}; + +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct ftrace_regs; + +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(arch_ftrace_get_regs(fregs), ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(arch_ftrace_get_regs(fregs)) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + +#endif /* HAVE_ARCH_FTRACE_REGS */ + +#endif /* _LINUX_FTRACE_REGS_H */ From 0b582611a8f4270fa357a22a546909b2dd5fc5fe Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 15 Oct 2024 10:28:43 +0900 Subject: [PATCH 12/22] ftrace: Use arch_ftrace_regs() for ftrace_regs_*() macros Since the arch_ftrace_get_regs(fregs) is only valid when the FL_SAVE_REGS is set, we need to use `&arch_ftrace_regs()->regs` for ftrace_regs_*() APIs because those APIs are for ftrace_regs, not complete pt_regs. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Cc: Mark Rutland Link: https://lore.kernel.org/172895572290.107311.16057631001860177198.stgit@devnote2 Fixes: e4cf33ca4812 ("ftrace: Consolidate ftrace_regs accessor functions for archs using pt_regs") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace_regs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index dea6a0851b74..b78a0a60515b 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -17,17 +17,17 @@ struct __arch_ftrace_regs { struct ftrace_regs; #define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(arch_ftrace_get_regs(fregs)) + instruction_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(arch_ftrace_get_regs(fregs), n) + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(arch_ftrace_get_regs(fregs)) + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_return_value(fregs) \ - regs_return_value(arch_ftrace_get_regs(fregs)) + regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(arch_ftrace_get_regs(fregs), ret) + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) #define ftrace_override_function_with_return(fregs) \ - override_function_with_return(arch_ftrace_get_regs(fregs)) + override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) From 8684f2f37d65eeb30b6f704750c691b6e697854b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Oct 2024 13:20:42 -0400 Subject: [PATCH 13/22] selftests/ftrace: Fix check of return value in fgraph-retval.tc test The addition of recording both the function name and return address to the function graph tracer updated the selftest to check for "=-5" from "= -5". But this causes the test to fail on certain configs, as "= -5" is still a value that can be returned if function addresses are not enabled (older kernels). Check for both "=-5" and " -5" as a success value. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Shuah Khan Cc: Donglin Peng Link: https://lore.kernel.org/20241011132042.435f43cc@gandalf.local.home Fixes: 21e92806d39c6 ("function_graph: Support recording and printing the function return address") Signed-off-by: Steven Rostedt (Google) --- tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc index e8e46378b88d..4307d4eef417 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc @@ -29,7 +29,7 @@ set -e : "Test printing the error code in signed decimal format" echo 0 > options/funcgraph-retval-hex -count=`cat trace | grep 'proc_reg_write' | grep '=-5' | wc -l` +count=`cat trace | grep 'proc_reg_write' | grep -e '=-5 ' -e '= -5 ' | wc -l` if [ $count -eq 0 ]; then fail "Return value can not be printed in signed decimal format" fi From 2d17932da44fdc1ba835ad05110ab996d2912dbf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 15 Oct 2024 10:28:53 +0900 Subject: [PATCH 14/22] ftrace: Rename ftrace_regs_return_value to ftrace_regs_get_return_value Rename ftrace_regs_return_value to ftrace_regs_get_return_value as same as other ftrace_regs_get/set_* APIs. arm64 and riscv are already using this new name. Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Alan Maguire Link: https://lore.kernel.org/172895573350.107311.7564634260652361511.stgit@devnote2 Acked-by: Mark Rutland Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index b78a0a60515b..be1ed0c891d0 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -22,7 +22,7 @@ struct ftrace_regs; regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) #define ftrace_regs_get_stack_pointer(fregs) \ kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) -#define ftrace_regs_return_value(fregs) \ +#define ftrace_regs_get_return_value(fregs) \ regs_return_value(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_set_return_value(fregs, ret) \ regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) From 6ea8b69da6bf1b8a0e4683f37a006a2e2f4fc943 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 19 Oct 2024 15:29:51 -0400 Subject: [PATCH 15/22] fgraph: Separate size of ret_stack from PAGE_SIZE The ret_stack (shadow stack used by function graph infrastructure) is currently defined as PAGE_SIZE. But some architectures which have 64K PAGE_SIZE, this is way overkill. Also there's an effort to allow the PAGE_SIZE to be defined at boot up. Hard code it for now to 4096. In the future, this size may change and even be dependent on specific architectures. Link: https://lore.kernel.org/all/e5067bb8-0fcd-4739-9bca-0e872037d5a1@arm.com/ Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241019152951.053f9646@rorschach.local.home Suggested-by: Ryan Roberts Reviewed-by: Ryan Roberts Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 13fcc25d15a0..4ce87982966a 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -153,7 +153,7 @@ enum { * SHADOW_STACK_OFFSET: The size in long words of the shadow stack * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added */ -#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_SIZE (4096) #define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) /* Leave on a buffer at the end */ #define SHADOW_STACK_MAX_OFFSET \ From 434098485bfc8f5336d5164adea0a9d102a0a09a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 26 Oct 2024 06:32:10 -0400 Subject: [PATCH 16/22] fgraph: Give ret_stack its own kmem cache The ret_stack (shadow stack used by function graph infrastructure) is created for every task on the system when function graph is enabled. Give it its own kmem_cache. This will make it easier to see how much memory is being used specifically for function graph shadow stacks. In the future, this size may change and may not be a power of two. Having its own cache can also keep it from fragmenting memory. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Ryan Roberts Link: https://lore.kernel.org/20241026063210.7d4910a7@rorschach.local.home Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 4ce87982966a..001abf376c0c 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -172,6 +172,8 @@ enum { DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; +static struct kmem_cache *fgraph_stack_cachep; + static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; static unsigned long fgraph_array_bitmask; @@ -1022,8 +1024,11 @@ static int alloc_retstack_tasklist(unsigned long **ret_stack_list) int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; struct task_struct *g, *t; + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return -ENOMEM; + for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + ret_stack_list[i] = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL); if (!ret_stack_list[i]) { start = 0; end = i; @@ -1054,7 +1059,7 @@ unlock: rcu_read_unlock(); free: for (i = start; i < end; i++) - kfree(ret_stack_list[i]); + kmem_cache_free(fgraph_stack_cachep, ret_stack_list[i]); return ret; } @@ -1117,9 +1122,12 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) if (ftrace_graph_active) { unsigned long *ret_stack; + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return; + ret_stack = per_cpu(idle_ret_stack, cpu); if (!ret_stack) { - ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL); if (!ret_stack) return; per_cpu(idle_ret_stack, cpu) = ret_stack; @@ -1139,7 +1147,10 @@ void ftrace_graph_init_task(struct task_struct *t) if (ftrace_graph_active) { unsigned long *ret_stack; - ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return; + + ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL); if (!ret_stack) return; graph_init_task(t, ret_stack); @@ -1154,7 +1165,11 @@ void ftrace_graph_exit_task(struct task_struct *t) /* NULL must become visible to IRQs before we free it: */ barrier(); - kfree(ret_stack); + if (ret_stack) { + if (WARN_ON_ONCE(!fgraph_stack_cachep)) + return; + kmem_cache_free(fgraph_stack_cachep, ret_stack); + } } #ifdef CONFIG_DYNAMIC_FTRACE @@ -1294,6 +1309,14 @@ int register_ftrace_graph(struct fgraph_ops *gops) guard(mutex)(&ftrace_lock); + if (!fgraph_stack_cachep) { + fgraph_stack_cachep = kmem_cache_create("fgraph_stack", + SHADOW_STACK_SIZE, + SHADOW_STACK_SIZE, 0, NULL); + if (!fgraph_stack_cachep) + return -ENOMEM; + } + if (!fgraph_initialized) { ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph:online", fgraph_cpu_init, NULL); From 6348a3fa723a4fa2e5651b0b93fbcddd5293e92b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Oct 2024 03:12:29 -0400 Subject: [PATCH 17/22] fgraph: Use guard(mutex)(&ftrace_lock) for unregister_ftrace_graph() The ftrace_lock is held throughout unregister_ftrace_graph(), use a guard to simplify the error paths. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Link: https://lore.kernel.org/20241028071307.770550792@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 001abf376c0c..0bf78517b5d4 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1381,17 +1381,17 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) { int command = 0; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); if (unlikely(!ftrace_graph_active)) - goto out; + return; if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE || fgraph_array[gops->idx] != gops)) - goto out; + return; if (fgraph_lru_release_index(gops->idx) < 0) - goto out; + return; fgraph_array[gops->idx] = &fgraph_stub; @@ -1413,7 +1413,5 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); } - out: gops->saved_func = NULL; - mutex_unlock(&ftrace_lock); } From 06294cf04500317c50b5b4c4c29440123cd15d48 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Oct 2024 03:12:30 -0400 Subject: [PATCH 18/22] ftrace: Use guard for match_records() The ftrace_lock is held for most of match_records() until the end of the function. Use guard to make error paths simpler. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Link: https://lore.kernel.org/20241028071307.927146604@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e9fd4fb2769e..44adc34643c9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4829,15 +4829,13 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) mod_g.len = strlen(mod_g.search); } - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); if (unlikely(ftrace_disabled)) - goto out_unlock; + return 0; - if (func_g.type == MATCH_INDEX) { - found = add_rec_by_index(hash, &func_g, clear_filter); - goto out_unlock; - } + if (func_g.type == MATCH_INDEX) + return add_rec_by_index(hash, &func_g, clear_filter); do_for_each_ftrace_rec(pg, rec) { @@ -4846,16 +4844,12 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); - if (ret < 0) { - found = ret; - goto out_unlock; - } + if (ret < 0) + return ret; found = 1; } cond_resched(); } while_for_each_ftrace_rec(); - out_unlock: - mutex_unlock(&ftrace_lock); return found; } From 1432afb50de460e56ad90938a7bb42b6128790e6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Oct 2024 03:12:31 -0400 Subject: [PATCH 19/22] ftrace: Use guard to lock ftrace_lock in cache_mod() The ftrace_lock is held throughout cache_mod(), use guard to simplify the error paths. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Link: https://lore.kernel.org/20241028071308.088458856@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 44adc34643c9..64997416415e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4947,14 +4947,14 @@ static int cache_mod(struct trace_array *tr, { struct ftrace_mod_load *ftrace_mod, *n; struct list_head *head = enable ? &tr->mod_trace : &tr->mod_notrace; - int ret; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); /* We do not cache inverse filters */ if (func[0] == '!') { + int ret = -EINVAL; + func++; - ret = -EINVAL; /* Look to remove this hash */ list_for_each_entry_safe(ftrace_mod, n, head, list) { @@ -4970,20 +4970,15 @@ static int cache_mod(struct trace_array *tr, continue; } } - goto out; + return ret; } - ret = -EINVAL; /* We only care about modules that have not been loaded yet */ if (module_exists(module)) - goto out; + return -EINVAL; /* Save this string off, and execute it when the module is loaded */ - ret = ftrace_add_mod(tr, func, module, enable); - out: - mutex_unlock(&ftrace_lock); - - return ret; + return ftrace_add_mod(tr, func, module, enable); } static int From 9687bbf219549fc93f2364c78aa91fd9ffc9eca0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Oct 2024 03:12:32 -0400 Subject: [PATCH 20/22] ftrace: Use guard to take the ftrace_lock in release_probe() The ftrace_lock is held throughout the entire release_probe() function. Use guard to simplify any exit paths. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Link: https://lore.kernel.org/20241028071308.250787901@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 64997416415e..c0fabd7da5b2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5288,7 +5288,7 @@ static void release_probe(struct ftrace_func_probe *probe) { struct ftrace_probe_ops *probe_ops; - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); WARN_ON(probe->ref <= 0); @@ -5306,7 +5306,6 @@ static void release_probe(struct ftrace_func_probe *probe) list_del(&probe->list); kfree(probe); } - mutex_unlock(&ftrace_lock); } static void acquire_probe_locked(struct ftrace_func_probe *probe) From 8b0cb3a4c5e85dda8957ba6a4c8c081a9aec6e80 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Oct 2024 03:12:33 -0400 Subject: [PATCH 21/22] ftrace: Use guard to take ftrace_lock in ftrace_graph_set_hash() The ftrace_lock is taken for most of the ftrace_graph_set_hash() function throughout the end. Use guard to take the ftrace_lock to simplify the exit paths. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Link: https://lore.kernel.org/20241028071308.406073025@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c0fabd7da5b2..b4ef469f4fd2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6816,12 +6816,10 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) func_g.len = strlen(func_g.search); - mutex_lock(&ftrace_lock); + guard(mutex)(&ftrace_lock); - if (unlikely(ftrace_disabled)) { - mutex_unlock(&ftrace_lock); + if (unlikely(ftrace_disabled)) return -ENODEV; - } do_for_each_ftrace_rec(pg, rec) { @@ -6837,7 +6835,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) if (entry) continue; if (add_hash_entry(hash, rec->ip) == NULL) - goto out; + return 0; } else { if (entry) { free_hash_entry(hash, entry); @@ -6846,13 +6844,8 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) } } } while_for_each_ftrace_rec(); -out: - mutex_unlock(&ftrace_lock); - if (fail) - return -EINVAL; - - return 0; + return fail ? -EINVAL : 0; } static ssize_t From 36a367b8912a3aac023d9e35c815f7b1e609f4a3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Oct 2024 11:31:05 -0400 Subject: [PATCH 22/22] ftrace: Show timings of how long nop patching took Since the beginning of ftrace, the code that did the patching had its timings saved on how long it took to complete. But this information was never exposed. It was used for debugging and exposing it was always something that was on the TODO list. Now it's time to expose it. There's even a file that is where it should go! Also include how long patching modules took as a separate value. # cat /sys/kernel/tracing/dyn_ftrace_total_info 57680 pages:231 groups: 9 ftrace boot update time = 14024666 (ns) ftrace module total update time = 126070 (ns) Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Mike Rapoport Cc: Peter Zijlstra Link: https://lore.kernel.org/20241017113105.1edfa943@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 11 ++++++++--- kernel/trace/trace.c | 15 +++++++++++---- kernel/trace/trace.h | 2 ++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4ef469f4fd2..71e53eaba8bc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3686,7 +3686,8 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, } -static u64 ftrace_update_time; +u64 ftrace_update_time; +u64 ftrace_total_mod_time; unsigned long ftrace_update_tot_cnt; unsigned long ftrace_number_of_pages; unsigned long ftrace_number_of_groups; @@ -3706,7 +3707,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) bool init_nop = ftrace_need_init_nop(); struct ftrace_page *pg; struct dyn_ftrace *p; - u64 start, stop; + u64 start, stop, update_time; unsigned long update_cnt = 0; unsigned long rec_flags = 0; int i; @@ -3750,7 +3751,11 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) } stop = ftrace_now(raw_smp_processor_id()); - ftrace_update_time = stop - start; + update_time = stop - start; + if (mod) + ftrace_total_mod_time += update_time; + else + ftrace_update_time = update_time; ftrace_update_tot_cnt += update_cnt; return 0; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c01375adc471..405dcf498159 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8548,15 +8548,22 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf, char *buf; int r; - /* 256 should be plenty to hold the amount needed */ - buf = kmalloc(256, GFP_KERNEL); + /* 512 should be plenty to hold the amount needed */ +#define DYN_INFO_BUF_SIZE 512 + + buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; - r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n", + r = scnprintf(buf, DYN_INFO_BUF_SIZE, + "%ld pages:%ld groups: %ld\n" + "ftrace boot update time = %llu (ns)\n" + "ftrace module total update time = %llu (ns)\n", ftrace_update_tot_cnt, ftrace_number_of_pages, - ftrace_number_of_groups); + ftrace_number_of_groups, + ftrace_update_time, + ftrace_total_mod_time); ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); kfree(buf); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6adf48ef4312..3307dad4d917 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -775,6 +775,8 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable); extern unsigned long ftrace_update_tot_cnt; extern unsigned long ftrace_number_of_pages; extern unsigned long ftrace_number_of_groups; +extern u64 ftrace_update_time; +extern u64 ftrace_total_mod_time; void ftrace_init_trace_array(struct trace_array *tr); #else static inline void ftrace_init_trace_array(struct trace_array *tr) { }