Various fixes for tracing:

- Fix tp_printk command line option crashing the kernel
 
   With the code that can handle a buffer from a previous boot, the
   trace_check_vprintf() needed access to the delta of the address
   space used by the old buffer and the current buffer. To do so,
   the trace_array (tr) parameter was used. But when tp_printk is
   enabled on the kernel command line, no trace buffer is used and
   the trace event is sent directly to printk(). That meant the tr
   field of the iterator descriptor was NULL, and since tp_printk still
   uses trace_check_vprintf() it caused a NULL dereference.
 
 - Add ptrace.h include to x86 ftrace file for completeness
 
 - Fix rtla installation when done with out-of-tree build
 
 - Fix the help messages in rtla that were incorrect
 
 - Several fixes to fix races with the timerlat and hwlat code
 
   Several locking issues were discovered with the coordination
   between timerlat kthread creation and hotplug. As timerlat has
   callbacks from hotplug code to start kthreads when CPUs come online.
   There are also locking issues with grabbing the cpu_read_lock()
   and the locks within timerlat.
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCZwAUghQccm9zdGVkdEBn
 b29kbWlzLm9yZwAKCRAp5XQQmuv6qmMCAP9Z+0sN/Tx+F+5LmrXV1R9UxRPykmpm
 4NeZYEp+hAQSegD+MdHsEJLIDfmsZnGOBivmzfepuv35GMLrqQMIhhWQOA0=
 =/OQq
 -----END PGP SIGNATURE-----

Merge tag 'trace-v6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

 - Fix tp_printk command line option crashing the kernel

   With the code that can handle a buffer from a previous boot, the
   trace_check_vprintf() needed access to the delta of the address space
   used by the old buffer and the current buffer. To do so, the
   trace_array (tr) parameter was used. But when tp_printk is enabled on
   the kernel command line, no trace buffer is used and the trace event
   is sent directly to printk(). That meant the tr field of the iterator
   descriptor was NULL, and since tp_printk still uses
   trace_check_vprintf() it caused a NULL dereference.

 - Add ptrace.h include to x86 ftrace file for completeness

 - Fix rtla installation when done with out-of-tree build

 - Fix the help messages in rtla that were incorrect

 - Several fixes to fix races with the timerlat and hwlat code

   Several locking issues were discovered with the coordination between
   timerlat kthread creation and hotplug. As timerlat has callbacks from
   hotplug code to start kthreads when CPUs come online. There are also
   locking issues with grabbing the cpu_read_lock() and the locks within
   timerlat.

* tag 'trace-v6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  tracing/hwlat: Fix a race during cpuhp processing
  tracing/timerlat: Fix a race during cpuhp processing
  tracing/timerlat: Drop interface_lock in stop_kthread()
  tracing/timerlat: Fix duplicated kthread creation due to CPU online/offline
  x86/ftrace: Include <asm/ptrace.h>
  rtla: Fix the help text in osnoise and timerlat top tools
  tools/rtla: Fix installation from out-of-tree build
  tracing: Fix trace_check_vprintf() when tp_printk is used
This commit is contained in:
Linus Torvalds 2024-10-04 12:11:06 -07:00
commit 622a3ed1ac
7 changed files with 34 additions and 15 deletions

View File

@ -2,6 +2,8 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
#include <asm/ptrace.h>
#ifdef CONFIG_FUNCTION_TRACER
#ifndef CC_USING_FENTRY
# error Compiler does not support fentry?

View File

@ -3697,8 +3697,8 @@ static void test_can_verify(void)
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
va_list ap)
{
long text_delta = iter->tr->text_delta;
long data_delta = iter->tr->data_delta;
long text_delta = 0;
long data_delta = 0;
const char *p = fmt;
const char *str;
bool good;
@ -3710,6 +3710,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
if (static_branch_unlikely(&trace_no_verify))
goto print;
/*
* When the kernel is booted with the tp_printk command line
* parameter, trace events go directly through to printk().
* It also is checked by this function, but it does not
* have an associated trace_array (tr) for it.
*/
if (iter->tr) {
text_delta = iter->tr->text_delta;
data_delta = iter->tr->data_delta;
}
/* Don't bother checking when doing a ftrace_dump() */
if (iter->fmt == static_fmt_buf)
goto print;

View File

@ -520,6 +520,8 @@ static void hwlat_hotplug_workfn(struct work_struct *dummy)
if (!hwlat_busy || hwlat_data.thread_mode != MODE_PER_CPU)
goto out_unlock;
if (!cpu_online(cpu))
goto out_unlock;
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
goto out_unlock;

View File

@ -1953,12 +1953,8 @@ static void stop_kthread(unsigned int cpu)
{
struct task_struct *kthread;
mutex_lock(&interface_lock);
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
if (kthread) {
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
mutex_unlock(&interface_lock);
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
!WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
kthread_stop(kthread);
@ -1972,7 +1968,6 @@ static void stop_kthread(unsigned int cpu)
put_task_struct(kthread);
}
} else {
mutex_unlock(&interface_lock);
/* if no workload, just return */
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
/*
@ -1994,8 +1989,12 @@ static void stop_per_cpu_kthreads(void)
{
int cpu;
for_each_possible_cpu(cpu)
cpus_read_lock();
for_each_online_cpu(cpu)
stop_kthread(cpu);
cpus_read_unlock();
}
/*
@ -2007,6 +2006,10 @@ static int start_kthread(unsigned int cpu)
void *main = osnoise_main;
char comm[24];
/* Do not start a new thread if it is already running */
if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
return 0;
if (timerlat_enabled()) {
snprintf(comm, 24, "timerlat/%d", cpu);
main = timerlat_main;
@ -2061,11 +2064,10 @@ static int start_per_cpu_kthreads(void)
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
struct task_struct *kthread;
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
if (!WARN_ON(!kthread))
kthread_stop(kthread);
}
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
}
for_each_cpu(cpu, current_mask) {
@ -2095,6 +2097,8 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
mutex_lock(&interface_lock);
cpus_read_lock();
if (!cpu_online(cpu))
goto out_unlock;
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
goto out_unlock;

View File

@ -38,7 +38,7 @@ BINDIR := /usr/bin
.PHONY: install
install: doc_install
@$(MKDIR) -p $(DESTDIR)$(BINDIR)
$(call QUIET_INSTALL,rtla)$(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR)
$(call QUIET_INSTALL,rtla)$(INSTALL) $(RTLA) -m 755 $(DESTDIR)$(BINDIR)
@$(STRIP) $(DESTDIR)$(BINDIR)/rtla
@test ! -f $(DESTDIR)$(BINDIR)/osnoise || $(RM) $(DESTDIR)$(BINDIR)/osnoise
@$(LN) rtla $(DESTDIR)$(BINDIR)/osnoise

View File

@ -442,7 +442,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
case 'd':
params->duration = parse_seconds_duration(optarg);
if (!params->duration)
osnoise_top_usage(params, "Invalid -D duration\n");
osnoise_top_usage(params, "Invalid -d duration\n");
break;
case 'e':
tevent = trace_event_alloc(optarg);

View File

@ -459,7 +459,7 @@ static void timerlat_top_usage(char *usage)
" -c/--cpus cpus: run the tracer only on the given cpus",
" -H/--house-keeping cpus: run rtla control threads only on the given cpus",
" -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited",
" -d/--duration time[m|h|d]: duration of the session in seconds",
" -d/--duration time[s|m|h|d]: duration of the session",
" -D/--debug: print debug info",
" --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
" -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]",
@ -613,7 +613,7 @@ static struct timerlat_top_params
case 'd':
params->duration = parse_seconds_duration(optarg);
if (!params->duration)
timerlat_top_usage("Invalid -D duration\n");
timerlat_top_usage("Invalid -d duration\n");
break;
case 'e':
tevent = trace_event_alloc(optarg);