The timerlat tracer aims to help the preemptive kernel developers to
found souces of wakeup latencies of real-time threads. Like cyclictest,
the tracer sets a periodic timer that wakes up a thread. The thread then
computes a *wakeup latency* value as the difference between the *current
time* and the *absolute time* that the timer was set to expire. The main
goal of timerlat is tracing in such a way to help kernel developers.
Usage
Write the ASCII text "timerlat" into the current_tracer file of the
tracing system (generally mounted at /sys/kernel/tracing).
For example:
        [root@f32 ~]# cd /sys/kernel/tracing/
        [root@f32 tracing]# echo timerlat > current_tracer
It is possible to follow the trace by reading the trace trace file:
  [root@f32 tracing]# cat trace
  # tracer: timerlat
  #
  #                              _-----=> irqs-off
  #                             / _----=> need-resched
  #                            | / _---=> hardirq/softirq
  #                            || / _--=> preempt-depth
  #                            || /
  #                            ||||             ACTIVATION
  #         TASK-PID      CPU# ||||   TIMESTAMP    ID            CONTEXT                LATENCY
  #            | |         |   ||||      |         |                  |                       |
          <idle>-0       [000] d.h1    54.029328: #1     context    irq timer_latency       932 ns
           <...>-867     [000] ....    54.029339: #1     context thread timer_latency     11700 ns
          <idle>-0       [001] dNh1    54.029346: #1     context    irq timer_latency      2833 ns
           <...>-868     [001] ....    54.029353: #1     context thread timer_latency      9820 ns
          <idle>-0       [000] d.h1    54.030328: #2     context    irq timer_latency       769 ns
           <...>-867     [000] ....    54.030330: #2     context thread timer_latency      3070 ns
          <idle>-0       [001] d.h1    54.030344: #2     context    irq timer_latency       935 ns
           <...>-868     [001] ....    54.030347: #2     context thread timer_latency      4351 ns
The tracer creates a per-cpu kernel thread with real-time priority that
prints two lines at every activation. The first is the *timer latency*
observed at the *hardirq* context before the activation of the thread.
The second is the *timer latency* observed by the thread, which is the
same level that cyclictest reports. The ACTIVATION ID field
serves to relate the *irq* execution to its respective *thread* execution.
The irq/thread splitting is important to clarify at which context
the unexpected high value is coming from. The *irq* context can be
delayed by hardware related actions, such as SMIs, NMIs, IRQs
or by a thread masking interrupts. Once the timer happens, the delay
can also be influenced by blocking caused by threads. For example, by
postponing the scheduler execution via preempt_disable(),  by the
scheduler execution, or by masking interrupts. Threads can
also be delayed by the interference from other threads and IRQs.
The timerlat can also take advantage of the osnoise: traceevents.
For example:
        [root@f32 ~]# cd /sys/kernel/tracing/
        [root@f32 tracing]# echo timerlat > current_tracer
        [root@f32 tracing]# echo osnoise > set_event
        [root@f32 tracing]# echo 25 > osnoise/stop_tracing_total_us
        [root@f32 tracing]# tail -10 trace
             cc1-87882   [005] d..h...   548.771078: #402268 context    irq timer_latency      1585 ns
             cc1-87882   [005] dNLh1..   548.771082: irq_noise: local_timer:236 start 548.771077442 duration 4597 ns
             cc1-87882   [005] dNLh2..   548.771083: irq_noise: reschedule:253 start 548.771083017 duration 56 ns
             cc1-87882   [005] dNLh2..   548.771086: irq_noise: call_function_single:251 start 548.771083811 duration 2048 ns
             cc1-87882   [005] dNLh2..   548.771088: irq_noise: call_function_single:251 start 548.771086814 duration 1495 ns
             cc1-87882   [005] dNLh2..   548.771091: irq_noise: call_function_single:251 start 548.771089194 duration 1558 ns
             cc1-87882   [005] dNLh2..   548.771094: irq_noise: call_function_single:251 start 548.771091719 duration 1932 ns
             cc1-87882   [005] dNLh2..   548.771096: irq_noise: call_function_single:251 start 548.771094696 duration 1050 ns
             cc1-87882   [005] d...3..   548.771101: thread_noise:      cc1:87882 start 548.771078243 duration 10909 ns
      timerlat/5-1035    [005] .......   548.771103: #402268 context thread timer_latency     25960 ns
For further information see: Documentation/trace/timerlat-tracer.rst
Link: https://lkml.kernel.org/r/71f18efc013e1194bcaea1e54db957de2b19ba62.1624372313.git.bristot@redhat.com
Cc: Phil Auld <pauld@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Kate Carcia <kcarcia@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>
Cc: Clark Willaims <williams@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
		
	
			
		
			
				
	
	
		
			404 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			404 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| // SPDX-License-Identifier: GPL-2.0
 | |
| /*
 | |
|  * This file defines the trace event structures that go into the ring
 | |
|  * buffer directly. They are created via macros so that changes for them
 | |
|  * appear in the format file. Using macros will automate this process.
 | |
|  *
 | |
|  * The macro used to create a ftrace data structure is:
 | |
|  *
 | |
|  * FTRACE_ENTRY( name, struct_name, id, structure, print )
 | |
|  *
 | |
|  * @name: the name used the event name, as well as the name of
 | |
|  *   the directory that holds the format file.
 | |
|  *
 | |
|  * @struct_name: the name of the structure that is created.
 | |
|  *
 | |
|  * @id: The event identifier that is used to detect what event
 | |
|  *    this is from the ring buffer.
 | |
|  *
 | |
|  * @structure: the structure layout
 | |
|  *
 | |
|  *  - __field(	type,	item	)
 | |
|  *	  This is equivalent to declaring
 | |
|  *		type	item;
 | |
|  *	  in the structure.
 | |
|  *  - __array(	type,	item,	size	)
 | |
|  *	  This is equivalent to declaring
 | |
|  *		type	item[size];
 | |
|  *	  in the structure.
 | |
|  *
 | |
|  *   * for structures within structures, the format of the internal
 | |
|  *	structure is laid out. This allows the internal structure
 | |
|  *	to be deciphered for the format file. Although these macros
 | |
|  *	may become out of sync with the internal structure, they
 | |
|  *	will create a compile error if it happens. Since the
 | |
|  *	internal structures are just tracing helpers, this is not
 | |
|  *	an issue.
 | |
|  *
 | |
|  *	When an internal structure is used, it should use:
 | |
|  *
 | |
|  *	__field_struct(	type,	item	)
 | |
|  *
 | |
|  *	instead of __field. This will prevent it from being shown in
 | |
|  *	the output file. The fields in the structure should use.
 | |
|  *
 | |
|  *	__field_desc(	type,	container,	item		)
 | |
|  *	__array_desc(	type,	container,	item,	len	)
 | |
|  *
 | |
|  *	type, item and len are the same as __field and __array, but
 | |
|  *	container is added. This is the name of the item in
 | |
|  *	__field_struct that this is describing.
 | |
|  *
 | |
|  *
 | |
|  * @print: the print format shown to users in the format file.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * Function trace entry - function address and parent function address:
 | |
|  */
 | |
| FTRACE_ENTRY_REG(function, ftrace_entry,
 | |
| 
 | |
| 	TRACE_FN,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field_fn(	unsigned long,	ip		)
 | |
| 		__field_fn(	unsigned long,	parent_ip	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk(" %ps <-- %ps",
 | |
| 		 (void *)__entry->ip, (void *)__entry->parent_ip),
 | |
| 
 | |
| 	perf_ftrace_event_register
 | |
| );
 | |
| 
 | |
| /* Function call entry */
 | |
| FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
 | |
| 
 | |
| 	TRACE_GRAPH_ENT,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field_struct(	struct ftrace_graph_ent,	graph_ent	)
 | |
| 		__field_packed(	unsigned long,	graph_ent,	func		)
 | |
| 		__field_packed(	int,		graph_ent,	depth		)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
 | |
| );
 | |
| 
 | |
| /* Function return entry */
 | |
| FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
 | |
| 
 | |
| 	TRACE_GRAPH_RET,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field_struct(	struct ftrace_graph_ret,	ret	)
 | |
| 		__field_packed(	unsigned long,	ret,		func	)
 | |
| 		__field_packed(	int,		ret,		depth	)
 | |
| 		__field_packed(	unsigned int,	ret,		overrun	)
 | |
| 		__field_packed(	unsigned long long, ret,	calltime)
 | |
| 		__field_packed(	unsigned long long, ret,	rettime	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("<-- %ps (%d) (start: %llx  end: %llx) over: %d",
 | |
| 		 (void *)__entry->func, __entry->depth,
 | |
| 		 __entry->calltime, __entry->rettime,
 | |
| 		 __entry->depth)
 | |
| );
 | |
| 
 | |
| /*
 | |
|  * Context switch trace entry - which task (and prio) we switched from/to:
 | |
|  *
 | |
|  * This is used for both wakeup and context switches. We only want
 | |
|  * to create one structure, but we need two outputs for it.
 | |
|  */
 | |
| #define FTRACE_CTX_FIELDS					\
 | |
| 	__field(	unsigned int,	prev_pid	)	\
 | |
| 	__field(	unsigned int,	next_pid	)	\
 | |
| 	__field(	unsigned int,	next_cpu	)       \
 | |
| 	__field(	unsigned char,	prev_prio	)	\
 | |
| 	__field(	unsigned char,	prev_state	)	\
 | |
| 	__field(	unsigned char,	next_prio	)	\
 | |
| 	__field(	unsigned char,	next_state	)
 | |
| 
 | |
| FTRACE_ENTRY(context_switch, ctx_switch_entry,
 | |
| 
 | |
| 	TRACE_CTX,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		FTRACE_CTX_FIELDS
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%u:%u:%u  ==> %u:%u:%u [%03u]",
 | |
| 		 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
 | |
| 		 __entry->next_pid, __entry->next_prio, __entry->next_state,
 | |
| 		 __entry->next_cpu)
 | |
| );
 | |
| 
 | |
| /*
 | |
|  * FTRACE_ENTRY_DUP only creates the format file, it will not
 | |
|  *  create another structure.
 | |
|  */
 | |
| FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
 | |
| 
 | |
| 	TRACE_WAKE,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		FTRACE_CTX_FIELDS
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%u:%u:%u  ==+ %u:%u:%u [%03u]",
 | |
| 		 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
 | |
| 		 __entry->next_pid, __entry->next_prio, __entry->next_state,
 | |
| 		 __entry->next_cpu)
 | |
| );
 | |
| 
 | |
| /*
 | |
|  * Stack-trace entry:
 | |
|  */
 | |
| 
 | |
| #define FTRACE_STACK_ENTRIES	8
 | |
| 
 | |
| FTRACE_ENTRY(kernel_stack, stack_entry,
 | |
| 
 | |
| 	TRACE_STACK,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	int,		size	)
 | |
| 		__array(	unsigned long,	caller,	FTRACE_STACK_ENTRIES	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
 | |
| 		 "\t=> %ps\n\t=> %ps\n\t=> %ps\n"
 | |
| 		 "\t=> %ps\n\t=> %ps\n",
 | |
| 		 (void *)__entry->caller[0], (void *)__entry->caller[1],
 | |
| 		 (void *)__entry->caller[2], (void *)__entry->caller[3],
 | |
| 		 (void *)__entry->caller[4], (void *)__entry->caller[5],
 | |
| 		 (void *)__entry->caller[6], (void *)__entry->caller[7])
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(user_stack, userstack_entry,
 | |
| 
 | |
| 	TRACE_USER_STACK,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned int,	tgid	)
 | |
| 		__array(	unsigned long,	caller, FTRACE_STACK_ENTRIES	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
 | |
| 		 "\t=> %ps\n\t=> %ps\n\t=> %ps\n"
 | |
| 		 "\t=> %ps\n\t=> %ps\n",
 | |
| 		 (void *)__entry->caller[0], (void *)__entry->caller[1],
 | |
| 		 (void *)__entry->caller[2], (void *)__entry->caller[3],
 | |
| 		 (void *)__entry->caller[4], (void *)__entry->caller[5],
 | |
| 		 (void *)__entry->caller[6], (void *)__entry->caller[7])
 | |
| );
 | |
| 
 | |
| /*
 | |
|  * trace_printk entry:
 | |
|  */
 | |
| FTRACE_ENTRY(bprint, bprint_entry,
 | |
| 
 | |
| 	TRACE_BPRINT,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned long,	ip	)
 | |
| 		__field(	const char *,	fmt	)
 | |
| 		__dynamic_array(	u32,	buf	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%ps: %s",
 | |
| 		 (void *)__entry->ip, __entry->fmt)
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY_REG(print, print_entry,
 | |
| 
 | |
| 	TRACE_PRINT,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned long,	ip	)
 | |
| 		__dynamic_array(	char,	buf	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%ps: %s",
 | |
| 		 (void *)__entry->ip, __entry->buf),
 | |
| 
 | |
| 	ftrace_event_register
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(raw_data, raw_data_entry,
 | |
| 
 | |
| 	TRACE_RAW_DATA,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned int,	id	)
 | |
| 		__dynamic_array(	char,	buf	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("id:%04x %08x",
 | |
| 		 __entry->id, (int)__entry->buf[0])
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(bputs, bputs_entry,
 | |
| 
 | |
| 	TRACE_BPUTS,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned long,	ip	)
 | |
| 		__field(	const char *,	str	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%ps: %s",
 | |
| 		 (void *)__entry->ip, __entry->str)
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
 | |
| 
 | |
| 	TRACE_MMIO_RW,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field_struct(	struct mmiotrace_rw,	rw	)
 | |
| 		__field_desc(	resource_size_t, rw,	phys	)
 | |
| 		__field_desc(	unsigned long,	rw,	value	)
 | |
| 		__field_desc(	unsigned long,	rw,	pc	)
 | |
| 		__field_desc(	int,		rw,	map_id	)
 | |
| 		__field_desc(	unsigned char,	rw,	opcode	)
 | |
| 		__field_desc(	unsigned char,	rw,	width	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%lx %lx %lx %d %x %x",
 | |
| 		 (unsigned long)__entry->phys, __entry->value, __entry->pc,
 | |
| 		 __entry->map_id, __entry->opcode, __entry->width)
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
 | |
| 
 | |
| 	TRACE_MMIO_MAP,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field_struct(	struct mmiotrace_map,	map	)
 | |
| 		__field_desc(	resource_size_t, map,	phys	)
 | |
| 		__field_desc(	unsigned long,	map,	virt	)
 | |
| 		__field_desc(	unsigned long,	map,	len	)
 | |
| 		__field_desc(	int,		map,	map_id	)
 | |
| 		__field_desc(	unsigned char,	map,	opcode	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%lx %lx %lx %d %x",
 | |
| 		 (unsigned long)__entry->phys, __entry->virt, __entry->len,
 | |
| 		 __entry->map_id, __entry->opcode)
 | |
| );
 | |
| 
 | |
| 
 | |
| #define TRACE_FUNC_SIZE 30
 | |
| #define TRACE_FILE_SIZE 20
 | |
| 
 | |
| FTRACE_ENTRY(branch, trace_branch,
 | |
| 
 | |
| 	TRACE_BRANCH,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned int,	line				)
 | |
| 		__array(	char,		func,	TRACE_FUNC_SIZE+1	)
 | |
| 		__array(	char,		file,	TRACE_FILE_SIZE+1	)
 | |
| 		__field(	char,		correct				)
 | |
| 		__field(	char,		constant			)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("%u:%s:%s (%u)%s",
 | |
| 		 __entry->line,
 | |
| 		 __entry->func, __entry->file, __entry->correct,
 | |
| 		 __entry->constant ? " CONSTANT" : "")
 | |
| );
 | |
| 
 | |
| 
 | |
| FTRACE_ENTRY(hwlat, hwlat_entry,
 | |
| 
 | |
| 	TRACE_HWLAT,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	u64,			duration	)
 | |
| 		__field(	u64,			outer_duration	)
 | |
| 		__field(	u64,			nmi_total_ts	)
 | |
| 		__field_struct( struct timespec64,	timestamp	)
 | |
| 		__field_desc(	s64,	timestamp,	tv_sec		)
 | |
| 		__field_desc(	long,	timestamp,	tv_nsec		)
 | |
| 		__field(	unsigned int,		nmi_count	)
 | |
| 		__field(	unsigned int,		seqnum		)
 | |
| 		__field(	unsigned int,		count		)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tcount:%d\tnmi-ts:%llu\tnmi-count:%u\n",
 | |
| 		 __entry->seqnum,
 | |
| 		 __entry->tv_sec,
 | |
| 		 __entry->tv_nsec,
 | |
| 		 __entry->duration,
 | |
| 		 __entry->outer_duration,
 | |
| 		 __entry->count,
 | |
| 		 __entry->nmi_total_ts,
 | |
| 		 __entry->nmi_count)
 | |
| );
 | |
| 
 | |
| #define FUNC_REPEATS_GET_DELTA_TS(entry)				\
 | |
| 	(((u64)(entry)->top_delta_ts << 32) | (entry)->bottom_delta_ts)	\
 | |
| 
 | |
| FTRACE_ENTRY(func_repeats, func_repeats_entry,
 | |
| 
 | |
| 	TRACE_FUNC_REPEATS,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned long,	ip		)
 | |
| 		__field(	unsigned long,	parent_ip	)
 | |
| 		__field(	u16	,	count		)
 | |
| 		__field(	u16	,	top_delta_ts	)
 | |
| 		__field(	u32	,	bottom_delta_ts	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk(" %ps <-%ps\t(repeats:%u  delta: -%llu)",
 | |
| 		 (void *)__entry->ip,
 | |
| 		 (void *)__entry->parent_ip,
 | |
| 		 __entry->count,
 | |
| 		 FUNC_REPEATS_GET_DELTA_TS(__entry))
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(osnoise, osnoise_entry,
 | |
| 
 | |
| 	TRACE_OSNOISE,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	u64,			noise		)
 | |
| 		__field(	u64,			runtime		)
 | |
| 		__field(	u64,			max_sample	)
 | |
| 		__field(	unsigned int,		hw_count	)
 | |
| 		__field(	unsigned int,		nmi_count	)
 | |
| 		__field(	unsigned int,		irq_count	)
 | |
| 		__field(	unsigned int,		softirq_count	)
 | |
| 		__field(	unsigned int,		thread_count	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("noise:%llu\tmax_sample:%llu\thw:%u\tnmi:%u\tirq:%u\tsoftirq:%u\tthread:%u\n",
 | |
| 		 __entry->noise,
 | |
| 		 __entry->max_sample,
 | |
| 		 __entry->hw_count,
 | |
| 		 __entry->nmi_count,
 | |
| 		 __entry->irq_count,
 | |
| 		 __entry->softirq_count,
 | |
| 		 __entry->thread_count)
 | |
| );
 | |
| 
 | |
| FTRACE_ENTRY(timerlat, timerlat_entry,
 | |
| 
 | |
| 	TRACE_TIMERLAT,
 | |
| 
 | |
| 	F_STRUCT(
 | |
| 		__field(	unsigned int,		seqnum		)
 | |
| 		__field(	int,			context		)
 | |
| 		__field(	u64,			timer_latency	)
 | |
| 	),
 | |
| 
 | |
| 	F_printk("seq:%u\tcontext:%d\ttimer_latency:%llu\n",
 | |
| 		 __entry->seqnum,
 | |
| 		 __entry->context,
 | |
| 		 __entry->timer_latency)
 | |
| );
 |