Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo:
"New features:
perf ftrace:
- Add -n/--use-nsec option to the 'latency' subcommand.
Default: usecs:
$ sudo perf ftrace latency -T dput -a sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 2098375 | ############################# |
1 - 2 us | 61 | |
2 - 4 us | 33 | |
4 - 8 us | 13 | |
8 - 16 us | 124 | |
16 - 32 us | 123 | |
32 - 64 us | 1 | |
64 - 128 us | 0 | |
128 - 256 us | 1 | |
256 - 512 us | 0 | |
Better granularity with nsec:
$ sudo perf ftrace latency -T dput -a -n sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 0 | |
1 - 2 ns | 0 | |
2 - 4 ns | 0 | |
4 - 8 ns | 0 | |
8 - 16 ns | 0 | |
16 - 32 ns | 0 | |
32 - 64 ns | 0 | |
64 - 128 ns | 1163434 | ############## |
128 - 256 ns | 914102 | ############# |
256 - 512 ns | 884 | |
512 - 1024 ns | 613 | |
1 - 2 us | 31 | |
2 - 4 us | 17 | |
4 - 8 us | 7 | |
8 - 16 us | 123 | |
16 - 32 us | 83 | |
perf lock:
- Add -c/--combine-locks option to merge lock instances in the same
class into a single entry.
# perf lock report -c
Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns)
rcu_read_lock 251225 0 0 0 0 0
hrtimer_bases.lock 39450 0 0 0 0 0
&sb->s_type->i_l... 10301 1 662 662 662 662
ptlock_ptr(page) 10173 2 701 1402 760 642
&(ei->i_block_re... 8732 0 0 0 0 0
&xa->xa_lock 8088 0 0 0 0 0
&base->lock 6705 0 0 0 0 0
&p->pi_lock 5549 0 0 0 0 0
&dentry->d_lockr... 5010 4 1274 5097 1844 789
&ep->lock 3958 0 0 0 0 0
- Add -F/--field option to customize the list of fields to output:
$ perf lock report -F contended,wait_max -k avg_wait
Name contended max wait(ns) avg wait(ns)
slock-AF_INET6 1 23543 23543
&lruvec->lru_lock 5 18317 11254
slock-AF_INET6 1 10379 10379
rcu_node_1 1 2104 2104
&dentry->d_lockr... 1 1844 1844
&dentry->d_lockr... 1 1672 1672
&newf->file_lock 15 2279 1025
&dentry->d_lockr... 1 792 792
- Add --synth=no option for record, as there is no need to symbolize,
lock names come from the tracepoints.
perf record:
- Threaded recording, opt-in, via the new --threads command line
option.
- Improve AMD IBS (Instruction-Based Sampling) error handling
messages.
perf script:
- Add 'brstackinsnlen' field (use it with -F) for branch stacks.
- Output branch sample type in 'perf script'.
perf report:
- Add "addr_from" and "addr_to" sort dimensions.
- Print branch stack entry type in 'perf report --dump-raw-trace'
- Fix symbolization for chrooted workloads.
Hardware tracing:
Intel PT:
- Add CFE (Control Flow Event) and EVD (Event Data) packets support.
- Add MODE.Exec IFLAG bit support.
Explanation about these features from the "Intel® 64 and IA-32
architectures software developer’s manual combined volumes: 1, 2A,
2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at:
https://cdrdv2.intel.com/v1/dl/getContent/671200
At page 3951:
"32.2.4
Event Trace is a capability that exposes details about the
asynchronous events, when they are generated, and when their
corresponding software event handler completes execution. These
include:
o Interrupts, including NMI and SMI, including the interrupt
vector when defined.
o Faults, exceptions including the fault vector.
- Page faults additionally include the page fault address,
when in context.
o Event handler returns, including IRET and RSM.
o VM exits and VM entries.¹
- VM exits include the values written to the “exit reason”
and “exit qualification” VMCS fields. INIT and SIPI events.
o TSX aborts, including the abort status returned for the RTM
instructions.
o Shutdown.
Additionally, it provides indication of the status of the
Interrupt Flag (IF), to indicate when interrupts are masked"
ARM CoreSight:
- Use advertised caps/min_interval as default sample_period on ARM
spe.
- Update deduction of TRCCONFIGR register for branch broadcast on
ARM's CoreSight ETM.
Vendor Events (JSON):
Intel:
- Update events and metrics for: Alderlake, Broadwell, Broadwell DE,
BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont,
GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX,
Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP,
Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX,
Tigerlake, TremontX, Westmere EP-SP, and Westmere EX.
ARM:
- Add support for HiSilicon CPA PMU aliasing.
perf stat:
- Fix forked applications enablement of counters.
- The 'slots' should only be printed in a different order than the
one specified on the command line when 'topdown' events are
present, fix it.
Miscellaneous:
- Sync msr-index, cpufeatures header files with the kernel sources.
- Stop using some deprecated libbpf APIs in 'perf trace'.
- Fix some spelling mistakes.
- Refactor the maps pointers usage to pave the way for using refcount
debugging.
- Only offer the --tui option on perf top, report and annotate when
perf was built with libslang.
- Don't mention --to-ctf in 'perf data --help' when not linking with
the required library, libbabeltrace.
- Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by
array_size.cocci.
- Enhance the matching of sub-commands abbreviations:
'perf c2c rec' -> 'perf c2c record'
'perf c2c recport' -> error
- Set build-id using build-id header on new mmap records.
- Fix generation of 'perf --version' string.
perf test:
- Add test for the arm_spe event.
- Add test to check unwinding using frame-pointer (fp) mode on arm64.
- Make metric testing more robust in 'perf test'.
- Add error message for unsupported branch stack cases.
libperf:
- Add API for allocating new thread map array.
- Fix typo in perf_evlist__open() failure error messages in libperf
tests.
perf c2c:
- Replace bitmap_weight() with bitmap_empty() where appropriate"
* tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits)
perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages
perf python: Add perf_env stubs that will be needed in evsel__open_strerror()
perf tools: Enhance the matching of sub-commands abbreviations
libperf tests: Fix typo in perf_evlist__open() failure error messages
tools arm64: Import cputype.h
perf lock: Add -F/--field option to control output
perf lock: Extend struct lock_key to have print function
perf lock: Add --synth=no option for record
tools headers cpufeatures: Sync with the kernel sources
tools headers cpufeatures: Sync with the kernel sources
perf stat: Fix forked applications enablement of counters
tools arch x86: Sync the msr-index.h copy with the kernel sources
perf evsel: Make evsel__env() always return a valid env
perf build-id: Fix spelling mistake "Cant" -> "Can't"
perf header: Fix spelling mistake "could't" -> "couldn't"
perf script: Add 'brstackinsnlen' for branch stacks
perf parse-events: Move slots only with topdown
perf ftrace latency: Update documentation
perf ftrace latency: Add -n/--use-nsec option
perf tools: Fix version kernel tag
...
This commit is contained in:
@@ -62,11 +62,12 @@ SYNOPSIS
|
||||
struct perf_thread_map;
|
||||
|
||||
struct perf_thread_map *perf_thread_map__new_dummy(void);
|
||||
struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
|
||||
|
||||
void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
|
||||
char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
|
||||
void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
|
||||
char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
|
||||
int perf_thread_map__nr(struct perf_thread_map *threads);
|
||||
pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
|
||||
pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
|
||||
|
||||
struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
|
||||
void perf_thread_map__put(struct perf_thread_map *map);
|
||||
|
||||
@@ -8,11 +8,12 @@
|
||||
struct perf_thread_map;
|
||||
|
||||
LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void);
|
||||
LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array);
|
||||
|
||||
LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid);
|
||||
LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread);
|
||||
LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid);
|
||||
LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
|
||||
LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
|
||||
LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread);
|
||||
LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
|
||||
|
||||
LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
|
||||
LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
|
||||
|
||||
@@ -12,6 +12,7 @@ LIBPERF_0.0.1 {
|
||||
perf_cpu_map__empty;
|
||||
perf_cpu_map__max;
|
||||
perf_cpu_map__has;
|
||||
perf_thread_map__new_array;
|
||||
perf_thread_map__new_dummy;
|
||||
perf_thread_map__set_pid;
|
||||
perf_thread_map__comm;
|
||||
|
||||
@@ -69,7 +69,7 @@ static int test_stat_cpu(void)
|
||||
perf_evlist__set_maps(evlist, cpus, NULL);
|
||||
|
||||
err = perf_evlist__open(evlist);
|
||||
__T("failed to open evsel", err == 0);
|
||||
__T("failed to open evlist", err == 0);
|
||||
|
||||
perf_evlist__for_each_evsel(evlist, evsel) {
|
||||
cpus = perf_evsel__cpus(evsel);
|
||||
@@ -130,7 +130,7 @@ static int test_stat_thread(void)
|
||||
perf_evlist__set_maps(evlist, NULL, threads);
|
||||
|
||||
err = perf_evlist__open(evlist);
|
||||
__T("failed to open evsel", err == 0);
|
||||
__T("failed to open evlist", err == 0);
|
||||
|
||||
perf_evlist__for_each_evsel(evlist, evsel) {
|
||||
perf_evsel__read(evsel, 0, 0, &counts);
|
||||
@@ -187,7 +187,7 @@ static int test_stat_thread_enable(void)
|
||||
perf_evlist__set_maps(evlist, NULL, threads);
|
||||
|
||||
err = perf_evlist__open(evlist);
|
||||
__T("failed to open evsel", err == 0);
|
||||
__T("failed to open evlist", err == 0);
|
||||
|
||||
perf_evlist__for_each_evsel(evlist, evsel) {
|
||||
perf_evsel__read(evsel, 0, 0, &counts);
|
||||
@@ -507,7 +507,7 @@ static int test_stat_multiplexing(void)
|
||||
perf_evlist__set_maps(evlist, NULL, threads);
|
||||
|
||||
err = perf_evlist__open(evlist);
|
||||
__T("failed to open evsel", err == 0);
|
||||
__T("failed to open evlist", err == 0);
|
||||
|
||||
perf_evlist__enable(evlist);
|
||||
|
||||
|
||||
@@ -11,9 +11,43 @@ static int libperf_print(enum libperf_print_level level,
|
||||
return vfprintf(stderr, fmt, ap);
|
||||
}
|
||||
|
||||
static int test_threadmap_array(int nr, pid_t *array)
|
||||
{
|
||||
struct perf_thread_map *threads;
|
||||
int i;
|
||||
|
||||
threads = perf_thread_map__new_array(nr, array);
|
||||
__T("Failed to allocate new thread map", threads);
|
||||
|
||||
__T("Unexpected number of threads", perf_thread_map__nr(threads) == nr);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
__T("Unexpected initial value of thread",
|
||||
perf_thread_map__pid(threads, i) == (array ? array[i] : -1));
|
||||
}
|
||||
|
||||
for (i = 1; i < nr; i++)
|
||||
perf_thread_map__set_pid(threads, i, i * 100);
|
||||
|
||||
__T("Unexpected value of thread 0",
|
||||
perf_thread_map__pid(threads, 0) == (array ? array[0] : -1));
|
||||
|
||||
for (i = 1; i < nr; i++) {
|
||||
__T("Unexpected thread value",
|
||||
perf_thread_map__pid(threads, i) == i * 100);
|
||||
}
|
||||
|
||||
perf_thread_map__put(threads);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define THREADS_NR 10
|
||||
int test_threadmap(int argc, char **argv)
|
||||
{
|
||||
struct perf_thread_map *threads;
|
||||
pid_t thr_array[THREADS_NR];
|
||||
int i;
|
||||
|
||||
__T_START;
|
||||
|
||||
@@ -27,6 +61,13 @@ int test_threadmap(int argc, char **argv)
|
||||
perf_thread_map__put(threads);
|
||||
perf_thread_map__put(threads);
|
||||
|
||||
test_threadmap_array(THREADS_NR, NULL);
|
||||
|
||||
for (i = 0; i < THREADS_NR; i++)
|
||||
thr_array[i] = i + 100;
|
||||
|
||||
test_threadmap_array(THREADS_NR, thr_array);
|
||||
|
||||
__T_END;
|
||||
return tests_failed == 0 ? 0 : -1;
|
||||
}
|
||||
|
||||
@@ -32,26 +32,36 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in
|
||||
|
||||
#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr)
|
||||
|
||||
void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid)
|
||||
void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid)
|
||||
{
|
||||
map->map[thread].pid = pid;
|
||||
map->map[idx].pid = pid;
|
||||
}
|
||||
|
||||
char *perf_thread_map__comm(struct perf_thread_map *map, int thread)
|
||||
char *perf_thread_map__comm(struct perf_thread_map *map, int idx)
|
||||
{
|
||||
return map->map[thread].comm;
|
||||
return map->map[idx].comm;
|
||||
}
|
||||
|
||||
struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array)
|
||||
{
|
||||
struct perf_thread_map *threads = thread_map__alloc(nr_threads);
|
||||
int i;
|
||||
|
||||
if (!threads)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < nr_threads; i++)
|
||||
perf_thread_map__set_pid(threads, i, array ? array[i] : -1);
|
||||
|
||||
threads->nr = nr_threads;
|
||||
refcount_set(&threads->refcnt, 1);
|
||||
|
||||
return threads;
|
||||
}
|
||||
|
||||
struct perf_thread_map *perf_thread_map__new_dummy(void)
|
||||
{
|
||||
struct perf_thread_map *threads = thread_map__alloc(1);
|
||||
|
||||
if (threads != NULL) {
|
||||
perf_thread_map__set_pid(threads, 0, -1);
|
||||
threads->nr = 1;
|
||||
refcount_set(&threads->refcnt, 1);
|
||||
}
|
||||
return threads;
|
||||
return perf_thread_map__new_array(1, NULL);
|
||||
}
|
||||
|
||||
static void perf_thread_map__delete(struct perf_thread_map *threads)
|
||||
@@ -85,7 +95,7 @@ int perf_thread_map__nr(struct perf_thread_map *threads)
|
||||
return threads ? threads->nr : 1;
|
||||
}
|
||||
|
||||
pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread)
|
||||
pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
|
||||
{
|
||||
return map->map[thread].pid;
|
||||
return map->map[idx].pid;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user