From 46dad054a19297af65c417c97cb920aa5bdf7e8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 Mar 2016 18:48:45 -0300 Subject: [PATCH 01/19] perf jitdump: DWARF is also needed While building on a Docker container for ubuntu and installing package by package one ends up with: MKDIR /tmp/build/util/ CC /tmp/build/util/genelf.o util/genelf.c:22:19: fatal error: dwarf.h: No such file or directory #include ^ compilation terminated. mv: cannot stat '/tmp/build/util/.genelf.o.tmp': No such file or directory Because the jitdump code needs the DWARF related development packages to be installed. So make it dependent on that so that the build can succeed without jitdump support. Cc: Adrian Hunter Cc: Stephane Eranian Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-le498robnmxd40237wej3w62@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 10 +++++----- tools/perf/util/Build | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b2885776b602..e219ed458d97 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) static int perf_event__jit_repipe_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } -#ifdef HAVE_LIBELF_SUPPORT 
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -795,7 +795,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject []", NULL }; -#ifndef HAVE_LIBELF_SUPPORT +#if !defined(HAVE_LIBELF_SUPPORT) || !defined(HAVE_DWARF_SUPPORT) set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif argc = parse_options(argc, argv, options, inject_usage, 0); @@ -833,7 +833,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) inject.tool.ordered_events = true; inject.tool.ordering_requires_timestamps = true; } -#ifdef HAVE_LIBELF_SUPPORT +#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; inject.tool.mmap = perf_event__jit_repipe_mmap; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index df2b690970ac..f130ce240158 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -107,9 +107,12 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o + +ifdef CONFIG_DWARF libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o libperf-$(CONFIG_LIBELF) += genelf_debug.o +endif CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" # avoid compiler warnings in 32-bit mode From 616df645d7238e45d3b369933a30fee4e4e305e2 Mon Sep 17 00:00:00 2001 From: Chris Phlipot Date: Tue, 8 Mar 2016 21:11:54 -0800 Subject: [PATCH 02/19] perf tools: Fix perf script python database export crash Remove the union in evsel so that the database id and priv pointer can be used simultaneously without conflicting and crashing. 
Detailed Description for the fixed bug follows: perf script crashes with a segmentation fault on user space tool version 4.5.rc7.ge2857b when using the python database export API. It works properly in 4.4 and prior versions. The crash first appeared in: cfc8874a4859 ("perf script: Process cpu/threads maps") How to reproduce the bug: Remove any temporary files left over from a previous crash (if you have already attempted to reproduce the bug): $ rm -r test_db-perf-data $ dropdb test_db $ perf record timeout 1 yes >/dev/null $ perf script -s scripts/python/export-to-postgresql.py test_db Stack Trace: Program received signal SIGSEGV, Segmentation fault. __GI___libc_free (mem=0x1) at malloc.c:2929 2929 malloc.c: No such file or directory. (gdb) bt at util/stat.c:122 argv=, prefix=) at builtin-script.c:2231 argc=argc@entry=4, argv=argv@entry=0x7fffffffdf70) at perf.c:390 at perf.c:451 Signed-off-by: Chris Phlipot Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Peter Zijlstra Fixes: cfc8874a4859 ("perf script: Process cpu/threads maps") Link: http://lkml.kernel.org/r/1457500314-8912-1-git-send-email-cphlipot0@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index efad78f811ad..501ea6e565f1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -93,10 +93,8 @@ struct perf_evsel { const char *unit; struct event_format *tp_format; off_t id_offset; - union { - void *priv; - u64 db_id; - }; + void *priv; + u64 db_id; struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; From d7b617f51be4fffa3cbb5adf6d4258e616dce294 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 9 Mar 2016 11:04:17 +0100 Subject: [PATCH 03/19] perf tools: Pass perf_hpp_list all the way through setup_sort_list Pass perf_hpp_list all the way through setup_sort_list so that the sort entry can be added on the arbitrary list. 
Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160309100417.GA30910@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 44 +++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 041f236379e0..59a101e43457 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1614,19 +1614,21 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg) return hse->se->se_filter(he, type, arg); } -static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, int level) +static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; - perf_hpp__register_sort_field(&hse->hpp); + perf_hpp_list__register_sort_field(list, &hse->hpp); return 0; } -static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list, - struct sort_dimension *sd) +static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, + struct perf_hpp_list *list) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); @@ -2147,12 +2149,14 @@ out: return ret; } -static int __sort_dimension__add(struct sort_dimension *sd, int level) +static int __sort_dimension__add(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_sort(sd, level) < 0) + if (__sort_dimension__add_hpp_sort(sd, list, level) < 0) return -1; if (sd->entry->se_collapse) @@ -2163,7 +2167,9 @@ static int __sort_dimension__add(struct sort_dimension *sd, int level) return 0; } -static int __hpp_dimension__add(struct hpp_dimension *hd, int level) +static int __hpp_dimension__add(struct hpp_dimension *hd, + struct perf_hpp_list *list, + int level) { struct 
perf_hpp_fmt *fmt; @@ -2175,7 +2181,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, int level) return -1; hd->taken = 1; - perf_hpp__register_sort_field(fmt); + perf_hpp_list__register_sort_field(list, fmt); return 0; } @@ -2185,7 +2191,7 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list, if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(list, sd) < 0) + if (__sort_dimension__add_hpp_output(sd, list) < 0) return -1; sd->taken = 1; @@ -2215,7 +2221,8 @@ int hpp_dimension__add_output(unsigned col) return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, +static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist __maybe_unused, int level) { unsigned int i; @@ -2255,7 +2262,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, sort__has_thread = 1; } - return __sort_dimension__add(sd, level); + return __sort_dimension__add(sd, list, level); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -2264,7 +2271,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add(hd, level); + return __hpp_dimension__add(hd, list, level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2279,7 +2286,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) sort__has_sym = 1; - __sort_dimension__add(sd, level); + __sort_dimension__add(sd, list, level); return 0; } @@ -2295,7 +2302,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist, if (sd->entry == &sort_mem_daddr_sym) sort__has_sym = 1; - __sort_dimension__add(sd, level); + __sort_dimension__add(sd, list, level); return 0; } @@ -2305,7 +2312,8 @@ static int 
sort_dimension__add(const char *tok, struct perf_evlist *evlist, return -ESRCH; } -static int setup_sort_list(char *str, struct perf_evlist *evlist) +static int setup_sort_list(struct perf_hpp_list *list, char *str, + struct perf_evlist *evlist) { char *tmp, *tok; int ret = 0; @@ -2332,7 +2340,7 @@ static int setup_sort_list(char *str, struct perf_evlist *evlist) } if (*tok) { - ret = sort_dimension__add(tok, evlist, level); + ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { error("Invalid --sort key: `%s'", tok); break; @@ -2480,7 +2488,7 @@ static int __setup_sorting(struct perf_evlist *evlist) } } - ret = setup_sort_list(str, evlist); + ret = setup_sort_list(&perf_hpp_list, str, evlist); free(str); return ret; @@ -2725,7 +2733,7 @@ int setup_sorting(struct perf_evlist *evlist) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist, -1); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); if (err < 0) return err; } From ea8f75f981918c5946fc4029acdc86707fa901c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 8 Mar 2016 19:42:30 +0100 Subject: [PATCH 04/19] perf tools: Omit unnecessary cast in perf_pmu__parse_scale There's no need to use a const char pointer, we can use a char pointer from the beginning and omit the unnecessary cast. Reported-by: Ingo Molnar Signed-off-by: Jiri Olsa Cc: David Ahern Cc: H. 
Peter Anvin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160308184230.GB7897@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d8cd038baed2..adef23b1352e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * char scale[128]; int fd, ret = -1; char path[PATH_MAX]; - const char *lc; + char *lc; snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); @@ -146,7 +146,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); - free((char *) lc); + free(lc); ret = 0; error: From 9eb42dee2b11635174c74a7996934b6ca18f2179 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 26 Feb 2016 18:13:28 -0500 Subject: [PATCH 05/19] tools lib traceevent: Add '~' operation within arg_num_eval() When evaluating values for print flags, if the value included a '~' operator, the parsing would fail. 
This broke kmalloc's parsing of: __print_flags(REC->gfp_flags, "|", {(unsigned long)((((((( gfp_t)(0x400000u|0x2000000u)) | (( gfp_t)0x40u) | (( gfp_t)0x80u) | (( gfp_t)0x20000u)) | (( gfp_t)0x02u)) | (( gfp_t)0x08u)) | (( gfp_t)0x4000u) | (( gfp_t)0x10000u) | (( gfp_t)0x1000u) | (( gfp_t)0x200u)) & ~(( gfp_t)0x2000000u)) ^ | here Signed-off-by: Steven Rostedt Reported-by: Arnaldo Carvalho de Melo Tested-by: David Ahern Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/20160226181328.22f47129@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 865dea55454b..190cc886ab91 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2398,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val) break; *val = left + right; break; + case '~': + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = ~right; + break; default: do_warning("unknown op '%s'", arg->op.op); ret = 0; From e12b202f8fb9b62a3997cad8e93401f85293390c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 10 Mar 2016 17:41:13 +0100 Subject: [PATCH 06/19] perf jitdump: Build only on supported archs Build jitdump only on architectures defined in util/genelf.h file, to avoid breaking the build on such arches. 
Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Borislav Petkov Cc: Colin Ian King Cc: David Ahern Cc: Davidlohr Bueso Cc: He Kuang Cc: Mel Gorman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Cc: Wang Nan Link: http://lkml.kernel.org/r/20160310164113.GA11357@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/Makefile | 1 + tools/perf/arch/arm64/Makefile | 1 + tools/perf/arch/powerpc/Makefile | 1 + tools/perf/arch/x86/Makefile | 1 + tools/perf/builtin-inject.c | 12 +++++++----- tools/perf/config/Makefile | 7 +++++++ tools/perf/util/Build | 2 +- 7 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 9f9cea3478fd..56e05f126ad8 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 09ba923debe8..269af2143735 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index e219ed458d97..7fa68663ed72 100644 --- 
a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP static int perf_event__jit_repipe_mmap(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -778,7 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " "where and how long tasks slept"), +#ifdef HAVE_JITDUMP OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), +#endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", @@ -795,7 +797,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject []", NULL }; -#if !defined(HAVE_LIBELF_SUPPORT) || !defined(HAVE_DWARF_SUPPORT) +#ifndef HAVE_JITDUMP set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif argc = parse_options(argc, argv, options, inject_usage, 0); @@ -833,7 +835,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) inject.tool.ordered_events = true; 
inject.tool.ordering_requires_timestamps = true; } -#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT) +#ifdef HAVE_JITDUMP if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; inject.tool.mmap = perf_event__jit_repipe_mmap; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f7aeaf303f5a..eca6a912e8c2 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -328,6 +328,13 @@ ifndef NO_LIBELF endif # NO_LIBBPF endif # NO_LIBELF +ifdef PERF_HAVE_JITDUMP + ifndef NO_DWARF + $(call detected,CONFIG_JITDUMP) + CFLAGS += -DHAVE_JITDUMP + endif +endif + ifeq ($(ARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f130ce240158..eea25e2424e9 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -108,7 +108,7 @@ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o libperf-y += demangle-java.o -ifdef CONFIG_DWARF +ifdef CONFIG_JITDUMP libperf-$(CONFIG_LIBELF) += jitdump.o libperf-$(CONFIG_LIBELF) += genelf.o libperf-$(CONFIG_LIBELF) += genelf_debug.o From f4954cfb1cda4cf0abf36d23213c702e94666c3f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:56 +0900 Subject: [PATCH 07/19] perf tools: Fix hist_entry__filter() for hierarchy When hierarchy mode is enabled each output format is in a separate hpp list. So when applying a filter it should check all formats in the list. Currently it only checks a single ->fmt field which was not set properly. 
Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 59a101e43457..8a49a07ebea6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1602,16 +1602,30 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg) { struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; + int ret = -1; + int r; - fmt = he->fmt; - if (fmt == NULL || !perf_hpp__is_sort_entry(fmt)) - return -1; + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - if (hse->se->se_filter == NULL) - return -1; + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_filter == NULL) + continue; - return hse->se->se_filter(he, type, arg); + /* + * hist entry is filtered if any of sort key in the hpp list + * is applied. But it should skip non-matched filter types. + */ + r = hse->se->se_filter(he, type, arg); + if (r >= 0) { + if (ret < 0) + ret = 0; + ret |= r; + } + } + + return ret; } static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, From 4945cf2aa1ed61994c158f22f26ea6101059a8d4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:57 +0900 Subject: [PATCH 08/19] perf tools: Add more sort entry check functions Those functions are for checking if a given perf_hpp_fmt is a filter-related sort entry. With hierarchy mode, it needs to check filters on the hist entries with its own hpp format list. 
Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 4 ++++ tools/perf/util/sort.c | 46 ++++++++++++++++-------------------------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2cb017f28f9e..6870a1bfd762 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -318,6 +318,10 @@ bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *his bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 8a49a07ebea6..61c74022e47f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1488,38 +1488,26 @@ bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) return format->header == __sort__hpp_header; } -bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt) -{ - struct hpp_sort_entry *hse; - - if (!perf_hpp__is_sort_entry(fmt)) - return false; - - hse = container_of(fmt, struct hpp_sort_entry, hpp); - return hse->se == &sort_trace; +#define MK_SORT_ENTRY_CHK(key) \ +bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \ +{ \ + struct hpp_sort_entry *hse; \ + \ + if (!perf_hpp__is_sort_entry(fmt)) \ + return false; \ + \ + hse = container_of(fmt, struct hpp_sort_entry, hpp); \ + return hse->se == &sort_ ## key ; \ } -bool 
perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt) -{ - struct hpp_sort_entry *hse; +MK_SORT_ENTRY_CHK(trace) +MK_SORT_ENTRY_CHK(srcline) +MK_SORT_ENTRY_CHK(srcfile) +MK_SORT_ENTRY_CHK(thread) +MK_SORT_ENTRY_CHK(comm) +MK_SORT_ENTRY_CHK(dso) +MK_SORT_ENTRY_CHK(sym) - if (!perf_hpp__is_sort_entry(fmt)) - return false; - - hse = container_of(fmt, struct hpp_sort_entry, hpp); - return hse->se == &sort_srcline; -} - -bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt) -{ - struct hpp_sort_entry *hse; - - if (!perf_hpp__is_sort_entry(fmt)) - return false; - - hse = container_of(fmt, struct hpp_sort_entry, hpp); - return hse->se == &sort_srcfile; -} static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) { From aec13a7ec78d9322a348fb26940097b0bdfef1bd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:58 +0900 Subject: [PATCH 09/19] perf tools: Fix command line filters in hierarchy mode When a command-line filter is applied in hierarchy mode, output is broken especially when filtering on lower level. The higher level entries doesn't show up so it's hard to see the results. Also it needs to handle multi sort keys in a single hierarchy level. Before: $ perf report --hierarchy -s 'cpu,{dso,comm}' --comms swapper --stdio ... # Overhead CPU / Shared Object+Command # ........... ........................... # 13.79% [kernel.vmlinux] swapper 31.71% 000 13.80% [kernel.vmlinux] swapper 0.43% [e1000e] swapper 11.89% [kernel.vmlinux] swapper 9.18% [kernel.vmlinux] swapper After: # Overhead CPU / Shared Object+Command # ........... ............................... 
# 33.09% 003 13.79% [kernel.vmlinux] swapper 31.71% 000 13.80% [kernel.vmlinux] swapper 0.43% [e1000e] swapper 21.90% 002 11.89% [kernel.vmlinux] swapper 13.30% 001 9.18% [kernel.vmlinux] swapper Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 100 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 97 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 29da9e0d8db9..a98f9345f686 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1087,10 +1087,103 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, */ static void hists__apply_filters(struct hists *hists, struct hist_entry *he); +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he, + enum hist_filter type); + +typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt); + +static bool check_thread_entry(struct perf_hpp_fmt *fmt) +{ + return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt); +} + +static void hist_entry__check_and_remove_filter(struct hist_entry *he, + enum hist_filter type, + fmt_chk_fn check) +{ + struct perf_hpp_fmt *fmt; + bool type_match = false; + struct hist_entry *parent = he->parent_he; + + switch (type) { + case HIST_FILTER__THREAD: + if (symbol_conf.comm_list == NULL && + symbol_conf.pid_list == NULL && + symbol_conf.tid_list == NULL) + return; + break; + case HIST_FILTER__DSO: + if (symbol_conf.dso_list == NULL) + return; + break; + case HIST_FILTER__SYMBOL: + if (symbol_conf.sym_list == NULL) + return; + break; + case HIST_FILTER__PARENT: + case HIST_FILTER__GUEST: + case HIST_FILTER__HOST: + case HIST_FILTER__SOCKET: + default: + return; + } + + /* if it's filtered by own fmt, it has 
to have filter bits */ + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (check(fmt)) { + type_match = true; + break; + } + } + + if (type_match) { + /* + * If the filter is for current level entry, propagate + * filter marker to parents. The marker bit was + * already set by default so it only needs to clear + * non-filtered entries. + */ + if (!(he->filtered & (1 << type))) { + while (parent) { + parent->filtered &= ~(1 << type); + parent = parent->parent_he; + } + } + } else { + /* + * If current entry doesn't have matching formats, set + * filter marker for upper level entries. it will be + * cleared if its lower level entries is not filtered. + * + * For lower-level entries, it inherits parent's + * filter bit so that lower level entries of a + * non-filtered entry won't set the filter marker. + */ + if (parent == NULL) + he->filtered |= (1 << type); + else + he->filtered |= (parent->filtered & (1 << type)); + } +} + +static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) +{ + hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD, + check_thread_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO, + perf_hpp__is_dso_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, + perf_hpp__is_sym_entry); + + hists__apply_filters(he->hists, he); +} static struct hist_entry *hierarchy_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he, + struct hist_entry *parent_he, struct perf_hpp_list *hpp_list) { struct rb_node **p = &root->rb_node; @@ -1125,11 +1218,13 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, if (new == NULL) return NULL; - hists__apply_filters(hists, new); hists->nr_entries++; /* save related format list for output */ new->hpp_list = hpp_list; + new->parent_he = parent_he; + + hist_entry__apply_hierarchy_filters(new); /* some fields are now passed to 'new' */ perf_hpp_list__for_each_sort_list(hpp_list, fmt) { @@ -1170,14 +1265,13 @@ static 
int hists__hierarchy_insert_entry(struct hists *hists, continue; /* insert copy of 'he' for each fmt into the hierarchy */ - new_he = hierarchy_insert_entry(hists, root, he, &node->hpp); + new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp); if (new_he == NULL) { ret = -1; break; } root = &new_he->hroot_in; - new_he->parent_he = parent; new_he->depth = depth++; parent = new_he; } From a515d8ff7085d5e9fde867f2048b8da36b95dc51 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:46:59 +0900 Subject: [PATCH 10/19] perf tools: Remove hist_entry->fmt field It's not used anymore and the output format is accessed by the hpp_list pointer instead when hierarchy is enabled. Let's get rid of it. Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ea1f722cffea..151afc1b6c2f 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -129,7 +129,6 @@ struct hist_entry { void *raw_data; u32 raw_size; void *trace_output; - struct perf_hpp_fmt *fmt; struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; union { From 325a62834e81452d2a6e253444022cf493bbabfc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:47:00 +0900 Subject: [PATCH 11/19] perf hists browser: Cleanup hist_browser__fprintf_hierarchy_entry() The hist_browser__fprintf_hierarchy_entry() is to dump current output into a file so it needs to be sync-ed with the corresponding function hist_browser__show_hierarchy_entry(). So use hists->nr_hpp_node to indent width and use first fmt_node to print overhead columns instead of checking whether it's a sort entry (or dynamic entry). 
Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e0e217ec856b..aed9c8f011f7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1928,8 +1928,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, struct hist_entry *he, - FILE *fp, int level, - int nr_sort_keys) + FILE *fp, int level) { char s[8192]; int printed = 0; @@ -1939,23 +1938,20 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, .size = sizeof(s), }; struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; bool first = true; int ret; - int hierarchy_indent = nr_sort_keys * HIERARCHY_INDENT; + int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT; printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, ""); folded_sign = hist_entry__folded(he); printed += fprintf(fp, "%c", folded_sign); - hists__for_each_format(he->hists, fmt) { - if (perf_hpp__should_skip(fmt, he->hists)) - continue; - - if (perf_hpp__is_sort_entry(fmt) || - perf_hpp__is_dynamic_entry(fmt)) - break; - + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&he->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { if (!first) { ret = scnprintf(hpp.buf, hpp.size, " "); advance_hpp(&hpp, ret); @@ -1992,7 +1988,6 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), browser->min_pcnt); 
int printed = 0; - int nr_sort = browser->hists->nr_sort_keys; while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); @@ -2000,8 +1995,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) if (symbol_conf.report_hierarchy) { printed += hist_browser__fprintf_hierarchy_entry(browser, h, fp, - h->depth, - nr_sort); + h->depth); } else { printed += hist_browser__fprintf_entry(browser, h, fp); } From 86e3ee5224c17b7967aac39aa15539393c144de7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:47:01 +0900 Subject: [PATCH 12/19] perf tools: Remove nr_sort_keys field The nr_sort_keys field is to carry the number of sort entries in a hpp_list or hists to determine the depth of indentation of a hist entry. As it's only used in hierarchy mode and now we have used nr_hpp_node for this reason, there's no need to keep it anymore. Let's get rid of it. Signed-off-by: Namhyung Kim Tested-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 3 --- tools/perf/util/hist.h | 2 -- tools/perf/util/sort.c | 26 -------------------------- 3 files changed, 31 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index f03c4f70438f..3baeaa6e71b5 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -515,9 +515,6 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list, void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, struct perf_hpp_fmt *format) { - if (perf_hpp__is_sort_entry(format) || perf_hpp__is_dynamic_entry(format)) - list->nr_sort_keys++; - list_add_tail(&format->sort_list, &list->sorts); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6870a1bfd762..ead18c82294f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -79,7 +79,6 @@ struct hists 
{ int socket_filter; struct perf_hpp_list *hpp_list; struct list_head hpp_formats; - int nr_sort_keys; int nr_hpp_node; }; @@ -241,7 +240,6 @@ struct perf_hpp_fmt { struct perf_hpp_list { struct list_head fields; struct list_head sorts; - int nr_sort_keys; }; extern struct perf_hpp_list perf_hpp_list; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 61c74022e47f..ced849e51e6b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2703,29 +2703,6 @@ out: return ret; } -static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - struct perf_hpp_fmt *fmt; - struct hists *hists = evsel__hists(evsel); - - hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; - - /* - * If dynamic entries were used, it might add multiple - * entries to each evsel for a single field name. Set - * actual number of sort keys for each hists. - */ - perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && - !perf_hpp__defined_dynamic_entry(fmt, hists)) - hists->nr_sort_keys--; - } - } -} - int setup_sorting(struct perf_evlist *evlist) { int err; @@ -2740,9 +2717,6 @@ int setup_sorting(struct perf_evlist *evlist) return err; } - if (evlist != NULL) - evlist__set_hists_nr_sort_keys(evlist); - reset_dimensions(); /* From f7fb538afea55383a9383dac5c56887c601af5f4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 22:47:02 +0900 Subject: [PATCH 13/19] perf tools: Recalc total periods using top-level entries in hierarchy When hierarchy mode is enabled, each entry in a hierarchy level shares the period. IOW an upper level entry's period is the sum of lower level entries. Thus perf uses only one of them to calculate the total period of hists. It was lowest-level (leaf) entries but it has a problem when it comes to filters. If a filter is applied, entries in the same level will be filtered or not. 
But upper level entries still have a period that is the sum of their children, including filtered ones. So the total of the upper level entries will not be the same as the total of the lower level entries. This resulted in entries having more than 100% of overhead, which can be reproduced using perf top with filter(s). Reported-and-Tested-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Wang Nan Link: http://lkml.kernel.org/r/1457531222-18130-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 44 ++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a98f9345f686..290b3cbf6877 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1453,6 +1453,31 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->stats.total_period += h->stat.period; } +static void hierarchy_recalc_total_periods(struct hists *hists) +{ + struct rb_node *node; + struct hist_entry *he; + + node = rb_first(&hists->entries); + + hists->stats.total_period = 0; + hists->stats.total_non_filtered_period = 0; + + /* + * recalculate total period using top-level entries only + * since lower level entries only see non-filtered entries + * but upper level entries have sum of both entries. 
+ */ + while (node) { + he = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + hists->stats.total_period += he->stat.period; + if (!he->filtered) + hists->stats.total_non_filtered_period += he->stat.period; + } +} + static void hierarchy_insert_output_entry(struct rb_root *root, struct hist_entry *he) { @@ -1518,11 +1543,6 @@ static void hists__hierarchy_output_resort(struct hists *hists, continue; } - /* only update stat for leaf entries to avoid duplication */ - hists__inc_stats(hists, he); - if (!he->filtered) - hists__calc_col_len(hists, he); - if (!use_callchain) continue; @@ -1602,11 +1622,13 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, hists__reset_col_len(hists); if (symbol_conf.report_hierarchy) { - return hists__hierarchy_output_resort(hists, prog, - &hists->entries_collapsed, - &hists->entries, - min_callchain_hits, - use_callchain); + hists__hierarchy_output_resort(hists, prog, + &hists->entries_collapsed, + &hists->entries, + min_callchain_hits, + use_callchain); + hierarchy_recalc_total_periods(hists); + return; } if (sort__need_collapse) @@ -1927,6 +1949,8 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a } } + hierarchy_recalc_total_periods(hists); + /* * resort output after applying a new filter since filter in a lower * hierarchy can change periods in a upper hierarchy. From 078b8d4a406fa8ce4a3c9d5145c27be1ed2b1dfd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 23:20:51 +0900 Subject: [PATCH 14/19] perf tools: Add sort__has_comm variable The sort__has_comm variable is to check whether the comm sort key is given. This is necessary to support thread filtering in the TUI hists browser later. 
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1457533253-21419-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 3 +++ tools/perf/util/sort.h | 1 + 2 files changed, 4 insertions(+) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ced849e51e6b..93fa136b0025 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -27,6 +27,7 @@ int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; int sort__has_thread = 0; +int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; /* @@ -2262,6 +2263,8 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, sort__has_socket = 1; } else if (sd->entry == &sort_thread) { sort__has_thread = 1; + } else if (sd->entry == &sort_comm) { + sort__has_comm = 1; } return __sort_dimension__add(sd, list, level); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 151afc1b6c2f..3f4e35998119 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -37,6 +37,7 @@ extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; extern int sort__has_thread; +extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; From 6962ccb37b50366014074aec6fd14497cf719642 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 10 Mar 2016 00:14:50 +0900 Subject: [PATCH 15/19] perf hists browser: Allow thread filtering for comm sort key The commit 2eafd410e669 ("perf hists browser: Only 'Zoom into thread' only when sort order has 'pid'") disabled thread filtering in hist browser for the default sort key. However the he->thread is still valid even if 'pid' sort key is not given. Only thing it should not use is the pid (or tid) of the thread. So allow to filter by thread when 'comm' sort key is given and show pid only if 'pid' sort key is given. 
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1457536490-24084-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 42 ++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index aed9c8f011f7..cb4191bf6cec 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2136,11 +2136,18 @@ static int hists__browser_title(struct hists *hists, if (hists->uid_filter_str) printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); - if (thread) - printed += scnprintf(bf + printed, size - printed, + if (thread) { + if (sort__has_thread) { + printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), thread->tid); + } else { + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s", + (thread->comm_set ? thread__comm_str(thread) : "")); + } + } if (dso) printed += scnprintf(bf + printed, size - printed, ", DSO: %s", dso->short_name); @@ -2321,9 +2328,15 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid); + if (sort__has_thread) { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"", + thread->comm_set ? 
thread__comm_str(thread) : ""); + } + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(browser->pstack, &browser->hists->thread_filter); @@ -2338,13 +2351,22 @@ static int add_thread_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct thread *thread) { - if (!sort__has_thread || thread == NULL) + int ret; + + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) return 0; - if (asprintf(optstr, "Zoom %s %s(%d) thread", - browser->hists->thread_filter ? "out of" : "into", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid) < 0) + if (sort__has_thread) { + ret = asprintf(optstr, "Zoom %s %s(%d) thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ret = asprintf(optstr, "Zoom %s %s thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : ""); + } + if (ret < 0) return 0; act->thread = thread; From 599a2f38a989a79df99838f22cb607f5e2b5b56c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Mar 2016 23:20:53 +0900 Subject: [PATCH 16/19] perf hists browser: Check sort keys before hot key actions The context menu in TUI hists browser checks corresponding sort keys when creating the menu item. But hotkey actions lacks these checks so it can filter using incorrect info. For example, default sort key of 'perf top' doesn't contain 'comm' or 'pid' sort key so each hist entry's thread info is not reliable. Thus it should prohibit using thread filter on 't' key. 
Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1457533253-21419-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cb4191bf6cec..4b9816555946 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2322,6 +2322,9 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + return 0; + if (browser->hists->thread_filter) { pstack__remove(browser->pstack, &browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); @@ -2379,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; + if (!sort__has_dso || map == NULL) + return 0; + if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); perf_hpp__set_elide(HISTC_DSO, false); @@ -2530,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { + if (!sort__has_socket || act->socket < 0) + return 0; + if (browser->hists->socket_filter > -1) { pstack__remove(browser->pstack, &browser->hists->socket_filter); browser->hists->socket_filter = -1; From 6b45f7b2a37b0e00693985fd0abfc8e0319f91ce Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2016 15:57:35 -0800 Subject: [PATCH 17/19] perf stat: Document CSV format in manpage With all the recently added fields in the perf stat CSV output we should finally document them in the man page. Do this here. v2: Fix fields in documentation (Jiri) v3: fix order of fields again (Jiri) v4: Change order again. 
v5: Document more fields (Jiri) v6: Move time stamp first v7: More fixes (Jiri) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1457049458-28956-5-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 14d9e8ffaff7..8812d7319edb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -219,6 +219,29 @@ $ perf stat -- make -j Wall-clock time elapsed: 719.554352 msecs +CSV FORMAT +---------- + +With -x, perf stat is able to output a not-quite-CSV format output +Commas in the output are not put into "". To make it easy to parse +it is recommended to use a different character like -x \; + +The fields are in this order: + + - optional usec time stamp in fractions of second (with -I xxx) + - optional CPU, core, or socket identifier + - optional number of logical CPUs aggregated + - counter value + - unit of the counter value or empty + - event name + - run time of counter + - percentage of measurement time the counter was running + - optional variance if multiple values are collected with -r + - optional metric value + - optional unit of metric + +Additional metrics may be printed with all earlier fields being empty. + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-list[1] From 54b5091606c18f68a7fc8b4ab03ac4592c7d2922 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2016 15:57:36 -0800 Subject: [PATCH 18/19] perf stat: Implement --metric-only mode Add a new mode to only print metrics. Sometimes we don't care about the raw values, just want the computed metrics. This allows more compact printing, so with -I each sample is only a single line. This also allows easier plotting and processing with other tools. 
The main target is use with --topdown, but it also works with -T and standard perf stat. A few metrics are not supported. To avoid having to hardcode all the metrics in the code, it uses a two-pass approach: first compute dummy metrics and only print the headers in the print_metric callback. Then use the callback to print the actual values. There are some additional changes in the stat printout code to handle all metrics being on a single line. One issue is that the column code doesn't know in advance what events are not supported by the CPU, and it would be hard to find out as this could change based on dynamic conditions. That causes empty columns in some cases. The output can be fairly wide; often you may need more than 80 columns. Example: % perf stat -a -I 1000 --metric-only 1.001452803 frontend cycles idle insn per cycle stalled cycles per insn branch-misses of all branches 1.001452803 158.91% 0.66 2.39 2.92% 2.002192321 180.63% 0.76 2.08 2.96% 3.003088282 150.59% 0.62 2.57 2.84% 4.004369835 196.20% 0.98 1.62 3.79% 5.005227314 231.98% 0.84 1.90 4.71% v2: Lots of updates. v3: Use slightly narrower columns v4: Add comment Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1457049458-28956-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 + tools/perf/builtin-stat.c | 211 +++++++++++++++++++++++-- 2 files changed, 205 insertions(+), 10 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8812d7319edb..82f0951754dd 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -147,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +--metric-only:: +Only print computed metrics. 
Print them in a single line. +Don't show any raw values. Not supported with -A or --per-thread. + --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This is a useful mode to detect imbalance between sockets. To enable this mode, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index baa82078c148..74508c9d0742 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -122,6 +122,7 @@ static bool sync_run = false; static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; +static bool metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -827,6 +828,99 @@ static void print_metric_csv(void *ctx, fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); } +#define METRIC_ONLY_LEN 20 + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + char buf[1024]; + unsigned mlen = METRIC_ONLY_LEN; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (color) + n = color_fprintf(out, color, fmt, val); + else + n = fprintf(out, fmt, val); + if (n > METRIC_ONLY_LEN) + n = METRIC_ONLY_LEN; + if (mlen < strlen(unit)) + mlen = strlen(unit) + 1; + fprintf(out, "%*s", mlen - n, ""); +} + +static void 
print_metric_only_csv(void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, csv_sep); +} + +static void new_line_metric(void *ctx __maybe_unused) +{ +} + +static void print_metric_header(void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (csv_output) + fprintf(os->fh, "%s%s", unit, csv_sep); + else + fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -921,9 +1015,16 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, print_metric_t pm = print_metric_std; void (*nl)(void *); - nl = new_line_std; + if (metric_only) { + nl = new_line_metric; + if (csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; - if (csv_output) { + if (csv_output && !metric_only) { static int aggr_fields[] = { [AGGR_GLOBAL] = 0, [AGGR_THREAD] = 1, @@ -940,6 +1041,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, os.nfields++; } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (metric_only) { + pm(&os, NULL, "", "", 0); + return; + } aggr_printout(counter, id, nr); fprintf(stat_config.output, "%*s%s", @@ -968,7 +1073,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, return; } - if 
(nsec_counter(counter)) + if (metric_only) + /* nothing */; + else if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else abs_printout(id, nr, counter, uval); @@ -977,7 +1084,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.new_line = nl; out.ctx = &os; - if (csv_output) { + if (csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); } @@ -985,7 +1092,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), &out); - if (!csv_output) { + if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); } @@ -1021,14 +1128,23 @@ static void print_aggr(char *prefix) int cpu, s, s2, id, nr; double uval; u64 ena, run, val; + bool first; if (!(aggr_map || aggr_get_id)) return; aggr_update_shadow(); + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. 
+ */ for (s = 0; s < aggr_map->nr; s++) { + if (prefix && metric_only) + fprintf(output, "%s", prefix); + id = aggr_map->map[s]; + first = true; evlist__for_each(evsel_list, counter) { val = ena = run = 0; nr = 0; @@ -1041,13 +1157,20 @@ static void print_aggr(char *prefix) run += perf_counts(counter->counts, cpu, 0)->run; nr++; } - if (prefix) + if (first && metric_only) { + first = false; + aggr_printout(counter, id, nr); + } + if (prefix && !metric_only) fprintf(output, "%s", prefix); uval = val * counter->scale; printout(id, nr, counter, uval, prefix, run, ena, 1.0); - fputc('\n', output); + if (!metric_only) + fputc('\n', output); } + if (metric_only) + fputc('\n', output); } } @@ -1092,12 +1215,13 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - if (prefix) + if (prefix && !metric_only) fprintf(output, "%s", prefix); uval = avg * counter->scale; printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); - fprintf(output, "\n"); + if (!metric_only) + fprintf(output, "\n"); } /* @@ -1126,6 +1250,43 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } } +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 15, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; + +static void print_metric_headers(char *prefix) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = stat_config.output + }; + + if (prefix) + fprintf(stat_config.output, "%s", prefix); + + if (!csv_output) + fprintf(stat_config.output, "%*s", + aggr_header_lens[stat_config.aggr_mode], ""); + + /* Print metrics headers only */ + evlist__for_each(evsel_list, counter) { + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + os.evsel = counter; + perf_stat__print_shadow_stats(counter, 0, + 0, + &out); + } + fputc('\n', 
stat_config.output); +} + static void print_interval(char *prefix, struct timespec *ts) { FILE *output = stat_config.output; @@ -1133,7 +1294,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { + if (num_print_interval == 0 && !csv_output && !metric_only) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); @@ -1220,6 +1381,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) else print_header(argc, argv); + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0) + print_metric_headers(prefix); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) + fprintf(stat_config.output, "%s", prefix); + } + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: @@ -1232,6 +1404,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) print_counter_aggr(counter, prefix); + if (metric_only) + fputc('\n', stat_config.output); break; case AGGR_NONE: evlist__for_each(evsel_list, counter) @@ -1356,6 +1530,8 @@ static const struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), + OPT_BOOLEAN(0, "metric-only", &metric_only, + "Only print computed metrics. 
No raw values"), OPT_END() }; @@ -1997,6 +2173,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } + if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { + fprintf(stderr, "--metric-only is not supported with --per-thread\n"); + goto out; + } + + if (metric_only && stat_config.aggr_mode == AGGR_NONE) { + fprintf(stderr, "--metric-only is not supported with -A\n"); + goto out; + } + + if (metric_only && run_count > 1) { + fprintf(stderr, "--metric-only is not supported with -r\n"); + goto out; + } + if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); parse_options_usage(stat_usage, stat_options, "log-fd", 0); From 206cab651d07563d766c7f4cb73f858c5df3dec5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2016 15:57:37 -0800 Subject: [PATCH 19/19] perf stat: Add --metric-only support for -A Add metric only support for -A too. This requires a new print function that prints the metrics in the right order. v2: Fix manpage v3: Simplify nrcpus computation Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1457049458-28956-7-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 45 +++++++++++++++++++++----- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 82f0951754dd..04f23b404bbc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -149,7 +149,7 @@ The overhead percentage could be high in some cases, for instance with small, su --metric-only:: Only print computed metrics. Print them in a single line. -Don't show any raw values. Not supported with -A or --per-thread. +Don't show any raw values. Not supported with --per-thread. --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. 
This diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 74508c9d0742..1f19f2f999c8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1250,10 +1250,40 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } } +static void print_no_aggr_metric(char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; + + nrcpus = evsel_list->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; + + if (prefix) + fputs(prefix, stat_config.output); + evlist__for_each(evsel_list, counter) { + if (first) { + aggr_printout(counter, cpu, 0); + first = false; + } + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + } + fputc('\n', stat_config.output); + } +} + static int aggr_header_lens[] = { [AGGR_CORE] = 18, [AGGR_SOCKET] = 12, - [AGGR_NONE] = 15, + [AGGR_NONE] = 6, [AGGR_THREAD] = 24, [AGGR_GLOBAL] = 0, }; @@ -1408,8 +1438,12 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) fputc('\n', stat_config.output); break; case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); + if (metric_only) + print_no_aggr_metric(prefix); + else { + evlist__for_each(evsel_list, counter) + print_counter(counter, prefix); + } break; case AGGR_UNSET: default: @@ -2178,11 +2212,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) goto out; } - if (metric_only && stat_config.aggr_mode == AGGR_NONE) { - fprintf(stderr, "--metric-only is not supported with -A\n"); - goto out; - } - if (metric_only && run_count > 1) { fprintf(stderr, "--metric-only is not supported with -r\n"); goto out;