From 46dad054a19297af65c417c97cb920aa5bdf7e8c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 7 Mar 2016 18:48:45 -0300
Subject: [PATCH 01/19] perf jitdump: DWARF is also needed

While building on a Docker container for ubuntu and installing package
by package one ends up with:

    MKDIR    /tmp/build/util/
    CC       /tmp/build/util/genelf.o
  util/genelf.c:22:19: fatal error: dwarf.h: No such file or directory
   #include <dwarf.h>
                   ^
  compilation terminated.
  mv: cannot stat '/tmp/build/util/.genelf.o.tmp': No such file or directory

Because the jitdump code needs the DWARF related development packages to
be installed. So make it dependent on that so that the build can succeed
without jitdump support.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-le498robnmxd40237wej3w62@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c | 10 +++++-----
 tools/perf/util/Build       |  3 +++
 2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index b2885776b602..e219ed458d97 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
-#ifdef HAVE_LIBELF_SUPPORT
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
 			       union perf_event *event __maybe_unused,
 			       struct ordered_events *oe __maybe_unused)
@@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
 	return err;
 }
 
-#ifdef HAVE_LIBELF_SUPPORT
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
 static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct perf_sample *sample,
@@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool,
 	return err;
 }
 
-#ifdef HAVE_LIBELF_SUPPORT
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
 					union perf_event *event,
 					struct perf_sample *sample,
@@ -795,7 +795,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf inject [<options>]",
 		NULL
 	};
-#ifndef HAVE_LIBELF_SUPPORT
+#if !defined(HAVE_LIBELF_SUPPORT) || !defined(HAVE_DWARF_SUPPORT)
 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
 #endif
 	argc = parse_options(argc, argv, options, inject_usage, 0);
@@ -833,7 +833,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		inject.tool.ordered_events = true;
 		inject.tool.ordering_requires_timestamps = true;
 	}
-#ifdef HAVE_LIBELF_SUPPORT
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
 	if (inject.jit_mode) {
 		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
 		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index df2b690970ac..f130ce240158 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -107,9 +107,12 @@ libperf-y += scripting-engines/
 libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
 libperf-y += demangle-java.o
+
+ifdef CONFIG_DWARF
 libperf-$(CONFIG_LIBELF) += jitdump.o
 libperf-$(CONFIG_LIBELF) += genelf.o
 libperf-$(CONFIG_LIBELF) += genelf_debug.o
+endif
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 # avoid compiler warnings in 32-bit mode

From 616df645d7238e45d3b369933a30fee4e4e305e2 Mon Sep 17 00:00:00 2001
From: Chris Phlipot <cphlipot0@gmail.com>
Date: Tue, 8 Mar 2016 21:11:54 -0800
Subject: [PATCH 02/19] perf tools: Fix perf script python database export
 crash

Remove the union in evsel so that the database id and priv pointer can
be used simultaneously without conflicting and crashing.

Detailed Description for the fixed bug follows:

perf script crashes with a segmentation fault on user space tool version
4.5.rc7.ge2857b when using the python database export API. It works
properly in 4.4 and prior versions.

The crash first appeared in:

cfc8874a4859 ("perf script: Process cpu/threads maps")

How to reproduce the bug:

Remove any temporary files left over from a previous crash (if you have
already attempted to reproduce the bug):

  $ rm -r test_db-perf-data
  $ dropdb test_db

  $ perf record timeout 1 yes >/dev/null
  $ perf script -s scripts/python/export-to-postgresql.py test_db

  Stack Trace:
  Program received signal SIGSEGV, Segmentation fault.
  __GI___libc_free (mem=0x1) at malloc.c:2929
  2929	malloc.c: No such file or directory.
  (gdb) bt
    at util/stat.c:122
    argv=<optimized out>, prefix=<optimized out>) at builtin-script.c:2231
    argc=argc@entry=4, argv=argv@entry=0x7fffffffdf70) at perf.c:390
    at perf.c:451

Signed-off-by: Chris Phlipot <cphlipot0@gmail.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: cfc8874a4859 ("perf script: Process cpu/threads maps")
Link: http://lkml.kernel.org/r/1457500314-8912-1-git-send-email-cphlipot0@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index efad78f811ad..501ea6e565f1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -93,10 +93,8 @@ struct perf_evsel {
 	const char		*unit;
 	struct event_format	*tp_format;
 	off_t			id_offset;
-	union {
-		void		*priv;
-		u64		db_id;
-	};
+	void			*priv;
+	u64			db_id;
 	struct cgroup_sel	*cgrp;
 	void			*handler;
 	struct cpu_map		*cpus;

From d7b617f51be4fffa3cbb5adf6d4258e616dce294 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 9 Mar 2016 11:04:17 +0100
Subject: [PATCH 03/19] perf tools: Pass perf_hpp_list all the way through
 setup_sort_list

Pass perf_hpp_list all the way through setup_sort_list so that the sort
entry can be added to an arbitrary list.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20160309100417.GA30910@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.c | 44 +++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 041f236379e0..59a101e43457 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1614,19 +1614,21 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
 	return hse->se->se_filter(he, type, arg);
 }
 
-static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, int level)
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+					  struct perf_hpp_list *list,
+					  int level)
 {
 	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
 
 	if (hse == NULL)
 		return -1;
 
-	perf_hpp__register_sort_field(&hse->hpp);
+	perf_hpp_list__register_sort_field(list, &hse->hpp);
 	return 0;
 }
 
-static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list,
-					    struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+					    struct perf_hpp_list *list)
 {
 	struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
 
@@ -2147,12 +2149,14 @@ out:
 	return ret;
 }
 
-static int __sort_dimension__add(struct sort_dimension *sd, int level)
+static int __sort_dimension__add(struct sort_dimension *sd,
+				 struct perf_hpp_list *list,
+				 int level)
 {
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_sort(sd, level) < 0)
+	if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
 		return -1;
 
 	if (sd->entry->se_collapse)
@@ -2163,7 +2167,9 @@ static int __sort_dimension__add(struct sort_dimension *sd, int level)
 	return 0;
 }
 
-static int __hpp_dimension__add(struct hpp_dimension *hd, int level)
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+				struct perf_hpp_list *list,
+				int level)
 {
 	struct perf_hpp_fmt *fmt;
 
@@ -2175,7 +2181,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, int level)
 		return -1;
 
 	hd->taken = 1;
-	perf_hpp__register_sort_field(fmt);
+	perf_hpp_list__register_sort_field(list, fmt);
 	return 0;
 }
 
@@ -2185,7 +2191,7 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
 	if (sd->taken)
 		return 0;
 
-	if (__sort_dimension__add_hpp_output(list, sd) < 0)
+	if (__sort_dimension__add_hpp_output(sd, list) < 0)
 		return -1;
 
 	sd->taken = 1;
@@ -2215,7 +2221,8 @@ int hpp_dimension__add_output(unsigned col)
 	return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
+static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			       struct perf_evlist *evlist __maybe_unused,
 			       int level)
 {
 	unsigned int i;
@@ -2255,7 +2262,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 			sort__has_thread = 1;
 		}
 
-		return __sort_dimension__add(sd, level);
+		return __sort_dimension__add(sd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2264,7 +2271,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 		if (strncasecmp(tok, hd->name, strlen(tok)))
 			continue;
 
-		return __hpp_dimension__add(hd, level);
+		return __hpp_dimension__add(hd, list, level);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2279,7 +2286,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd, level);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
@@ -2295,7 +2302,7 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 		if (sd->entry == &sort_mem_daddr_sym)
 			sort__has_sym = 1;
 
-		__sort_dimension__add(sd, level);
+		__sort_dimension__add(sd, list, level);
 		return 0;
 	}
 
@@ -2305,7 +2312,8 @@ static int sort_dimension__add(const char *tok, struct perf_evlist *evlist,
 	return -ESRCH;
 }
 
-static int setup_sort_list(char *str, struct perf_evlist *evlist)
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+			   struct perf_evlist *evlist)
 {
 	char *tmp, *tok;
 	int ret = 0;
@@ -2332,7 +2340,7 @@ static int setup_sort_list(char *str, struct perf_evlist *evlist)
 		}
 
 		if (*tok) {
-			ret = sort_dimension__add(tok, evlist, level);
+			ret = sort_dimension__add(list, tok, evlist, level);
 			if (ret == -EINVAL) {
 				error("Invalid --sort key: `%s'", tok);
 				break;
@@ -2480,7 +2488,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
 		}
 	}
 
-	ret = setup_sort_list(str, evlist);
+	ret = setup_sort_list(&perf_hpp_list, str, evlist);
 
 	free(str);
 	return ret;
@@ -2725,7 +2733,7 @@ int setup_sorting(struct perf_evlist *evlist)
 		return err;
 
 	if (parent_pattern != default_parent_pattern) {
-		err = sort_dimension__add("parent", evlist, -1);
+		err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
 		if (err < 0)
 			return err;
 	}

From ea8f75f981918c5946fc4029acdc86707fa901c1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 8 Mar 2016 19:42:30 +0100
Subject: [PATCH 04/19] perf tools: Omit unnecessary cast in
 perf_pmu__parse_scale

There's no need to use a const char pointer, we can use a char pointer
from the beginning and omit the unnecessary cast.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160308184230.GB7897@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d8cd038baed2..adef23b1352e 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	char scale[128];
 	int fd, ret = -1;
 	char path[PATH_MAX];
-	const char *lc;
+	char *lc;
 
 	snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
 
@@ -146,7 +146,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	/* restore locale */
 	setlocale(LC_NUMERIC, lc);
 
-	free((char *) lc);
+	free(lc);
 
 	ret = 0;
 error:

From 9eb42dee2b11635174c74a7996934b6ca18f2179 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 26 Feb 2016 18:13:28 -0500
Subject: [PATCH 05/19] tools lib traceevent: Add '~' operation within
 arg_num_eval()

When evaluating values for print flags, if the value included a '~'
operator, the parsing would fail. This broke kmalloc's parsing of:

__print_flags(REC->gfp_flags, "|", {(unsigned
long)((((((( gfp_t)(0x400000u|0x2000000u)) | (( gfp_t)0x40u) |
(( gfp_t)0x80u) | (( gfp_t)0x20000u)) | (( gfp_t)0x02u)) |
(( gfp_t)0x08u)) | (( gfp_t)0x4000u) | (( gfp_t)0x10000u) |
(( gfp_t)0x1000u) | (( gfp_t)0x200u)) & ~(( gfp_t)0x2000000u))
                                        ^
                                        |
                                      here

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: David Ahern <dsahern@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/20160226181328.22f47129@gandalf.local.home
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 865dea55454b..190cc886ab91 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2398,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
 				break;
 			*val = left + right;
 			break;
+		case '~':
+			ret = arg_num_eval(arg->op.right, &right);
+			if (!ret)
+				break;
+			*val = ~right;
+			break;
 		default:
 			do_warning("unknown op '%s'", arg->op.op);
 			ret = 0;

From e12b202f8fb9b62a3997cad8e93401f85293390c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 10 Mar 2016 17:41:13 +0100
Subject: [PATCH 06/19] perf jitdump: Build only on supported archs

Build jitdump only on architectures defined in util/genelf.h file, to avoid
breaking the build on unsupported arches.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Davidlohr Bueso <dbueso@suse.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Mel Gorman <mgorman@suse.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/20160310164113.GA11357@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm/Makefile     |  1 +
 tools/perf/arch/arm64/Makefile   |  1 +
 tools/perf/arch/powerpc/Makefile |  1 +
 tools/perf/arch/x86/Makefile     |  1 +
 tools/perf/builtin-inject.c      | 12 +++++++-----
 tools/perf/config/Makefile       |  7 +++++++
 tools/perf/util/Build            |  2 +-
 7 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 7fbca175099e..18b13518d8d8 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 7fbca175099e..18b13518d8d8 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 9f9cea3478fd..56e05f126ad8 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 
 HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 09ba923debe8..269af2143735 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e219ed458d97..7fa68663ed72 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -73,7 +73,7 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
-#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
+#ifdef HAVE_JITDUMP
 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
 			       union perf_event *event __maybe_unused,
 			       struct ordered_events *oe __maybe_unused)
@@ -245,7 +245,7 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
 	return err;
 }
 
-#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
+#ifdef HAVE_JITDUMP
 static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct perf_sample *sample,
@@ -283,7 +283,7 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool,
 	return err;
 }
 
-#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
+#ifdef HAVE_JITDUMP
 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
 					union perf_event *event,
 					struct perf_sample *sample,
@@ -778,7 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
 		OPT_INCR('v', "verbose", &verbose,
 			 "be more verbose (show build ids, etc)"),
 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
@@ -795,7 +797,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf inject [<options>]",
 		NULL
 	};
-#if !defined(HAVE_LIBELF_SUPPORT) || !defined(HAVE_DWARF_SUPPORT)
+#ifndef HAVE_JITDUMP
 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
 #endif
 	argc = parse_options(argc, argv, options, inject_usage, 0);
@@ -833,7 +835,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		inject.tool.ordered_events = true;
 		inject.tool.ordering_requires_timestamps = true;
 	}
-#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_DWARF_SUPPORT)
+#ifdef HAVE_JITDUMP
 	if (inject.jit_mode) {
 		inject.tool.mmap2	   = perf_event__jit_repipe_mmap2;
 		inject.tool.mmap	   = perf_event__jit_repipe_mmap;
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index f7aeaf303f5a..eca6a912e8c2 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -328,6 +328,13 @@ ifndef NO_LIBELF
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_DWARF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
 ifeq ($(ARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index f130ce240158..eea25e2424e9 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -108,7 +108,7 @@ libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
 libperf-y += demangle-java.o
 
-ifdef CONFIG_DWARF
+ifdef CONFIG_JITDUMP
 libperf-$(CONFIG_LIBELF) += jitdump.o
 libperf-$(CONFIG_LIBELF) += genelf.o
 libperf-$(CONFIG_LIBELF) += genelf_debug.o

From f4954cfb1cda4cf0abf36d23213c702e94666c3f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:46:56 +0900
Subject: [PATCH 07/19] perf tools: Fix hist_entry__filter() for hierarchy

When hierarchy mode is enabled each output format is in a separate hpp
list.  So when applying a filter it should check all formats in the
list.  Currently it only checks a single ->fmt field which was not set
properly.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 59a101e43457..8a49a07ebea6 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1602,16 +1602,30 @@ int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
 {
 	struct perf_hpp_fmt *fmt;
 	struct hpp_sort_entry *hse;
+	int ret = -1;
+	int r;
 
-	fmt = he->fmt;
-	if (fmt == NULL || !perf_hpp__is_sort_entry(fmt))
-		return -1;
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (!perf_hpp__is_sort_entry(fmt))
+			continue;
 
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	if (hse->se->se_filter == NULL)
-		return -1;
+		hse = container_of(fmt, struct hpp_sort_entry, hpp);
+		if (hse->se->se_filter == NULL)
+			continue;
 
-	return hse->se->se_filter(he, type, arg);
+		/*
+		 * hist entry is filtered if any of sort key in the hpp list
+		 * is applied.  But it should skip non-matched filter types.
+		 */
+		r = hse->se->se_filter(he, type, arg);
+		if (r >= 0) {
+			if (ret < 0)
+				ret = 0;
+			ret |= r;
+		}
+	}
+
+	return ret;
 }
 
 static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,

From 4945cf2aa1ed61994c158f22f26ea6101059a8d4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:46:57 +0900
Subject: [PATCH 08/19] perf tools: Add more sort entry check functions

Those functions are for checking if a given perf_hpp_fmt is a
filter-related sort entry.  With hierarchy mode, it needs to check
filters on the hist entries with its own hpp format list.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.h |  4 ++++
 tools/perf/util/sort.c | 46 ++++++++++++++++--------------------------
 2 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2cb017f28f9e..6870a1bfd762 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -318,6 +318,10 @@ bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *his
 bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
 
 struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8a49a07ebea6..61c74022e47f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1488,38 +1488,26 @@ bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
 	return format->header == __sort__hpp_header;
 }
 
-bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt)
-{
-	struct hpp_sort_entry *hse;
-
-	if (!perf_hpp__is_sort_entry(fmt))
-		return false;
-
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	return hse->se == &sort_trace;
+#define MK_SORT_ENTRY_CHK(key)					\
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
+{								\
+	struct hpp_sort_entry *hse;				\
+								\
+	if (!perf_hpp__is_sort_entry(fmt))			\
+		return false;					\
+								\
+	hse = container_of(fmt, struct hpp_sort_entry, hpp);	\
+	return hse->se == &sort_ ## key ;			\
 }
 
-bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt)
-{
-	struct hpp_sort_entry *hse;
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
 
-	if (!perf_hpp__is_sort_entry(fmt))
-		return false;
-
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	return hse->se == &sort_srcline;
-}
-
-bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt)
-{
-	struct hpp_sort_entry *hse;
-
-	if (!perf_hpp__is_sort_entry(fmt))
-		return false;
-
-	hse = container_of(fmt, struct hpp_sort_entry, hpp);
-	return hse->se == &sort_srcfile;
-}
 
 static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
 {

From aec13a7ec78d9322a348fb26940097b0bdfef1bd Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:46:58 +0900
Subject: [PATCH 09/19] perf tools: Fix command line filters in hierarchy mode

When a command-line filter is applied in hierarchy mode, output is
broken especially when filtering on a lower level.  The higher level
entries don't show up so it's hard to see the results.

Also it needs to handle multi sort keys in a single hierarchy level.

Before:

  $ perf report --hierarchy -s 'cpu,{dso,comm}' --comms swapper --stdio
  ...
  #    Overhead  CPU / Shared Object+Command
  # ...........  ...........................
  #
         13.79%     [kernel.vmlinux]  swapper
      31.71%     000
         13.80%     [kernel.vmlinux]  swapper
          0.43%     [e1000e]          swapper
         11.89%     [kernel.vmlinux]  swapper
          9.18%     [kernel.vmlinux]  swapper

After:

  #    Overhead  CPU / Shared Object+Command
  # ...........  ...............................
  #
      33.09%     003
         13.79%     [kernel.vmlinux]  swapper
      31.71%     000
         13.80%     [kernel.vmlinux]  swapper
          0.43%     [e1000e]          swapper
      21.90%     002
         11.89%     [kernel.vmlinux]  swapper
      13.30%     001
          9.18%     [kernel.vmlinux]  swapper

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 100 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 97 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 29da9e0d8db9..a98f9345f686 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1087,10 +1087,103 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
  */
 
 static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+				       enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+	return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+						enum hist_filter type,
+						fmt_chk_fn check)
+{
+	struct perf_hpp_fmt *fmt;
+	bool type_match = false;
+	struct hist_entry *parent = he->parent_he;
+
+	switch (type) {
+	case HIST_FILTER__THREAD:
+		if (symbol_conf.comm_list == NULL &&
+		    symbol_conf.pid_list == NULL &&
+		    symbol_conf.tid_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__DSO:
+		if (symbol_conf.dso_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__SYMBOL:
+		if (symbol_conf.sym_list == NULL)
+			return;
+		break;
+	case HIST_FILTER__PARENT:
+	case HIST_FILTER__GUEST:
+	case HIST_FILTER__HOST:
+	case HIST_FILTER__SOCKET:
+	default:
+		return;
+	}
+
+	/* if it's filtered by own fmt, it has to have filter bits */
+	perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+		if (check(fmt)) {
+			type_match = true;
+			break;
+		}
+	}
+
+	if (type_match) {
+		/*
+		 * If the filter is for current level entry, propagate
+		 * filter marker to parents.  The marker bit was
+		 * already set by default so it only needs to clear
+		 * non-filtered entries.
+		 */
+		if (!(he->filtered & (1 << type))) {
+			while (parent) {
+				parent->filtered &= ~(1 << type);
+				parent = parent->parent_he;
+			}
+		}
+	} else {
+		/*
+		 * If current entry doesn't have matching formats, set
+		 * filter marker for upper level entries.  it will be
+		 * cleared if its lower level entries is not filtered.
+		 *
+		 * For lower-level entries, it inherits parent's
+		 * filter bit so that lower level entries of a
+		 * non-filtered entry won't set the filter marker.
+		 */
+		if (parent == NULL)
+			he->filtered |= (1 << type);
+		else
+			he->filtered |= (parent->filtered & (1 << type));
+	}
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+					    check_thread_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+					    perf_hpp__is_dso_entry);
+
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+					    perf_hpp__is_sym_entry);
+
+	hists__apply_filters(he->hists, he);
+}
 
 static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 						 struct rb_root *root,
 						 struct hist_entry *he,
+						 struct hist_entry *parent_he,
 						 struct perf_hpp_list *hpp_list)
 {
 	struct rb_node **p = &root->rb_node;
@@ -1125,11 +1218,13 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 	if (new == NULL)
 		return NULL;
 
-	hists__apply_filters(hists, new);
 	hists->nr_entries++;
 
 	/* save related format list for output */
 	new->hpp_list = hpp_list;
+	new->parent_he = parent_he;
+
+	hist_entry__apply_hierarchy_filters(new);
 
 	/* some fields are now passed to 'new' */
 	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
@@ -1170,14 +1265,13 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
 			continue;
 
 		/* insert copy of 'he' for each fmt into the hierarchy */
-		new_he = hierarchy_insert_entry(hists, root, he, &node->hpp);
+		new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
 		if (new_he == NULL) {
 			ret = -1;
 			break;
 		}
 
 		root = &new_he->hroot_in;
-		new_he->parent_he = parent;
 		new_he->depth = depth++;
 		parent = new_he;
 	}

From a515d8ff7085d5e9fde867f2048b8da36b95dc51 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:46:59 +0900
Subject: [PATCH 10/19] perf tools: Remove hist_entry->fmt field

It's not used anymore and the output format is accessed by the hpp_list
pointer instead when hierarchy is enabled.  Let's get rid of it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-5-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index ea1f722cffea..151afc1b6c2f 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -129,7 +129,6 @@ struct hist_entry {
 	void			*raw_data;
 	u32			raw_size;
 	void			*trace_output;
-	struct perf_hpp_fmt	*fmt;
 	struct perf_hpp_list	*hpp_list;
 	struct hist_entry	*parent_he;
 	union {

From 325a62834e81452d2a6e253444022cf493bbabfc Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:47:00 +0900
Subject: [PATCH 11/19] perf hists browser: Cleanup
 hist_browser__fprintf_hierarchy_entry()

The hist_browser__fprintf_hierarchy_entry() is to dump current output
into a file so it needs to be sync-ed with the corresponding function
hist_browser__show_hierarchy_entry().  So use hists->nr_hpp_node to
compute the indent width and use the first fmt_node to print overhead
columns instead of checking whether it's a sort entry (or dynamic entry).

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e0e217ec856b..aed9c8f011f7 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1928,8 +1928,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
 
 static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
 						 struct hist_entry *he,
-						 FILE *fp, int level,
-						 int nr_sort_keys)
+						 FILE *fp, int level)
 {
 	char s[8192];
 	int printed = 0;
@@ -1939,23 +1938,20 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
 		.size = sizeof(s),
 	};
 	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *fmt_node;
 	bool first = true;
 	int ret;
-	int hierarchy_indent = nr_sort_keys * HIERARCHY_INDENT;
+	int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
 
 	printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
 
 	folded_sign = hist_entry__folded(he);
 	printed += fprintf(fp, "%c", folded_sign);
 
-	hists__for_each_format(he->hists, fmt) {
-		if (perf_hpp__should_skip(fmt, he->hists))
-			continue;
-
-		if (perf_hpp__is_sort_entry(fmt) ||
-		    perf_hpp__is_dynamic_entry(fmt))
-			break;
-
+	/* the first hpp_list_node is for overhead columns */
+	fmt_node = list_first_entry(&he->hists->hpp_formats,
+				    struct perf_hpp_list_node, list);
+	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
 		if (!first) {
 			ret = scnprintf(hpp.buf, hpp.size, "  ");
 			advance_hpp(&hpp, ret);
@@ -1992,7 +1988,6 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 	struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
 						   browser->min_pcnt);
 	int printed = 0;
-	int nr_sort = browser->hists->nr_sort_keys;
 
 	while (nd) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
@@ -2000,8 +1995,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
 		if (symbol_conf.report_hierarchy) {
 			printed += hist_browser__fprintf_hierarchy_entry(browser,
 									 h, fp,
-									 h->depth,
-									 nr_sort);
+									 h->depth);
 		} else {
 			printed += hist_browser__fprintf_entry(browser, h, fp);
 		}

From 86e3ee5224c17b7967aac39aa15539393c144de7 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:47:01 +0900
Subject: [PATCH 12/19] perf tools: Remove nr_sort_keys field

The nr_sort_keys field is to carry the number of sort entries in a
hpp_list or hists to determine the depth of indentation of a hist entry.
As it's only used in hierarchy mode and now we have used nr_hpp_node for
this reason, there's no need to keep it anymore.  Let's get rid of it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-7-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/hist.c   |  3 ---
 tools/perf/util/hist.h |  2 --
 tools/perf/util/sort.c | 26 --------------------------
 3 files changed, 31 deletions(-)

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f03c4f70438f..3baeaa6e71b5 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -515,9 +515,6 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list,
 void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 					struct perf_hpp_fmt *format)
 {
-	if (perf_hpp__is_sort_entry(format) || perf_hpp__is_dynamic_entry(format))
-		list->nr_sort_keys++;
-
 	list_add_tail(&format->sort_list, &list->sorts);
 }
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 6870a1bfd762..ead18c82294f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -79,7 +79,6 @@ struct hists {
 	int			socket_filter;
 	struct perf_hpp_list	*hpp_list;
 	struct list_head	hpp_formats;
-	int			nr_sort_keys;
 	int			nr_hpp_node;
 };
 
@@ -241,7 +240,6 @@ struct perf_hpp_fmt {
 struct perf_hpp_list {
 	struct list_head fields;
 	struct list_head sorts;
-	int nr_sort_keys;
 };
 
 extern struct perf_hpp_list perf_hpp_list;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 61c74022e47f..ced849e51e6b 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2703,29 +2703,6 @@ out:
 	return ret;
 }
 
-static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist)
-{
-	struct perf_evsel *evsel;
-
-	evlist__for_each(evlist, evsel) {
-		struct perf_hpp_fmt *fmt;
-		struct hists *hists = evsel__hists(evsel);
-
-		hists->nr_sort_keys = perf_hpp_list.nr_sort_keys;
-
-		/*
-		 * If dynamic entries were used, it might add multiple
-		 * entries to each evsel for a single field name.  Set
-		 * actual number of sort keys for each hists.
-		 */
-		perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
-			if (perf_hpp__is_dynamic_entry(fmt) &&
-			    !perf_hpp__defined_dynamic_entry(fmt, hists))
-				hists->nr_sort_keys--;
-		}
-	}
-}
-
 int setup_sorting(struct perf_evlist *evlist)
 {
 	int err;
@@ -2740,9 +2717,6 @@ int setup_sorting(struct perf_evlist *evlist)
 			return err;
 	}
 
-	if (evlist != NULL)
-		evlist__set_hists_nr_sort_keys(evlist);
-
 	reset_dimensions();
 
 	/*

From f7fb538afea55383a9383dac5c56887c601af5f4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 22:47:02 +0900
Subject: [PATCH 13/19] perf tools: Recalc total periods using top-level
 entries in hierarchy

When hierarchy mode is enabled, each entry in a hierarchy level shares
the period.  IOW an upper level entry's period is the sum of lower level
entries.  Thus perf uses only one of them to calculate the total period
of hists.  It was lowest-level (leaf) entries but it has a problem when
it comes to filters.

If a filter is applied, entries in the same level will be filtered or
not.  But upper level entries still have period of their sum including
filtered one.  So total sum of upper level entries will not be same as
sum of lower level entries.

This resulted in entries having more than 100% of overhead and it can be
produced using perf top with filter(s).

Reported-and-Tested-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1457531222-18130-8-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 44 ++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index a98f9345f686..290b3cbf6877 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1453,6 +1453,31 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
 	hists->stats.total_period += h->stat.period;
 }
 
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+	struct rb_node *node;
+	struct hist_entry *he;
+
+	node = rb_first(&hists->entries);
+
+	hists->stats.total_period = 0;
+	hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * recalculate total period using top-level entries only
+	 * since lower level entries only see non-filtered entries
+	 * but upper level entries have sum of both entries.
+	 */
+	while (node) {
+		he = rb_entry(node, struct hist_entry, rb_node);
+		node = rb_next(node);
+
+		hists->stats.total_period += he->stat.period;
+		if (!he->filtered)
+			hists->stats.total_non_filtered_period += he->stat.period;
+	}
+}
+
 static void hierarchy_insert_output_entry(struct rb_root *root,
 					  struct hist_entry *he)
 {
@@ -1518,11 +1543,6 @@ static void hists__hierarchy_output_resort(struct hists *hists,
 			continue;
 		}
 
-		/* only update stat for leaf entries to avoid duplication */
-		hists__inc_stats(hists, he);
-		if (!he->filtered)
-			hists__calc_col_len(hists, he);
-
 		if (!use_callchain)
 			continue;
 
@@ -1602,11 +1622,13 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
 	hists__reset_col_len(hists);
 
 	if (symbol_conf.report_hierarchy) {
-		return hists__hierarchy_output_resort(hists, prog,
-						      &hists->entries_collapsed,
-						      &hists->entries,
-						      min_callchain_hits,
-						      use_callchain);
+		hists__hierarchy_output_resort(hists, prog,
+					       &hists->entries_collapsed,
+					       &hists->entries,
+					       min_callchain_hits,
+					       use_callchain);
+		hierarchy_recalc_total_periods(hists);
+		return;
 	}
 
 	if (sort__need_collapse)
@@ -1927,6 +1949,8 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a
 		}
 	}
 
+	hierarchy_recalc_total_periods(hists);
+
 	/*
 	 * resort output after applying a new filter since filter in a lower
 	 * hierarchy can change periods in a upper hierarchy.

From 078b8d4a406fa8ce4a3c9d5145c27be1ed2b1dfd Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 23:20:51 +0900
Subject: [PATCH 14/19] perf tools: Add sort__has_comm variable

The sort__has_comm variable is to check whether the comm sort key is
given.  This is necessary to support thread filtering in the TUI hists
browser later.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1457533253-21419-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.c | 3 +++
 tools/perf/util/sort.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ced849e51e6b..93fa136b0025 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -27,6 +27,7 @@ int		sort__has_sym = 0;
 int		sort__has_dso = 0;
 int		sort__has_socket = 0;
 int		sort__has_thread = 0;
+int		sort__has_comm = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
 /*
@@ -2262,6 +2263,8 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 			sort__has_socket = 1;
 		} else if (sd->entry == &sort_thread) {
 			sort__has_thread = 1;
+		} else if (sd->entry == &sort_comm) {
+			sort__has_comm = 1;
 		}
 
 		return __sort_dimension__add(sd, list, level);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 151afc1b6c2f..3f4e35998119 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -37,6 +37,7 @@ extern int sort__has_parent;
 extern int sort__has_sym;
 extern int sort__has_socket;
 extern int sort__has_thread;
+extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;

From 6962ccb37b50366014074aec6fd14497cf719642 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 10 Mar 2016 00:14:50 +0900
Subject: [PATCH 15/19] perf hists browser: Allow thread filtering for comm
 sort key

The commit 2eafd410e669 ("perf hists browser: Only 'Zoom into thread'
only when sort order has 'pid'") disabled thread filtering in hist
browser for the default sort key.  However the he->thread is still valid
even if 'pid' sort key is not given.  Only thing it should not use is
the pid (or tid) of the thread.  So allow to filter by thread when
'comm' sort key is given and show pid only if 'pid' sort key is given.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1457536490-24084-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 42 ++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index aed9c8f011f7..cb4191bf6cec 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2136,11 +2136,18 @@ static int hists__browser_title(struct hists *hists,
 	if (hists->uid_filter_str)
 		printed += snprintf(bf + printed, size - printed,
 				    ", UID: %s", hists->uid_filter_str);
-	if (thread)
-		printed += scnprintf(bf + printed, size - printed,
+	if (thread) {
+		if (sort__has_thread) {
+			printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
 				     (thread->comm_set ? thread__comm_str(thread) : ""),
 				    thread->tid);
+		} else {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+		}
+	}
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", DSO: %s", dso->short_name);
@@ -2321,9 +2328,15 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
 		thread__zput(browser->hists->thread_filter);
 		ui_helpline__pop();
 	} else {
-		ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
-				   thread->comm_set ? thread__comm_str(thread) : "",
-				   thread->tid);
+		if (sort__has_thread) {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "",
+					   thread->tid);
+		} else {
+			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+					   thread->comm_set ? thread__comm_str(thread) : "");
+		}
+
 		browser->hists->thread_filter = thread__get(thread);
 		perf_hpp__set_elide(HISTC_THREAD, false);
 		pstack__push(browser->pstack, &browser->hists->thread_filter);
@@ -2338,13 +2351,22 @@ static int
 add_thread_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, struct thread *thread)
 {
-	if (!sort__has_thread || thread == NULL)
+	int ret;
+
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
 		return 0;
 
-	if (asprintf(optstr, "Zoom %s %s(%d) thread",
-		     browser->hists->thread_filter ? "out of" : "into",
-		     thread->comm_set ? thread__comm_str(thread) : "",
-		     thread->tid) < 0)
+	if (sort__has_thread) {
+		ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "",
+			       thread->tid);
+	} else {
+		ret = asprintf(optstr, "Zoom %s %s thread",
+			       browser->hists->thread_filter ? "out of" : "into",
+			       thread->comm_set ? thread__comm_str(thread) : "");
+	}
+	if (ret < 0)
 		return 0;
 
 	act->thread = thread;

From 599a2f38a989a79df99838f22cb607f5e2b5b56c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 9 Mar 2016 23:20:53 +0900
Subject: [PATCH 16/19] perf hists browser: Check sort keys before hot key
 actions

The context menu in TUI hists browser checks corresponding sort keys
when creating the menu item.  But hotkey actions lack these checks so
they can filter using incorrect info.

For example, default sort key of 'perf top' doesn't contain 'comm' or
'pid' sort key so each hist entry's thread info is not reliable.  Thus
it should prohibit using thread filter on 't' key.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1457533253-21419-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index cb4191bf6cec..4b9816555946 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2322,6 +2322,9 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
 {
 	struct thread *thread = act->thread;
 
+	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+		return 0;
+
 	if (browser->hists->thread_filter) {
 		pstack__remove(browser->pstack, &browser->hists->thread_filter);
 		perf_hpp__set_elide(HISTC_THREAD, false);
@@ -2379,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 {
 	struct map *map = act->ms.map;
 
+	if (!sort__has_dso || map == NULL)
+		return 0;
+
 	if (browser->hists->dso_filter) {
 		pstack__remove(browser->pstack, &browser->hists->dso_filter);
 		perf_hpp__set_elide(HISTC_DSO, false);
@@ -2530,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused,
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
+	if (!sort__has_socket || act->socket < 0)
+		return 0;
+
 	if (browser->hists->socket_filter > -1) {
 		pstack__remove(browser->pstack, &browser->hists->socket_filter);
 		browser->hists->socket_filter = -1;

From 6b45f7b2a37b0e00693985fd0abfc8e0319f91ce Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 3 Mar 2016 15:57:35 -0800
Subject: [PATCH 17/19] perf stat: Document CSV format in manpage

With all the recently added fields in the perf stat CSV output we should
finally document them in the man page. Do this here.

v2: Fix fields in documentation (Jiri)
v3: fix order of fields again (Jiri)
v4: Change order again.
v5: Document more fields (Jiri)
v6: Move time stamp first
v7: More fixes (Jiri)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1457049458-28956-5-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 14d9e8ffaff7..8812d7319edb 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -219,6 +219,29 @@ $ perf stat -- make -j
 
  Wall-clock time elapsed:   719.554352 msecs
 
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format output
+Commas in the output are not put into "". To make it easy to parse
+it is recommended to use a different character like -x \;
+
+The fields are in this order:
+
+	- optional usec time stamp in fractions of second (with -I xxx)
+	- optional CPU, core, or socket identifier
+	- optional number of logical CPUs aggregated
+	- counter value
+	- unit of the counter value or empty
+	- event name
+	- run time of counter
+	- percentage of measurement time the counter was running
+	- optional variance if multiple values are collected with -r
+	- optional metric value
+	- optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-list[1]

From 54b5091606c18f68a7fc8b4ab03ac4592c7d2922 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 3 Mar 2016 15:57:36 -0800
Subject: [PATCH 18/19] perf stat: Implement --metric-only mode

Add a new mode to only print metrics. Sometimes we don't care about the
raw values, just want the computed metrics. This allows more compact
printing, so with -I each sample is only a single line.  This also
allows easier plotting and processing with other tools.

The main target is with using --topdown, but it also works with -T and
standard perf stat. A few metrics are not supported.

To avoid having to hardcode all the metrics in the code it uses a two
pass approach: first compute dummy metrics and only print the headers in
the print_metric callback. Then use the callback to print the actual
values.

There are some additional changes in the stat printout code to handle
all metrics being on a single line.

One issue is that the column code doesn't know in advance what events
are not supported by the CPU, and it would be hard to find out as this
could change based on dynamic conditions. That causes empty columns in
some cases.

The output can be fairly wide, often you may need more than 80 columns.

Example:

% perf stat -a -I 1000 --metric-only
     1.001452803 frontend cycles idle insn per cycle       stalled cycles per insn branch-misses of all branches
     1.001452803  158.91%               0.66                2.39                    2.92%
     2.002192321  180.63%               0.76                2.08                    2.96%
     3.003088282  150.59%               0.62                2.57                    2.84%
     4.004369835  196.20%               0.98                1.62                    3.79%
     5.005227314  231.98%               0.84                1.90                    4.71%

v2: Lots of updates.
v3: Use slightly narrower columns
v4: Add comment

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1457049458-28956-6-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt |   4 +
 tools/perf/builtin-stat.c              | 211 +++++++++++++++++++++++--
 2 files changed, 205 insertions(+), 10 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 8812d7319edb..82f0951754dd 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -147,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms)
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
 	example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with -A or --per-thread.
+
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
 is a useful mode to detect imbalance between sockets.  To enable this mode,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index baa82078c148..74508c9d0742 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -122,6 +122,7 @@ static bool			sync_run			= false;
 static unsigned int		initial_delay			= 0;
 static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
+static bool			metric_only			= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
@@ -827,6 +828,99 @@ static void print_metric_csv(void *ctx,
 	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
 }
 
+#define METRIC_ONLY_LEN 20
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+	if (!unit)
+		return false;
+	if (strstr(unit, "/sec") ||
+	    strstr(unit, "hz") ||
+	    strstr(unit, "Hz") ||
+	    strstr(unit, "CPUs utilized"))
+		return false;
+	return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+			   const char *unit)
+{
+	if (!strncmp(unit, "of all", 6)) {
+		snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+			 unit);
+		return buf;
+	}
+	return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+			      const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	int n;
+	char buf[1024];
+	unsigned mlen = METRIC_ONLY_LEN;
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(buf, os->evsel, unit);
+	if (color)
+		n = color_fprintf(out, color, fmt, val);
+	else
+		n = fprintf(out, fmt, val);
+	if (n > METRIC_ONLY_LEN)
+		n = METRIC_ONLY_LEN;
+	if (mlen < strlen(unit))
+		mlen = strlen(unit) + 1;
+	fprintf(out, "%*s", mlen - n, "");
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+				  const char *fmt,
+				  const char *unit, double val)
+{
+	struct outstate *os = ctx;
+	FILE *out = os->fh;
+	char buf[64], *vals, *ends;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	snprintf(buf, sizeof buf, fmt, val);
+	vals = buf;
+	while (isspace(*vals))
+		vals++;
+	ends = vals;
+	while (isdigit(*ends) || *ends == '.')
+		ends++;
+	*ends = 0;
+	fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+				const char *fmt __maybe_unused,
+				const char *unit, double val __maybe_unused)
+{
+	struct outstate *os = ctx;
+	char tbuf[1024];
+
+	if (!valid_only_metric(unit))
+		return;
+	unit = fixunit(tbuf, os->evsel, unit);
+	if (csv_output)
+		fprintf(os->fh, "%s%s", unit, csv_sep);
+	else
+		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
@@ -921,9 +1015,16 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	print_metric_t pm = print_metric_std;
 	void (*nl)(void *);
 
-	nl = new_line_std;
+	if (metric_only) {
+		nl = new_line_metric;
+		if (csv_output)
+			pm = print_metric_only_csv;
+		else
+			pm = print_metric_only;
+	} else
+		nl = new_line_std;
 
-	if (csv_output) {
+	if (csv_output && !metric_only) {
 		static int aggr_fields[] = {
 			[AGGR_GLOBAL] = 0,
 			[AGGR_THREAD] = 1,
@@ -940,6 +1041,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 			os.nfields++;
 	}
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+		if (metric_only) {
+			pm(&os, NULL, "", "", 0);
+			return;
+		}
 		aggr_printout(counter, id, nr);
 
 		fprintf(stat_config.output, "%*s%s",
@@ -968,7 +1073,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 		return;
 	}
 
-	if (nsec_counter(counter))
+	if (metric_only)
+		/* nothing */;
+	else if (nsec_counter(counter))
 		nsec_printout(id, nr, counter, uval);
 	else
 		abs_printout(id, nr, counter, uval);
@@ -977,7 +1084,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	out.new_line = nl;
 	out.ctx = &os;
 
-	if (csv_output) {
+	if (csv_output && !metric_only) {
 		print_noise(counter, noise);
 		print_running(run, ena);
 	}
@@ -985,7 +1092,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 	perf_stat__print_shadow_stats(counter, uval,
 				first_shadow_cpu(counter, id),
 				&out);
-	if (!csv_output) {
+	if (!csv_output && !metric_only) {
 		print_noise(counter, noise);
 		print_running(run, ena);
 	}
@@ -1021,14 +1128,23 @@ static void print_aggr(char *prefix)
 	int cpu, s, s2, id, nr;
 	double uval;
 	u64 ena, run, val;
+	bool first;
 
 	if (!(aggr_map || aggr_get_id))
 		return;
 
 	aggr_update_shadow();
 
+	/*
+	 * With metric_only everything is on a single line.
+	 * Without each counter has its own line.
+	 */
 	for (s = 0; s < aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+
 		id = aggr_map->map[s];
+		first = true;
 		evlist__for_each(evsel_list, counter) {
 			val = ena = run = 0;
 			nr = 0;
@@ -1041,13 +1157,20 @@ static void print_aggr(char *prefix)
 				run += perf_counts(counter->counts, cpu, 0)->run;
 				nr++;
 			}
-			if (prefix)
+			if (first && metric_only) {
+				first = false;
+				aggr_printout(counter, id, nr);
+			}
+			if (prefix && !metric_only)
 				fprintf(output, "%s", prefix);
 
 			uval = val * counter->scale;
 			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
-			fputc('\n', output);
+			if (!metric_only)
+				fputc('\n', output);
 		}
+		if (metric_only)
+			fputc('\n', output);
 	}
 }
 
@@ -1092,12 +1215,13 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 	avg_enabled = avg_stats(&ps->res_stats[1]);
 	avg_running = avg_stats(&ps->res_stats[2]);
 
-	if (prefix)
+	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
 
 	uval = avg * counter->scale;
 	printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
-	fprintf(output, "\n");
+	if (!metric_only)
+		fprintf(output, "\n");
 }
 
 /*
@@ -1126,6 +1250,43 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 	}
 }
 
+static int aggr_header_lens[] = {
+	[AGGR_CORE] = 18,
+	[AGGR_SOCKET] = 12,
+	[AGGR_NONE] = 15,
+	[AGGR_THREAD] = 24,
+	[AGGR_GLOBAL] = 0,
+};
+
+static void print_metric_headers(char *prefix)
+{
+	struct perf_stat_output_ctx out;
+	struct perf_evsel *counter;
+	struct outstate os = {
+		.fh = stat_config.output
+	};
+
+	if (prefix)
+		fprintf(stat_config.output, "%s", prefix);
+
+	if (!csv_output)
+		fprintf(stat_config.output, "%*s",
+			aggr_header_lens[stat_config.aggr_mode], "");
+
+	/* Print metrics headers only */
+	evlist__for_each(evsel_list, counter) {
+		os.evsel = counter;
+		out.ctx = &os;
+		out.print_metric = print_metric_header;
+		out.new_line = new_line_metric;
+		os.evsel = counter;
+		perf_stat__print_shadow_stats(counter, 0,
+					      0,
+					      &out);
+	}
+	fputc('\n', stat_config.output);
+}
+
 static void print_interval(char *prefix, struct timespec *ts)
 {
 	FILE *output = stat_config.output;
@@ -1133,7 +1294,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 
 	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-	if (num_print_interval == 0 && !csv_output) {
+	if (num_print_interval == 0 && !csv_output && !metric_only) {
 		switch (stat_config.aggr_mode) {
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
@@ -1220,6 +1381,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	else
 		print_header(argc, argv);
 
+	if (metric_only) {
+		static int num_print_iv;
+
+		if (num_print_iv == 0)
+			print_metric_headers(prefix);
+		if (num_print_iv++ == 25)
+			num_print_iv = 0;
+		if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+			fprintf(stat_config.output, "%s", prefix);
+	}
+
 	switch (stat_config.aggr_mode) {
 	case AGGR_CORE:
 	case AGGR_SOCKET:
@@ -1232,6 +1404,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	case AGGR_GLOBAL:
 		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
+		if (metric_only)
+			fputc('\n', stat_config.output);
 		break;
 	case AGGR_NONE:
 		evlist__for_each(evsel_list, counter)
@@ -1356,6 +1530,8 @@ static const struct option stat_options[] = {
 		     "aggregate counts per thread", AGGR_THREAD),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
+	OPT_BOOLEAN(0, "metric-only", &metric_only,
+			"Only print computed metrics. No raw values"),
 	OPT_END()
 };
 
@@ -1997,6 +2173,21 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		goto out;
 	}
 
+	if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+		goto out;
+	}
+
+	if (metric_only && stat_config.aggr_mode == AGGR_NONE) {
+		fprintf(stderr, "--metric-only is not supported with -A\n");
+		goto out;
+	}
+
+	if (metric_only && run_count > 1) {
+		fprintf(stderr, "--metric-only is not supported with -r\n");
+		goto out;
+	}
+
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);

From 206cab651d07563d766c7f4cb73f858c5df3dec5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 3 Mar 2016 15:57:37 -0800
Subject: [PATCH 19/19] perf stat: Add --metric-only support for -A

Add --metric-only support for -A (no aggregation) too. This requires a new
print function, print_no_aggr_metric(), that walks the CPUs in the outer
loop so that all metrics of one CPU are printed on the same line.

v2: Fix manpage
v3: Simplify nrcpus computation

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1457049458-28956-7-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt |  2 +-
 tools/perf/builtin-stat.c              | 45 +++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 82f0951754dd..04f23b404bbc 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -149,7 +149,7 @@ The overhead percentage could be high in some cases, for instance with small, su
 
 --metric-only::
 Only print computed metrics. Print them in a single line.
-Don't show any raw values. Not supported with -A or --per-thread.
+Don't show any raw values. Not supported with --per-thread or -r.
 
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 74508c9d0742..1f19f2f999c8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1250,10 +1250,40 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 	}
 }
 
+/* Metric-only output for AGGR_NONE: one line per CPU index, all counters. */
+static void print_no_aggr_metric(char *prefix)
+{
+	const int nr_cpus = evsel_list->cpus->nr;
+	struct perf_evsel *evsel;
+	int idx;
+
+	for (idx = 0; idx < nr_cpus; idx++) {
+		bool id_pending = true;
+
+		if (prefix)
+			fputs(prefix, stat_config.output);
+		evlist__for_each(evsel_list, evsel) {
+			u64 val, ena, run;
+			double scaled;
+
+			if (id_pending) {
+				id_pending = false;
+				aggr_printout(evsel, idx, 0);
+			}
+			val = perf_counts(evsel->counts, idx, 0)->val;
+			ena = perf_counts(evsel->counts, idx, 0)->ena;
+			run = perf_counts(evsel->counts, idx, 0)->run;
+			scaled = val * evsel->scale;
+			printout(idx, 0, evsel, scaled, prefix, run, ena, 1.0);
+		}
+		fputc('\n', stat_config.output);
+	}
+}
+
 static int aggr_header_lens[] = {
 	[AGGR_CORE] = 18,
 	[AGGR_SOCKET] = 12,
-	[AGGR_NONE] = 15,
+	[AGGR_NONE] = 6,
 	[AGGR_THREAD] = 24,
 	[AGGR_GLOBAL] = 0,
 };
@@ -1408,8 +1438,12 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 			fputc('\n', stat_config.output);
 		break;
 	case AGGR_NONE:
-		evlist__for_each(evsel_list, counter)
-			print_counter(counter, prefix);
+		if (metric_only)
+			print_no_aggr_metric(prefix);
+		else {
+			evlist__for_each(evsel_list, counter)
+				print_counter(counter, prefix);
+		}
 		break;
 	case AGGR_UNSET:
 	default:
@@ -2178,11 +2212,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		goto out;
 	}
 
-	if (metric_only && stat_config.aggr_mode == AGGR_NONE) {
-		fprintf(stderr, "--metric-only is not supported with -A\n");
-		goto out;
-	}
-
 	if (metric_only && run_count > 1) {
 		fprintf(stderr, "--metric-only is not supported with -r\n");
 		goto out;