From d646ae0a73deb0d80792a6a9c0757317ad8049c5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 16 Feb 2016 07:37:41 +0100 Subject: [PATCH 01/10] perf jvmti: Add check for java alternatives cmd in Makefile This patch modifies the jvmti makefile to check if the /usr/sbin/java-update-alternatives utility is present. If so, then use it, if not then use the altenatives command. This helps handle the difference between Ubuntu and Fedora Linux distributions. Signed-off-by: Stephane Eranian Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455604661-9357-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile index 5968f8332a28..0277a64b391b 100644 --- a/tools/perf/jvmti/Makefile +++ b/tools/perf/jvmti/Makefile @@ -35,8 +35,12 @@ SOLIBEXT=so # The following works at least on fedora 23, you may need the next # line for other distros. +ifeq (,$(wildcard /usr/sbin/update-java-alternatives)) JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') -#JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +else +JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +endif + # -lrt required in 32-bit mode for clock_gettime() LIBS=-lelf -lrt INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux From 975f14fa8f2996604f248552eee4403def34bf5e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:42 +0100 Subject: [PATCH 02/10] tools lib api: Add debug output support Adding support for warning/info/debug output within libapi code. Adding following macros: pr_warning(fmt, ...) pr_info(fmt, ...) pr_debug(fmt, ...) Also adding libapi_set_print function to set above functions. This will be used in perf to set standard debug handlers for libapi. Adding 2 header files: debug.h - to be used outside libapi, contains libapi_set_print interface debug-internal.h - to be used within libapi, contains pr_warning/pr_info/pr_debug definitions Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Build | 1 + tools/lib/api/Makefile | 1 + tools/lib/api/debug-internal.h | 20 ++++++++++++++++++++ tools/lib/api/debug.c | 28 ++++++++++++++++++++++++++++ tools/lib/api/debug.h | 10 ++++++++++ 5 files changed, 60 insertions(+) create mode 100644 tools/lib/api/debug-internal.h create mode 100644 tools/lib/api/debug.c create mode 100644 tools/lib/api/debug.h diff --git a/tools/lib/api/Build b/tools/lib/api/Build index e8b8a23b9bf4..954c644f7ad9 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -1,3 +1,4 @@ libapi-y += fd/ libapi-y += fs/ libapi-y += cpu.o +libapi-y += debug.o diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index d85904dc9b38..bbc82c614bee 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +CFLAGS += -I$(srctree)/tools/lib/api RM = rm -f diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h new file mode 100644 index 000000000000..188f7880eafe --- /dev/null +++ b/tools/lib/api/debug-internal.h @@ -0,0 +1,20 @@ +#ifndef __API_DEBUG_INTERNAL_H__ +#define __API_DEBUG_INTERNAL_H__ + +#include "debug.h" + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libapi: " fmt, ##__VA_ARGS__); \ +} while (0) + +extern libapi_print_fn_t __pr_warning; +extern libapi_print_fn_t __pr_info; +extern libapi_print_fn_t __pr_debug; + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +#endif /* __API_DEBUG_INTERNAL_H__ */ diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c new file mode 100644 index 000000000000..5fa5cf500a1f --- /dev/null +++ b/tools/lib/api/debug.c @@ -0,0 +1,28 @@ +#include +#include +#include "debug.h" +#include "debug-internal.h" + +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +libapi_print_fn_t __pr_warning = __base_pr; +libapi_print_fn_t __pr_info = __base_pr; +libapi_print_fn_t __pr_debug; + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h new file mode 100644 index 000000000000..a0872f68fc56 --- /dev/null +++ b/tools/lib/api/debug.h @@ -0,0 +1,10 @@ +#ifndef __API_DEBUG_H__ +#define __API_DEBUG_H__ + +typedef int (*libapi_print_fn_t)(const char *, ...); + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug); + +#endif /* __API_DEBUG_H__ */ From 607bfbd7ffc60156ae0831c917497dc91a57dd8d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:43 +0100 Subject: [PATCH 03/10] tools lib api fs: Adopt filename__read_str from perf We already moved similar functions in here, also it'll be useful for sysfs__read_str addition in following patch. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 51 +++++++++++++++++++++++++++++++++++ tools/lib/api/fs/fs.h | 2 ++ tools/perf/util/trace-event.c | 1 + tools/perf/util/util.c | 48 --------------------------------- tools/perf/util/util.h | 1 - 5 files changed, 54 insertions(+), 49 deletions(-) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 459599d1b6c4..2cbf6773ca5d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -13,6 +13,7 @@ #include #include "fs.h" +#include "debug-internal.h" #define _STR(x) #x #define STR(x) _STR(x) @@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long *value) return err; } +#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ + +int filename__read_str(const char *filename, char **buf, size_t *sizep) +{ + size_t size = 0, alloc_size = 0; + void *bf = NULL, *nbf; + int fd, n, err = 0; + char sbuf[STRERR_BUFSIZE]; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -errno; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (!nbf) { + err = -ENOMEM; + break; + } + + bf = nbf; + } + + n = read(fd, bf + size, alloc_size - size); + if (n < 0) { + if (size) { + pr_warning("read failed %d: %s\n", errno, + strerror_r(errno, sbuf, sizeof(sbuf))); + err = 0; + } else + err = -errno; + + break; + } + + size += n; + } while (n > 0); + + if (!err) { + *sizep = size; + *buf = bf; + } else + free(bf); + + close(fd); + return err; +} + int sysfs__read_ull(const char *entry, unsigned long long *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index d024a7f682f6..858922b61141 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -2,6 +2,7 @@ #define __API_FS__ #include +#include /* * On most systems would have given us this, but not on some systems @@ -26,6 +27,7 @@ FS(tracefs) int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_str(const char *filename, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 802bb868d446..8ae051e0ec79 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "trace-event.h" #include "machine.h" #include "util.h" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b9e2843cfbe7..35b20dd454de 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -507,54 +507,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) return ret; } -int filename__read_str(const char *filename, char **buf, size_t *sizep) -{ - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; - - fd = open(filename, O_RDONLY); - if (fd < 0) - return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warning("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; - } else - free(bf); - - close(fd); - return err; -} - const char *get_filename_for_perf_kvm(void) { const char *filename; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index a8615816a00d..3dd04089e8be 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -303,7 +303,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); -int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); From 51c0396c600f49de9c3ff8f6fd83055de3709c3d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:44 +0100 Subject: [PATCH 04/10] tools lib api fs: Add sysfs__read_str function Adding sysfs__read_str function to ease up reading string files from sysfs. New interface is: int sysfs__read_str(const char *entry, char **buf, size_t *sizep); Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 13 +++++++++++++ tools/lib/api/fs/fs.h | 1 + 2 files changed, 14 insertions(+) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 2cbf6773ca5d..ef78c22ff44d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -377,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value) return filename__read_int(path, value); } +int sysfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_str(path, buf, sizep); +} + int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 858922b61141..9f6598098dc5 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -32,4 +32,5 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); +int sysfs__read_str(const char *entry, char **buf, size_t *sizep); #endif /* __API_FS__ */ From bedbdd4297224efcd7d668198e32fab14b76b98b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Feb 2016 11:48:38 -0300 Subject: [PATCH 05/10] perf debug: Rename __eprintf(va_list args) to veprintf Adhering to the naming convention used when va_args is in a printf like function, e.g. stdio.h. Cc: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-b5l3wt77ct28dcnriguxtvn6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 86d9c7302598..d6c8d2b2cd70 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -22,7 +22,7 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static int _eprintf(int level, int var, const char *fmt, va_list args) +int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; @@ -36,24 +36,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } -int veprintf(int level, int var, const char *fmt, va_list args) -{ - return _eprintf(level, var, fmt, args); -} - int eprintf(int level, int var, const char *fmt, ...) { va_list args; int ret; va_start(args, fmt); - ret = _eprintf(level, var, fmt, args); + ret = veprintf(level, var, fmt, args); va_end(args); return ret; } -static int __eprintf_time(u64 t, const char *fmt, va_list args) +static int veprintf_time(u64 t, const char *fmt, va_list args) { int ret = 0; u64 secs, usecs, nsecs = t; @@ -75,7 +70,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) if (var >= level) { va_start(args, fmt); - ret = __eprintf_time(t, fmt, args); + ret = veprintf_time(t, fmt, args); va_end(args); } @@ -91,7 +86,7 @@ void pr_stat(const char *fmt, ...) va_list args; va_start(args, fmt); - _eprintf(1, verbose, fmt, args); + veprintf(1, verbose, fmt, args); va_end(args); eprintf(1, verbose, "\n"); } From dd629cc0975349c99d5483402bca1ef16313c209 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 14 Feb 2016 17:03:45 +0100 Subject: [PATCH 06/10] perf tools: Initialize libapi debug output Setting libapi debug output functions to use perf functions. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1455465826-8426-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 2 ++ tools/perf/util/debug.c | 21 +++++++++++++++++++++ tools/perf/util/debug.h | 1 + 3 files changed, 24 insertions(+) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index a929618b8eb6..144047c396f0 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -613,6 +613,8 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); + perf_debug_setup(); + while (1) { static int done_help; int was_alias = run_argv(&argc, &argv); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d6c8d2b2cd70..ff7e86ad1b06 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "cache.h" #include "color.h" @@ -187,3 +188,23 @@ int perf_debug_option(const char *str) free(s); return 0; } + +#define DEBUG_WRAPPER(__n, __l) \ +static int pr_ ## __n ## _wrapper(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + ret = veprintf(__l, verbose, fmt, args); \ + va_end(args); \ + return ret; \ +} + +DEBUG_WRAPPER(warning, 0); +DEBUG_WRAPPER(debug, 1); + +void perf_debug_setup(void) +{ + libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8b9a088c32ab..14bafda79eda 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__( int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void perf_debug_setup(void); #endif /* __PERF_DEBUG_H */ From 720e98b5faf10cfd12b7821dbdcc41c9747bd13e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 16 Feb 2016 16:01:43 +0100 Subject: [PATCH 07/10] perf tools: Add perf data cache feature Storing CPU cache details under perf data. It's stored as new HEADER_CACHE feature and it's displayed under header info with -I option: $ perf report --header-only -I ... # CPU cache info: # L1 Data 32K [0-1] # L1 Instruction 32K [0-1] # L1 Data 32K [2-3] # L1 Instruction 32K [2-3] # L2 Unified 256K [0-1] # L2 Unified 256K [2-3] # L3 Unified 4096K [0-3] ... All distinct caches are stored/displayed. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160216150143.GA7119@krava.brq.redhat.com [ Fixed leak on process_caches(), s/cache_level/cpu_cache_level/g ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 13 ++ tools/perf/util/env.h | 15 +++ tools/perf/util/header.c | 270 +++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 1 + 4 files changed, 299 insertions(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 7dd5939dea2e..49a11d9d8b8f 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,8 @@ struct perf_env perf_env; void perf_env__exit(struct perf_env *env) { + int i; + zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env) zfree(&env->numa_nodes); zfree(&env->pmu_mappings); zfree(&env->cpu); + + for (i = 0; i < env->caches_cnt; i++) + cpu_cache_level__free(&env->caches[i]); + zfree(&env->caches); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) @@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) env->nr_cpus_avail = nr_cpus; return 0; } + +void cpu_cache_level__free(struct cpu_cache_level *cache) +{ + free(cache->type); + free(cache->map); + free(cache->size); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 0132b9557c02..56cffb60a0b4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -1,11 +1,23 @@ #ifndef __PERF_ENV_H #define __PERF_ENV_H +#include + struct cpu_topology_map { int socket_id; int core_id; }; +struct cpu_cache_level { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + char *type; + char *size; + char *map; +}; + struct perf_env { char *hostname; char *os_release; @@ -31,6 +43,8 @@ struct perf_env { char *numa_nodes; char *pmu_mappings; struct cpu_topology_map *cpu; + struct cpu_cache_level *caches; + int caches_cnt; }; extern struct perf_env perf_env; @@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); +void cpu_cache_level__free(struct cpu_cache_level *cache); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f50b7235ecb6..73e38e472ecd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -23,6 +23,8 @@ #include "strbuf.h" #include "build-id.h" #include "data.h" +#include +#include "asm/bug.h" /* * magic2 = "PERFILE2" @@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h, return err; } +static int cpu_cache_level__sort(const void *a, const void *b) +{ + struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; + struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b; + + return cache_a->level - cache_b->level; +} + +static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b) +{ + if (a->level != b->level) + return false; + + if (a->line_size != b->line_size) + return false; + + if (a->sets != b->sets) + return false; + + if (a->ways != b->ways) + return false; + + if (strcmp(a->type, b->type)) + return false; + + if (strcmp(a->size, b->size)) + return false; + + if (strcmp(a->map, b->map)) + return false; + + return true; +} + +static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level) +{ + char path[PATH_MAX], file[PATH_MAX]; + struct stat st; + size_t len; + + scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level); + scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path); + + if (stat(file, &st)) + return 1; + + scnprintf(file, PATH_MAX, "%s/level", path); + if (sysfs__read_int(file, (int *) &cache->level)) + return -1; + + scnprintf(file, PATH_MAX, "%s/coherency_line_size", path); + if (sysfs__read_int(file, (int *) &cache->line_size)) + return -1; + + scnprintf(file, PATH_MAX, "%s/number_of_sets", path); + if (sysfs__read_int(file, (int *) &cache->sets)) + return -1; + + scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path); + if (sysfs__read_int(file, (int *) &cache->ways)) + return -1; + + scnprintf(file, PATH_MAX, "%s/type", path); + if (sysfs__read_str(file, &cache->type, &len)) + return -1; + + cache->type[len] = 0; + cache->type = rtrim(cache->type); + + scnprintf(file, PATH_MAX, "%s/size", path); + if (sysfs__read_str(file, &cache->size, &len)) { + free(cache->type); + return -1; + } + + cache->size[len] = 0; + cache->size = rtrim(cache->size); + + scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); + if (sysfs__read_str(file, &cache->map, &len)) { + free(cache->map); + free(cache->type); + return -1; + } + + cache->map[len] = 0; + cache->map = rtrim(cache->map); + return 0; +} + +static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c) +{ + fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map); +} + +static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) +{ + u32 i, cnt = 0; + long ncpus; + u32 nr, cpu; + u16 level; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return -1; + + nr = (u32)(ncpus & UINT_MAX); + + for (cpu = 0; cpu < nr; cpu++) { + for (level = 0; level < 10; level++) { + struct cpu_cache_level c; + int err; + + err = cpu_cache_level__read(&c, cpu, level); + if (err < 0) + return err; + + if (err == 1) + break; + + for (i = 0; i < cnt; i++) { + if (cpu_cache_level__cmp(&c, &caches[i])) + break; + } + + if (i == cnt) + caches[cnt++] = c; + else + cpu_cache_level__free(&c); + + if (WARN_ONCE(cnt == size, "way too many cpu caches..")) + goto out; + } + } + out: + *cntp = cnt; + return 0; +} + +#define MAX_CACHES 2000 + +static int write_cache(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + struct cpu_cache_level caches[MAX_CACHES]; + u32 cnt = 0, i, version = 1; + int ret; + + ret = build_caches(caches, MAX_CACHES, &cnt); + if (ret) + goto out; + + qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort); + + ret = do_write(fd, &version, sizeof(u32)); + if (ret < 0) + goto out; + + ret = do_write(fd, &cnt, sizeof(u32)); + if (ret < 0) + goto out; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level *c = &caches[i]; + + #define _W(v) \ + ret = do_write(fd, &c->v, sizeof(u32)); \ + if (ret < 0) \ + goto out; + + _W(level) + _W(line_size) + _W(sets) + _W(ways) + #undef _W + + #define _W(v) \ + ret = do_write_string(fd, (const char *) c->v); \ + if (ret < 0) \ + goto out; + + _W(type) + _W(size) + _W(map) + #undef _W + } + +out: + for (i = 0; i < cnt; i++) + cpu_cache_level__free(&caches[i]); + return ret; +} + static int write_stat(int fd __maybe_unused, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) @@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused, fprintf(fp, "# contains stat data\n"); } +static void print_cache(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp __maybe_unused) +{ + int i; + + fprintf(fp, "# CPU cache info:\n"); + for (i = 0; i < ph->env.caches_cnt; i++) { + fprintf(fp, "# "); + cpu_cache_level__fprintf(fp, &ph->env.caches[i]); + } +} + static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section, return err; } +static int process_cache(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, int fd __maybe_unused, + void *data __maybe_unused) +{ + struct cpu_cache_level *caches; + u32 cnt, i, version; + + if (readn(fd, &version, sizeof(version)) != sizeof(version)) + return -1; + + if (ph->needs_swap) + version = bswap_32(version); + + if (version != 1) + return -1; + + if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt)) + return -1; + + if (ph->needs_swap) + cnt = bswap_32(cnt); + + caches = zalloc(sizeof(*caches) * cnt); + if (!caches) + return -1; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level c; + + #define _R(v) \ + if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\ + goto out_free_caches; \ + if (ph->needs_swap) \ + c.v = bswap_32(c.v); \ + + _R(level) + _R(line_size) + _R(sets) + _R(ways) + #undef _R + + #define _R(v) \ + c.v = do_read_string(fd, ph); \ + if (!c.v) \ + goto out_free_caches; + + _R(type) + _R(size) + _R(map) + #undef _R + + caches[i] = c; + } + + ph->env.caches = caches; + ph->env.caches_cnt = cnt; + return 0; +out_free_caches: + free(caches); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_GROUP_DESC, group_desc), FEAT_OPP(HEADER_AUXTRACE, auxtrace), FEAT_OPA(HEADER_STAT, stat), + FEAT_OPF(HEADER_CACHE, cache), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index cff9892452ee..3d87ca823c0a 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -32,6 +32,7 @@ enum { HEADER_GROUP_DESC, HEADER_AUXTRACE, HEADER_STAT, + HEADER_CACHE, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; From 140aeadc1fb51d38130fd06a272a381f22e3070c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Jan 2016 09:06:49 -0800 Subject: [PATCH 08/10] perf stat: Abstract stat metrics printing Abstract the printing of shadow metrics. Instead of every metric calling fprintf directly and taking care of indentation, use two call backs: one to print metrics and another to start a new line. This will allow adding metrics to CSV mode and also using them for other purposes. The computation of padding is now done in the central callback, instead of every metric doing it manually. This makes it easier to add new metrics. v2: Refactor functions, printout now does more. Move shadow printing. Improve fallback callbacks. Don't use void * callback data. v3: Remove unnecessary hunk. Add typedef for new_line v4: Remove unnecessary hunk. Don't print metrics for CSV/interval mode yet. Move printout change to separate patch. v5: Fix bisect bugs. Avoid bogus frontend cycles printing. Fix indentation in different aggregation modes. v6: Delay newline handling Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1454173616-17710-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 71 +++++++++++- tools/perf/util/stat-shadow.c | 211 +++++++++++++++++++--------------- tools/perf/util/stat.h | 15 ++- 3 files changed, 194 insertions(+), 103 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 038e877081b6..fabcadba1f19 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -735,6 +735,58 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) } } +struct outstate { + FILE *fh; + bool newline; +}; + +#define METRIC_LEN 35 + +static void new_line_std(void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct outstate *os) +{ + fputc('\n', os->fh); + if (stat_config.aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + if (stat_config.aggr_mode == AGGR_CORE) + fprintf(os->fh, " "); + if (stat_config.aggr_mode == AGGR_SOCKET) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -795,20 +847,27 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) static void printout(int id, int nr, struct perf_evsel *counter, double uval) { - int cpu = cpu_map__id_to_cpu(id); + struct outstate os = { .fh = stat_config.output }; + struct perf_stat_output_ctx out; + print_metric_t pm = print_metric_std; + void (*nl)(void *); - if (stat_config.aggr_mode == AGGR_GLOBAL) - cpu = 0; + nl = new_line_std; if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else abs_printout(id, nr, counter, uval); + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + if (!csv_output && !stat_config.interval) - perf_stat__print_shadow_stats(stat_config.output, counter, - uval, cpu, - stat_config.aggr_mode); + perf_stat__print_shadow_stats(counter, uval, + stat_config.aggr_mode == AGGR_GLOBAL ? 0 : + cpu_map__id_to_cpu(id), + &out); } static void print_aggr(char *prefix) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ac03146889d..4d8f18581b9b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -137,9 +137,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(FILE *out, int cpu, +static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -152,14 +153,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " frontend cycles idle "); + if (ratio) + out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + ratio); + else + out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(FILE *out, int cpu, +static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -172,14 +176,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " backend cycles idle "); + out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(FILE *out, int cpu, +static void print_branch_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -192,14 +195,13 @@ static void print_branch_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all branches "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(FILE *out, int cpu, +static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -212,14 +214,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-dcache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(FILE *out, int cpu, +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -231,15 +232,13 @@ static void print_l1_icache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-icache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(FILE *out, int cpu, +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -251,15 +250,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all dTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(FILE *out, int cpu, +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -271,15 +268,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all iTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(FILE *out, int cpu, +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -291,15 +286,15 @@ static void print_ll_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all LL-cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr) +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out) { + void *ctxp = out->ctx; + print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -307,119 +302,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %5.2f insns per cycle ", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "insn per cycle", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + out->new_line(ctxp); if (total && avg) { ratio = total / avg; - fprintf(out, "\n"); - if (aggr == AGGR_NONE) - fprintf(out, " "); - fprintf(out, " # %5.2f stalled cycles per insn", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "stalled cycles per insn", + ratio); + } else { + print_metric(ctxp, NULL, NULL, + "stalled cycles per insn", 0); } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + if (runtime_branches_stats[ctx][cpu].n != 0) + print_branch_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_dcache_stats[ctx][cpu].n != 0) + print_l1_dcache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_icache_stats[ctx][cpu].n != 0) + print_l1_icache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_itlb_cache_stats[ctx][cpu].n != 0) + print_itlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[ctx][cpu].n != 0) { + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_ll_cache_stats[ctx][cpu].n != 0) + print_ll_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); if (total) ratio = avg * 100 / total; - fprintf(out, " # %8.3f %% of all cache refs ", ratio); - + if (runtime_cacherefs_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.3f %%", + "of all cache refs", ratio); + else + print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(out, cpu, evsel, avg); + print_stalled_cycles_frontend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(out, cpu, evsel, avg); + print_stalled_cycles_backend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %8.3f GHz ", ratio); + print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) - fprintf(out, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); + print_metric(ctxp, NULL, + "%7.2f%%", "transactional cycles", + 100.0 * (avg / total)); + else + print_metric(ctxp, NULL, NULL, "transactional cycles", + 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) total2 = avg; if (total) - fprintf(out, - " # %5.2f%% aborted cycles ", + print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); - } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + else + print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / transaction ", ratio); - } else if (perf_stat_evsel__is(evsel, ELISION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.0f", + "cycles / transaction", ratio); + else + print_metric(ctxp, NULL, NULL, "cycles / transaction", + 0); + } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / elision ", ratio); + print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - fprintf(out, " # %8.3f CPUs utilized ", avg / ratio); + print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + avg / ratio); else - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; + char unit_buf[10]; total = avg_stats(&runtime_nsecs_stats[cpu]); @@ -429,9 +450,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, ratio *= 1000; unit = 'K'; } - - fprintf(out, " # %8.3f %c/sec ", ratio, unit); + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, NULL, 0); } } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2af63c9cb59f..f02af68adc04 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -68,11 +68,22 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel); extern struct stats walltime_nsecs_stats; +typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, + const char *fmt, double val); +typedef void (*new_line_t )(void *ctx); + void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr); +struct perf_stat_output_ctx { + void *ctx; + print_metric_t print_metric; + new_line_t new_line; +}; + +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); From f94833929032ad23412d3970beed6769a2fdbc19 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Jan 2016 09:06:50 -0800 Subject: [PATCH 09/10] perf stat: Add support for metrics in interval mode Now that we can modify the metrics printout functions easily, it's straight forward to support metric printing for interval mode. All that is needed is to print the time stamp on every new line. Pass the prefix into the context and print it out. v2: Move wrong hunk to here. Committer note: Before: [root@jouet ~]# perf stat -I 1000 -e instructions,cycles sleep 1 # time counts unit events 1.000168216 538,913 instructions 1.000168216 748,765 cycles 1.000660048 153,741 instructions 1.000660048 214,066 cycles After: # perf stat -I 1000 -e instructions,cycles sleep 1 # time counts unit events 1.000215928 519,620 instructions # 0.69 insn per cycle 1.000215928 752,003 cycles 1.000946033 148,502 instructions # 0.33 insn per cycle 1.000946033 160,104 cycles Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1454173616-17710-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fabcadba1f19..5710bdb058d2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -738,6 +738,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) struct outstate { FILE *fh; bool newline; + const char *prefix; }; #define METRIC_LEN 35 @@ -752,6 +753,7 @@ static void new_line_std(void *ctx) static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); + fputs(os->prefix, os->fh); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, " "); if (stat_config.aggr_mode == AGGR_CORE) @@ -845,10 +847,14 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } -static void printout(int id, int nr, struct perf_evsel *counter, double uval) +static void printout(int id, int nr, struct perf_evsel *counter, double uval, + char *prefix) { - struct outstate os = { .fh = stat_config.output }; struct perf_stat_output_ctx out; + struct outstate os = { + .fh = stat_config.output, + .prefix = prefix ? prefix : "" + }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -863,7 +869,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval) out.new_line = nl; out.ctx = &os; - if (!csv_output && !stat_config.interval) + if (!csv_output) perf_stat__print_shadow_stats(counter, uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : cpu_map__id_to_cpu(id), @@ -923,7 +929,7 @@ static void print_aggr(char *prefix) continue; } uval = val * counter->scale; - printout(id, nr, counter, uval); + printout(id, nr, counter, uval, prefix); if (!csv_output) print_noise(counter, 1.0); @@ -954,7 +960,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval); + printout(thread, 0, counter, uval, prefix); if (!csv_output) print_noise(counter, 1.0); @@ -1004,7 +1010,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) } uval = avg * counter->scale; - printout(-1, 0, counter, uval); + printout(-1, 0, counter, uval, prefix); print_noise(counter, avg); @@ -1057,7 +1063,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) } uval = val * counter->scale; - printout(cpu, 0, counter, uval); + printout(cpu, 0, counter, uval, prefix); if (!csv_output) print_noise(counter, 1.0); print_running(run, ena); From cb110f471025f3278978aaccb18f3164ea2b8189 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Jan 2016 09:06:51 -0800 Subject: [PATCH 10/10] perf stat: Move noise/running printing into printout Move the running/noise printing into printout to avoid duplicated code in the callers. v2: Merged with other patches. Remove unnecessary hunk. Readd hunk that ended in earlier patch. v3: Fix noise/running output in CSV mode v4: Merge with later patch that also moves not supported printing. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1454173616-17710-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 123 ++++++++++---------------------------- 1 file changed, 32 insertions(+), 91 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5710bdb058d2..15e4fcf34e0c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -848,7 +848,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix) + char *prefix, u64 run, u64 ena, double noise) { struct perf_stat_output_ctx out; struct outstate os = { @@ -860,6 +860,30 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, nl = new_line_std; + if (run == 0 || ena == 0) { + aggr_printout(counter, id, nr); + + fprintf(stat_config.output, "%*s%s", + csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + csv_sep); + + fprintf(stat_config.output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(stat_config.output, "%*s", + csv_output ? 0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(stat_config.output, "%s%s", + csv_sep, counter->cgrp->name); + + print_running(run, ena); + return; + } + if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else @@ -874,6 +898,9 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : cpu_map__id_to_cpu(id), &out); + + print_noise(counter, noise); + print_running(run, ena); } static void print_aggr(char *prefix) @@ -904,36 +931,8 @@ static void print_aggr(char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - aggr_printout(counter, id, nr); - - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; - } uval = val * counter->scale; - printout(id, nr, counter, uval, prefix); - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(id, nr, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -960,12 +959,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix); - - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -979,7 +973,6 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) FILE *output = stat_config.output; struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); - int scaled = counter->counts->scaled; double uval; double avg_enabled, avg_running; @@ -989,32 +982,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1 || !counter->supported) { - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", csv_sep, counter->cgrp->name); - - print_running(avg_running, avg_enabled); - fputc('\n', output); - return; - } - uval = avg * counter->scale; - printout(-1, 0, counter, uval, prefix); - - print_noise(counter, avg); - - print_running(avg_running, avg_enabled); + printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); fprintf(output, "\n"); } @@ -1037,36 +1006,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s", - csv_output ? 0 : -4, - perf_evsel__cpus(counter)->map[cpu], csv_sep, - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; - } - uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix); - if (!csv_output) - print_noise(counter, 1.0); - print_running(run, ena); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); }