From 0c69b93112428d43b8c103d032143ea89b895d43 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 2 Jul 2019 14:12:40 +0200 Subject: [PATCH 01/17] objtool: Fix build by linking against tools/lib/ctype.o sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix objtool build, because it adds _ctype dependency via isspace call patch. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: André Goddard Rosa Cc: Clark Williams Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Fixes: 7bd330de43fd ("tools lib: Adopt skip_spaces() from the kernel sources") Link: http://lkml.kernel.org/r/20190702121240.GB12694@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/objtool/Build | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/objtool/Build b/tools/objtool/Build index 749becdf5b90..8dc4f0848362 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -9,6 +9,7 @@ objtool-y += special.o objtool-y += objtool.o objtool-y += libstring.o +objtool-y += libctype.o objtool-y += str_error_r.o CFLAGS += -I$(srctree)/tools/lib @@ -17,6 +18,10 @@ $(OUTPUT)libstring.o: ../lib/string.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)libctype.o: ../lib/ctype.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + $(OUTPUT)str_error_r.o: ../lib/str_error_r.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) From 0cec2447e7d209b77e52c6ec62169cc564df54e7 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:22:58 +0800 Subject: [PATCH 02/17] perf symbol: Create block_info structure 'perf diff' currently can only diff symbols(functions). We should expand it to diff cycles of individual programs blocks as reported by timed LBR. This would allow to identify changes in specific code accurately. We need a new structure to maintain the basic block information, such as, symbol(function), start/end address of this block, cycles. This patch creates this structure and with some ops. 
Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 22 ++++++++++++++++++++++ tools/perf/util/symbol.h | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 46d2c03814a1..ae2ce255e848 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2351,3 +2351,25 @@ struct mem_info *mem_info__new(void) refcount_set(&mi->refcnt, 1); return mi; } + +struct block_info *block_info__get(struct block_info *bi) +{ + if (bi) + refcount_inc(&bi->refcnt); + return bi; +} + +void block_info__put(struct block_info *bi) +{ + if (bi && refcount_dec_and_test(&bi->refcnt)) + free(bi); +} + +struct block_info *block_info__new(void) +{ + struct block_info *bi = zalloc(sizeof(*bi)); + + if (bi) + refcount_set(&bi->refcnt, 1); + return bi; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9a8fe012910a..12755b42ea93 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,17 @@ struct mem_info { refcount_t refcnt; }; +struct block_info { + struct symbol *sym; + u64 start; + u64 end; + u64 cycles; + u64 cycles_aggr; + int num; + int num_aggr; + refcount_t refcnt; +}; + struct addr_location { struct machine *machine; struct thread *thread; @@ -332,4 +343,16 @@ static inline void __mem_info__zput(struct mem_info **mi) #define mem_info__zput(mi) __mem_info__zput(&mi) +struct block_info *block_info__new(void); +struct block_info *block_info__get(struct block_info *bi); +void block_info__put(struct block_info *bi); + +static inline void __block_info__zput(struct block_info **bi) +{ + block_info__put(*bi); + *bi = NULL; +} + +#define block_info__zput(bi) __block_info__zput(&bi) + #endif /* __PERF_SYMBOL */ From 
fe96245c7f38c4ea92c1c599b43f176e27d9921e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:22:59 +0800 Subject: [PATCH 03/17] perf hists: Add block_info in hist_entry The block_info contains the program basic block information, i.e., it contains the start address and the end address of this basic block and how many cycles it takes. We need to compare, sort and even print out the basic blocks in some order, e.g. sorted by cycles. For this purpose, we add a block_info field to hist_entry. In order not to impact the current interface, we create a new function hists__add_entry_block. v6: --- Remove the 'ops' argument in hists__add_entry_block Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 20 ++++++++++++++++++-- tools/perf/util/hist.h | 5 +++++ tools/perf/util/sort.h | 1 + 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index fb3271fd420c..c4defff151ed 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -574,6 +574,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, */ mem_info__zput(entry->mem_info); + block_info__zput(entry->block_info); + /* If the map of an existing hist_entry has * become out-of-date due to an exec() or * similar, update it. 
Otherwise we will @@ -645,6 +647,7 @@ __hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, + struct block_info *block_info, struct perf_sample *sample, bool sample_self, struct hist_entry_ops *ops) @@ -677,6 +680,7 @@ __hists__add_entry(struct hists *hists, .hists = hists, .branch_info = bi, .mem_info = mi, + .block_info = block_info, .transaction = sample->transaction, .raw_data = sample->raw_data, .raw_size = sample->raw_size, @@ -699,7 +703,7 @@ struct hist_entry *hists__add_entry(struct hists *hists, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, sample, sample_self, NULL); } @@ -712,10 +716,22 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, sample, sample_self, ops); } +struct hist_entry *hists__add_entry_block(struct hists *hists, + struct addr_location *al, + struct block_info *block_info) +{ + struct hist_entry entry = { + .block_info = block_info, + .hists = hists, + }, *he = hists__findnew_entry(hists, &entry, al, false); + + return he; +} + static int iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 76ff6c6d03b8..c670122b4e40 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -16,6 +16,7 @@ struct addr_location; struct map_symbol; struct mem_info; struct branch_info; +struct block_info; struct symbol; enum hist_filter { @@ -149,6 +150,10 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct perf_sample *sample, bool sample_self); +struct hist_entry *hists__add_entry_block(struct hists *hists, + struct addr_location *al, + struct 
block_info *bi); + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ce376a73f964..43623fa874b2 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -144,6 +144,7 @@ struct hist_entry { long time; struct hists *hists; struct mem_info *mem_info; + struct block_info *block_info; void *raw_data; u32 raw_size; int num_res; From 30d815534e63d737f8004414d12b1679c032e0dd Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:23:00 +0800 Subject: [PATCH 04/17] perf diff: Check if all data files with branch stacks We will expand perf diff to support diff cycles of individual programs blocks, so it requires all data files having branch stacks. This patch checks HEADER_BRANCH_STACK in header, and only set the flag has_br_stack when HEADER_BRANCH_STACK are set in all data files. v2: --- Move check_file_brstack() from __cmd_diff() to cmd_diff(). Because later patch will check flag 'has_br_stack' before ui_init(). Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 6e7920793729..a7e04202955c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -32,6 +32,7 @@ struct perf_diff { struct perf_time_interval *ptime_range; int range_size; int range_num; + bool has_br_stack; }; /* Diff command specific HPP columns. 
*/ @@ -873,6 +874,31 @@ static int parse_time_str(struct data__file *d, char *abstime_ostr, return ret; } +static int check_file_brstack(void) +{ + struct data__file *d; + bool has_br_stack; + int i; + + data__for_each_file(i, d) { + d->session = perf_session__new(&d->data, false, &pdiff.tool); + if (!d->session) { + pr_err("Failed to open %s\n", d->data.path); + return -1; + } + + has_br_stack = perf_header__has_feat(&d->session->header, + HEADER_BRANCH_STACK); + perf_session__delete(d->session); + if (!has_br_stack) + return 0; + } + + /* Set only all files having branch stacks */ + pdiff.has_br_stack = true; + return 0; +} + static int __cmd_diff(void) { struct data__file *d; @@ -1487,6 +1513,9 @@ int cmd_diff(int argc, const char **argv) if (data_init(argc, argv) < 0) return -1; + if (check_file_brstack() < 0) + return -1; + if (ui_init() < 0) return -1; From 99150a1faab2963d3f5bf353354afe79bdddb75f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:23:01 +0800 Subject: [PATCH 05/17] perf diff: Use hists to manage basic blocks per symbol The hist__account_cycles() can account cycles per basic block. The basic block information is saved in cycles_hist structure. This patch processes each symbol, get basic blocks from cycles_hist and add the basic block entries to a new hists (in 'struct block_hist'). Using a hists is because we need to compare, sort and print the basic blocks later. v6: --- Since 'ops' argument is removed from hists__add_entry_block, update the code accordingly. No functional change. v5: --- Since now we still carry block_info in 'struct hist_entry' we don't need to use our own new/free ops for hist entries. And the block_info is released in hist_entry__delete. v3: --- 1. In v2, we put block stuffs in 'struct hist_entry', but it's not a good design. In v3, we create a new 'struct block_hist' and cast the 'struct hist_entry' to 'struct block_hist' in some places, which can avoid adding new stuffs in 'struct hist_entry'. 2. 
abs() -> labs(), in block_cycles_diff_cmp(). v2: --- v1 adds the basic block entries to per data-file hists but v2 adds the basic block entries to per symbol hists. That is to keep current perf-diff format. Will show the result in next patches. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 190 +++++++++++++++++++++++++++++++++++++- tools/perf/util/hist.c | 3 + tools/perf/util/sort.h | 12 +++ 3 files changed, 202 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index a7e04202955c..83b8c0f3fb16 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -20,6 +20,7 @@ #include "util/data.h" #include "util/config.h" #include "util/time-utils.h" +#include "util/annotate.h" #include #include @@ -87,11 +88,14 @@ static s64 compute_wdiff_w2; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +static struct addr_location dummy_al; + enum { COMPUTE_DELTA, COMPUTE_RATIO, COMPUTE_WEIGHTED_DIFF, COMPUTE_DELTA_ABS, + COMPUTE_CYCLES, COMPUTE_MAX, }; @@ -100,6 +104,7 @@ const char *compute_names[COMPUTE_MAX] = { [COMPUTE_DELTA_ABS] = "delta-abs", [COMPUTE_RATIO] = "ratio", [COMPUTE_WEIGHTED_DIFF] = "wdiff", + [COMPUTE_CYCLES] = "cycles", }; static int compute = COMPUTE_DELTA_ABS; @@ -234,6 +239,8 @@ static int setup_compute(const struct option *opt, const char *str, for (i = 0; i < COMPUTE_MAX; i++) if (!strcmp(cstr, compute_names[i])) { *cp = i; + if (i == COMPUTE_CYCLES) + break; return setup_compute_opt(option); } @@ -336,6 +343,31 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, return -1; } +static void *block_hist_zalloc(size_t size) +{ + struct block_hist *bh; + + bh = zalloc(size + sizeof(*bh)); + if (!bh) + return 
NULL; + + return &bh->he; +} + +static void block_hist_free(void *he) +{ + struct block_hist *bh; + + bh = container_of(he, struct block_hist, he); + hists__delete_entries(&bh->block_hists); + free(bh); +} + +struct hist_entry_ops block_hist_ops = { + .new = block_hist_zalloc, + .free = block_hist_free, +}; + static int diff__process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -363,9 +395,22 @@ static int diff__process_sample_event(struct perf_tool *tool, goto out_put; } - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) { - pr_warning("problem incrementing symbol period, skipping event\n"); - goto out_put; + if (compute != COMPUTE_CYCLES) { + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, + true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } + } else { + if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, + NULL, NULL, sample, true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } + + hist__account_cycles(sample->branch_stack, &al, sample, false); } /* @@ -475,6 +520,127 @@ static void hists__baseline_only(struct hists *hists) } } +static int64_t block_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct block_info *bi_l = left->block_info; + struct block_info *bi_r = right->block_info; + int cmp; + + if (!bi_l->sym || !bi_r->sym) { + if (!bi_l->sym && !bi_r->sym) + return 0; + else if (!bi_l->sym) + return -1; + else + return 1; + } + + if (bi_l->sym == bi_r->sym) { + if (bi_l->start == bi_r->start) { + if (bi_l->end == bi_r->end) + return 0; + else + return (int64_t)(bi_r->end - bi_l->end); + } else + return (int64_t)(bi_r->start - bi_l->start); + } else { + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + return cmp; + } + + if (bi_l->sym->start != bi_r->sym->start) + return (int64_t)(bi_r->sym->start - 
bi_l->sym->start); + + return (int64_t)(bi_r->sym->end - bi_l->sym->end); +} + +static int64_t block_cycles_diff_cmp(struct hist_entry *left, + struct hist_entry *right) +{ + bool pairs_left = hist_entry__has_pairs(left); + bool pairs_right = hist_entry__has_pairs(right); + s64 l, r; + + if (!pairs_left && !pairs_right) + return 0; + + l = labs(left->diff.cycles); + r = labs(right->diff.cycles); + return r - l; +} + +static int64_t block_sort(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return block_cycles_diff_cmp(right, left); +} + +static void init_block_hist(struct block_hist *bh) +{ + __hists__init(&bh->block_hists, &bh->block_list); + perf_hpp_list__init(&bh->block_list); + + INIT_LIST_HEAD(&bh->block_fmt.list); + INIT_LIST_HEAD(&bh->block_fmt.sort_list); + bh->block_fmt.cmp = block_cmp; + bh->block_fmt.sort = block_sort; + perf_hpp_list__register_sort_field(&bh->block_list, + &bh->block_fmt); + bh->valid = true; +} + +static void init_block_info(struct block_info *bi, struct symbol *sym, + struct cyc_hist *ch, int offset) +{ + bi->sym = sym; + bi->start = ch->start; + bi->end = offset; + bi->cycles = ch->cycles; + bi->cycles_aggr = ch->cycles_aggr; + bi->num = ch->num; + bi->num_aggr = ch->num_aggr; +} + +static int process_block_per_sym(struct hist_entry *he) +{ + struct annotation *notes; + struct cyc_hist *ch; + struct block_hist *bh; + + if (!he->ms.map || !he->ms.sym) + return 0; + + notes = symbol__annotation(he->ms.sym); + if (!notes || !notes->src || !notes->src->cycles_hist) + return 0; + + bh = container_of(he, struct block_hist, he); + init_block_hist(bh); + + ch = notes->src->cycles_hist; + for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { + if (ch[i].num_aggr) { + struct block_info *bi; + struct hist_entry *he_block; + + bi = block_info__new(); + if (!bi) + return -1; + + init_block_info(bi, he->ms.sym, &ch[i], i); + he_block = hists__add_entry_block(&bh->block_hists, + &dummy_al, 
bi); + if (!he_block) { + block_info__put(bi); + return -1; + } + } + } + + return 0; +} + static void hists__precompute(struct hists *hists) { struct rb_root_cached *root; @@ -494,6 +660,9 @@ static void hists__precompute(struct hists *hists) he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); + if (compute == COMPUTE_CYCLES) + process_block_per_sym(he); + data__for_each_file_new(i, d) { pair = get_pair_data(he, d); if (!pair) @@ -510,6 +679,9 @@ static void hists__precompute(struct hists *hists) case COMPUTE_WEIGHTED_DIFF: compute_wdiff(he, pair); break; + case COMPUTE_CYCLES: + process_block_per_sym(pair); + break; default: BUG_ON(1); } @@ -1411,6 +1583,13 @@ static int ui_init(void) case COMPUTE_DELTA_ABS: fmt->sort = hist_entry__cmp_delta_abs_idx; break; + case COMPUTE_CYCLES: + /* + * Should set since 'fmt->sort' is called without + * checking valid during sorting + */ + fmt->sort = hist_entry__cmp_nop; + break; default: BUG_ON(1); } @@ -1507,6 +1686,8 @@ int cmd_diff(int argc, const char **argv) if (quiet) perf_quiet_option(); + symbol__annotation_init(); + if (symbol__init(NULL) < 0) return -1; @@ -1516,6 +1697,9 @@ int cmd_diff(int argc, const char **argv) if (check_file_brstack() < 0) return -1; + if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) + return -1; + if (ui_init() < 0) return -1; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c4defff151ed..a6ba7d470eb8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1229,6 +1229,9 @@ void hist_entry__delete(struct hist_entry *he) mem_info__zput(he->mem_info); } + if (he->block_info) + block_info__zput(he->block_info); + zfree(&he->res_samples); zfree(&he->stat_acc); free_srcline(he->srcline); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 43623fa874b2..a0f232151d6f 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -79,6 +79,9 @@ struct hist_entry_diff { /* HISTC_WEIGHTED_DIFF */ s64 wdiff; + + /* 
PERF_HPP_DIFF__CYCLES */ + s64 cycles; }; }; @@ -286,6 +289,15 @@ struct sort_entry { u8 se_width_idx; }; +struct block_hist { + struct hists block_hists; + struct perf_hpp_list block_list; + struct perf_hpp_fmt block_fmt; + int block_idx; + bool valid; + struct hist_entry he; +}; + extern struct sort_entry sort_thread; extern struct list_head hist_entry__sort_list; From f3810817b20645ffae809feb30e9fe260fbd6c4d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:23:02 +0800 Subject: [PATCH 06/17] perf diff: Link same basic blocks among different data The target is to compare the performance difference (cycles diff) for the same basic blocks in different data files. The same basic block means same function, same start address and same end address. This patch finds the same basic blocks from different data files, links them together and re-sorts them by the cycles diff. v3: --- The block data is maintained by the new structure 'block_hist', so this patch is updated accordingly. v2: --- Since the basic block hists are now per symbol, the patch only links the basic block hists for the same symbol in different data files. 
Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-6-git-send-email-yao.jin@linux.intel.com [ sym->name is an array, not a pointer, so no need to check it for NULL, fixes de build in some distros ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 87 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 83b8c0f3fb16..fafb7b3f58fb 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -641,6 +641,82 @@ static int process_block_per_sym(struct hist_entry *he) return 0; } +static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) +{ + struct block_info *bi_a = a->block_info; + struct block_info *bi_b = b->block_info; + int cmp; + + if (!bi_a->sym || !bi_b->sym) + return -1; + + cmp = strcmp(bi_a->sym->name, bi_b->sym->name); + + if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) + return 0; + + return -1; +} + +static struct hist_entry *get_block_pair(struct hist_entry *he, + struct hists *hists_pair) +{ + struct rb_root_cached *root = hists_pair->entries_in; + struct rb_node *next = rb_first_cached(root); + int cmp; + + while (next != NULL) { + struct hist_entry *he_pair = rb_entry(next, struct hist_entry, + rb_node_in); + + next = rb_next(&he_pair->rb_node_in); + + cmp = block_pair_cmp(he_pair, he); + if (!cmp) + return he_pair; + } + + return NULL; +} + +static void compute_cycles_diff(struct hist_entry *he, + struct hist_entry *pair) +{ + pair->diff.computed = true; + if (pair->block_info->num && he->block_info->num) { + pair->diff.cycles = + pair->block_info->cycles_aggr / pair->block_info->num_aggr - + he->block_info->cycles_aggr / he->block_info->num_aggr; + } +} + +static void block_hists_match(struct hists *hists_base, + struct hists *hists_pair) +{ + struct rb_root_cached 
*root = hists_base->entries_in; + struct rb_node *next = rb_first_cached(root); + + while (next != NULL) { + struct hist_entry *he = rb_entry(next, struct hist_entry, + rb_node_in); + struct hist_entry *pair = get_block_pair(he, hists_pair); + + next = rb_next(&he->rb_node_in); + + if (pair) { + hist_entry__add_pair(pair, he); + compute_cycles_diff(he, pair); + } + } +} + +static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) +{ + /* Skip the calculation of column length in output_resort */ + he->filtered = true; + return 0; +} + static void hists__precompute(struct hists *hists) { struct rb_root_cached *root; @@ -653,6 +729,7 @@ static void hists__precompute(struct hists *hists) next = rb_first_cached(root); while (next != NULL) { + struct block_hist *bh, *pair_bh; struct hist_entry *he, *pair; struct data__file *d; int i; @@ -681,6 +758,16 @@ static void hists__precompute(struct hists *hists) break; case COMPUTE_CYCLES: process_block_per_sym(pair); + bh = container_of(he, struct block_hist, he); + pair_bh = container_of(pair, struct block_hist, + he); + + if (bh->valid && pair_bh->valid) { + block_hists_match(&bh->block_hists, + &pair_bh->block_hists); + hists__output_resort_cb(&pair_bh->block_hists, + NULL, filter_cb); + } break; default: BUG_ON(1); From b10c78c50964da952e6d4db78a3692ab051e6638 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:23:03 +0800 Subject: [PATCH 07/17] perf diff: Print the basic block cycles diff $ perf record -b ./div $ perf record -b ./div Following is the default perf diff output $ perf diff # Event 'cycles' # # Baseline Delta Abs Shared Object Symbol # ........ ......... ................ .................................. # 48.75% +0.33% div [.] main 8.21% -0.20% div [.] compute_flag 19.02% -0.12% libc-2.23.so [.] __random_r 16.17% -0.09% libc-2.23.so [.] __random 2.27% -0.03% div [.] rand@plt +0.02% [i915] [k] gen8_irq_handler 5.52% +0.02% libc-2.23.so [.] 
rand This patch creates a new computation selection 'cycles'. $ perf diff -c cycles # Event 'cycles' # # Baseline [Program Block Range] Cycles Diff Shared Object Symbol # ........ ....................................... ......................................... # 48.75% [div.c:42 -> div.c:45] 147 div [.] main 48.75% [div.c:31 -> div.c:40] 4 div [.] main 48.75% [div.c:40 -> div.c:40] 0 div [.] main 48.75% [div.c:42 -> div.c:42] 0 div [.] main 48.75% [div.c:42 -> div.c:44] 0 div [.] main 19.02% [random_r.c:357 -> random_r.c:360] 0 libc-2.23.so [.] __random_r 19.02% [random_r.c:357 -> random_r.c:373] 0 libc-2.23.so [.] __random_r 19.02% [random_r.c:357 -> random_r.c:376] 0 libc-2.23.so [.] __random_r 19.02% [random_r.c:357 -> random_r.c:380] 0 libc-2.23.so [.] __random_r 19.02% [random_r.c:357 -> random_r.c:392] 0 libc-2.23.so [.] __random_r 16.17% [random.c:288 -> random.c:291] 0 libc-2.23.so [.] __random 16.17% [random.c:288 -> random.c:291] 0 libc-2.23.so [.] __random 16.17% [random.c:288 -> random.c:295] 0 libc-2.23.so [.] __random 16.17% [random.c:288 -> random.c:297] 0 libc-2.23.so [.] __random 16.17% [random.c:291 -> random.c:291] 0 libc-2.23.so [.] __random 16.17% [random.c:293 -> random.c:293] 0 libc-2.23.so [.] __random 8.21% [div.c:22 -> div.c:22] 148 div [.] compute_flag 8.21% [div.c:22 -> div.c:25] 0 div [.] compute_flag 8.21% [div.c:27 -> div.c:28] 0 div [.] compute_flag 5.52% [rand.c:26 -> rand.c:27] 0 libc-2.23.so [.] rand 5.52% [rand.c:26 -> rand.c:28] 0 libc-2.23.so [.] rand 2.27% [rand@plt+0 -> rand@plt+0] 0 div [.] rand@plt 0.01% [entry_64.S:694 -> entry_64.S:694] 16 [vmlinux] [k] native_irq_return_iret 0.00% [fair.c:7676 -> fair.c:7665] 162 [vmlinux] [k] update_blocked_averages "[Program Block Range]" indicates the range of program basic block (start -> end). If we can find the source line it prints the source line otherwise it prints the symbol+offset instead. v4: --- Use source lines or symbol+offset to indicate the basic block. 
It should be easier to understand. v3: --- Cast 'struct hist_entry' to 'struct block_hist' in hist_entry__block_fprintf. Use symbol_conf.report_block to check if executing hist_entry__block_fprintf. v2: --- Keep standard perf diff format and display the 'Baseline' and 'Shared Object'. The output is sorted by "Baseline" and the basic blocks in the same function are sorted by cycles diff. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 80 +++++++++++++++++++++++++++++++++-- tools/perf/ui/stdio/hist.c | 27 ++++++++++++ tools/perf/util/hist.c | 18 ++++++++ tools/perf/util/hist.h | 3 ++ tools/perf/util/srcline.c | 4 +- tools/perf/util/symbol_conf.h | 4 +- 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index fafb7b3f58fb..f924b46910b5 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -21,6 +21,7 @@ #include "util/config.h" #include "util/time-utils.h" #include "util/annotate.h" +#include "util/map.h" #include #include @@ -46,6 +47,7 @@ enum { PERF_HPP_DIFF__WEIGHTED_DIFF, PERF_HPP_DIFF__FORMULA, PERF_HPP_DIFF__DELTA_ABS, + PERF_HPP_DIFF__CYCLES, PERF_HPP_DIFF__MAX_INDEX }; @@ -114,6 +116,7 @@ static int compute_2_hpp[COMPUTE_MAX] = { [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS, [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO, [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF, + [COMPUTE_CYCLES] = PERF_HPP_DIFF__CYCLES, }; #define MAX_COL_WIDTH 70 @@ -152,6 +155,10 @@ static struct header_column { [PERF_HPP_DIFF__FORMULA] = { .name = "Formula", .width = MAX_COL_WIDTH, + }, + [PERF_HPP_DIFF__CYCLES] = { + .name = "[Program Block Range] Cycles Diff", + .width = 70, } }; @@ -239,8 +246,6 @@ static int setup_compute(const struct option *opt, 
const char *str, for (i = 0; i < COMPUTE_MAX; i++) if (!strcmp(cstr, compute_names[i])) { *cp = i; - if (i == COMPUTE_CYCLES) - break; return setup_compute_opt(option); } @@ -980,6 +985,9 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__output_resort(hists, NULL); + if (compute == COMPUTE_CYCLES) + symbol_conf.report_block = true; + hists__fprintf(hists, !quiet, 0, 0, 0, stdout, !symbol_conf.use_callchain); } @@ -1235,7 +1243,7 @@ static const struct option options[] = { OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, - "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)", + "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs),cycles", "Entries differential computation selection", setup_compute), OPT_BOOLEAN('p', "period", &show_period, @@ -1313,6 +1321,49 @@ static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size) return ret; } +static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, + struct perf_hpp *hpp, int width) +{ + struct block_hist *bh = container_of(he, struct block_hist, he); + struct block_hist *bh_pair = container_of(pair, struct block_hist, he); + struct hist_entry *block_he; + struct block_info *bi; + char buf[128]; + char *start_line, *end_line; + + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); + if (!block_he) { + hpp->skip = true; + return 0; + } + + /* + * Avoid printing the warning "addr2line_init failed for ..." 
+ */ + symbol_conf.disable_add2line_warn = true; + + bi = block_he->block_info; + + start_line = map__srcline(he->ms.map, bi->sym->start + bi->start, + he->ms.sym); + + end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, + he->ms.sym); + + if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", + start_line, end_line, block_he->diff.cycles); + } else { + scnprintf(buf, sizeof(buf), "[%7lx -> %7lx] %4ld", + bi->start, bi->end, block_he->diff.cycles); + } + + free_srcline(start_line); + free_srcline(end_line); + + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + static int __hpp__color_compare(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, int comparison_method) @@ -1324,8 +1375,17 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, s64 wdiff; char pfmt[20] = " "; - if (!pair) + if (!pair) { + if (comparison_method == COMPUTE_CYCLES) { + struct block_hist *bh; + + bh = container_of(he, struct block_hist, he); + if (bh->block_idx) + hpp->skip = true; + } + goto no_print; + } switch (comparison_method) { case COMPUTE_DELTA: @@ -1360,6 +1420,8 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, return color_snprintf(hpp->buf, hpp->size, get_percent_color(wdiff), pfmt, wdiff); + case COMPUTE_CYCLES: + return cycles_printf(he, pair, hpp, dfmt->header_width); default: BUG_ON(1); } @@ -1389,6 +1451,12 @@ static int hpp__color_wdiff(struct perf_hpp_fmt *fmt, return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF); } +static int hpp__color_cycles(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, struct hist_entry *he) +{ + return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); +} + static void hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) { @@ -1590,6 +1658,10 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->color = hpp__color_delta; fmt->sort = hist_entry__cmp_delta_abs; break; + case 
PERF_HPP_DIFF__CYCLES: + fmt->color = hpp__color_cycles; + fmt->sort = hist_entry__cmp_nop; + break; default: fmt->sort = hist_entry__cmp_nop; break; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9eb0131c3ade..89393c79d870 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -531,6 +531,30 @@ out: return printed; } +static int hist_entry__block_fprintf(struct hist_entry *he, + char *bf, size_t size, + FILE *fp) +{ + struct block_hist *bh = container_of(he, struct block_hist, he); + int ret = 0; + + for (unsigned int i = 0; i < bh->block_hists.nr_entries; i++) { + struct perf_hpp hpp = { + .buf = bf, + .size = size, + .skip = false, + }; + + bh->block_idx = i; + hist_entry__snprintf(he, &hpp); + + if (!hpp.skip) + ret += fprintf(fp, "%s\n", bf); + } + + return ret; +} + static int hist_entry__fprintf(struct hist_entry *he, size_t size, char *bf, size_t bfsz, FILE *fp, bool ignore_callchains) @@ -550,6 +574,9 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, if (symbol_conf.report_hierarchy) return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); + if (symbol_conf.report_block) + return hist_entry__block_fprintf(he, bf, size, fp); + hist_entry__snprintf(he, &hpp); ret = fprintf(fp, "%s\n", bf); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a6ba7d470eb8..27cecb59f866 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -376,6 +376,24 @@ void hists__delete_entries(struct hists *hists) } } +struct hist_entry *hists__get_entry(struct hists *hists, int idx) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + struct hist_entry *n; + int i = 0; + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + if (i == idx) + return n; + + next = rb_next(&n->rb_node); + i++; + } + + return NULL; +} + /* * histogram, sorted on item, collects periods */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c670122b4e40..24635f36148d 
100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -183,6 +183,8 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__delete_entries(struct hists *hists); void hists__output_recalc_col_len(struct hists *hists, int max_rows); +struct hist_entry *hists__get_entry(struct hists *hists, int idx); + u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); @@ -248,6 +250,7 @@ struct perf_hpp { size_t size; const char *sep; void *ptr; + bool skip; }; struct perf_hpp_fmt { diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 1824cabe3512..dcad75daf5e4 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -11,6 +11,7 @@ #include "util/util.h" #include "util/debug.h" #include "util/callchain.h" +#include "util/symbol_conf.h" #include "srcline.h" #include "string2.h" #include "symbol.h" @@ -288,7 +289,8 @@ static int addr2line(const char *dso_name, u64 addr, } if (a2l == NULL) { - pr_warning("addr2line_init failed for %s\n", dso_name); + if (!symbol_conf.disable_add2line_warn) + pr_warning("addr2line_init failed for %s\n", dso_name); return 0; } diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 382ba63fc554..e6880789864c 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -39,7 +39,9 @@ struct symbol_conf { hide_unresolved, raw_trace, report_hierarchy, - inline_name; + report_block, + inline_name, + disable_add2line_warn; const char *vmlinux_name, *kallsyms_name, *source_prefix, From c8f7bc1a080b081a178bff20356cb7575d385f84 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 28 Jun 2019 17:23:04 +0800 Subject: [PATCH 08/17] perf diff: Documentation -c cycles option Document the new computation selection 'cycles'.
v4: --- Change the column 'Block cycles diff [start:end]' to '[Program Block Range] Cycles Diff' Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1561713784-30533-8-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index facd91e4e945..d5cc15e651cf 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -90,9 +90,10 @@ OPTIONS -c:: --compute:: - Differential computation selection - delta, ratio, wdiff, delta-abs - (default is delta-abs). Default can be changed using diff.compute - config option. See COMPARISON METHODS section for more info. + Differential computation selection - delta, ratio, wdiff, cycles, + delta-abs (default is delta-abs). Default can be changed using + diff.compute config option. See COMPARISON METHODS section for + more info. -p:: --period:: @@ -280,6 +281,16 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as: - WEIGHT-A being the weight of the data file - WEIGHT-B being the weight of the baseline data file +cycles +~~~~~~ +If specified the '[Program Block Range] Cycles Diff' column is displayed. +It displays the cycles difference of same program basic block amongst +two perf.data. The program basic block is the code between two branches. + +'[Program Block Range]' indicates the range of a program basic block. +Source line is reported if it can be found otherwise uses symbol+offset +instead. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] From 730670b1d108c4a8aa1924762738ca38593ee44c Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Jun 2019 22:35:49 +0800 Subject: [PATCH 09/17] perf pmu: Support more complex PMU event aliasing The jevent "Unit" field is used for uncore PMU alias definition. The form uncore_pmu_example_X is supported, where "X" is a wildcard, to support multiple instances of the same PMU in a system. Unfortunately this format is not suitable for all uncore PMUs; take the Hisi DDRC uncore PMU for example, where the name is in the form hisi_scclX_ddrcY. For the current jevent parsing, we would be required to hardcode an uncore alias translation for each possible value of X. This is not scalable. Instead, add support for "Unit" field in the form "hisi_sccl,ddrc", where we can match by hisi_scclX and ddrcY. Tokens in Unit field are delimited by ','. Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ben Hutchings Cc: Hendrik Brueckner Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Thomas Richter Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1561732552-143038-2-git-send-email-john.garry@huawei.com [ Shut up older gcc complaining about the last arg to strtok_r() being uninitialized, set that tmp to NULL ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 46 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8139a1f3ed39..55f4de6442e3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -701,6 +701,46 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } +static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +{ + char *tmp = NULL, *tok, *str; + bool res; + + str =
strdup(pmu_name); + if (!str) + return false; + + /* + * uncore alias may be from different PMU with common prefix + */ + tok = strtok_r(str, ",", &tmp); + if (strncmp(pmu_name, tok, strlen(tok))) { + res = false; + goto out; + } + + /* + * Match more complex aliases where the alias name is a comma-delimited + * list of tokens, orderly contained in the matching PMU name. + * + * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we + * match "socket" in "socketX_pmunameY" and then "pmuname" in + * "pmunameY". + */ + for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + name = strstr(name, tok); + if (!name) { + res = false; + goto out; + } + } + + res = true; +out: + free(str); + return res; +} + /* * From the pmu_events_map, find the table of PMU events that corresponds * to the current running CPU. Then, add all PMU events from that table @@ -731,12 +771,8 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) break; } - /* - * uncore alias may be from different PMU - * with common prefix - */ if (pmu_is_uncore(name) && - !strncmp(pname, name, strlen(pname))) + pmu_uncore_alias_match(pname, name)) goto new_alias; if (strcmp(pname, name)) From 57cc732479bac2a3cbd759fb07188657c871d5c1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Jun 2019 22:35:50 +0800 Subject: [PATCH 10/17] perf jevents: Add support for Hisi hip08 DDRC PMU aliasing Add support for Hisi hip08 DDRC PMU aliasing. We can now do something like this: $perf list [snip] uncore ddrc: uncore_hisi_ddrc.act_cmd [DDRC active commands. Unit: hisi_sccl,ddrc] uncore_hisi_ddrc.flux_rcmd [DDRC read commands. Unit: hisi_sccl,ddrc] uncore_hisi_ddrc.flux_wcmd [DDRC write commands. Unit: hisi_sccl,ddrc] uncore_hisi_ddrc.flux_wr [DDRC precharge commands. Unit: hisi_sccl,ddrc] uncore_hisi_ddrc.rnk_chg [DDRC rank commands. Unit: hisi_sccl,ddrc] uncore_hisi_ddrc.rw_chg [DDRC read and write changes. 
Unit: hisi_sccl,ddrc] Performance counter stats for 'system wide': 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl1_ddrc0] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl3_ddrc1] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl5_ddrc2] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl7_ddrc3] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl5_ddrc0] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl7_ddrc1] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl1_ddrc3] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl1_ddrc1] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl3_ddrc2] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl5_ddrc3] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl3_ddrc0] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl5_ddrc1] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl7_ddrc2] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl7_ddrc0] 20,421 uncore_hisi_ddrc.flux_rcmd [hisi_sccl1_ddrc2] 0 uncore_hisi_ddrc.flux_rcmd [hisi_sccl3_ddrc3] 1.001559011 seconds time elapsed The kernel driver is in drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ben Hutchings Cc: Hendrik Brueckner Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Thomas Richter Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1561732552-143038-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/hisilicon/hip08/uncore-ddrc.json | 44 +++++++++++++++++++ tools/perf/pmu-events/jevents.c | 1 + 2 files changed, 45 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json new file mode 100644 index 000000000000..0d1556fcdffe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -0,0 +1,44 @@ +[ + { + "EventCode": "0x02", + "EventName": 
"uncore_hisi_ddrc.flux_wcmd", + "BriefDescription": "DDRC write commands", + "PublicDescription": "DDRC write commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x03", + "EventName": "uncore_hisi_ddrc.flux_rcmd", + "BriefDescription": "DDRC read commands", + "PublicDescription": "DDRC read commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x04", + "EventName": "uncore_hisi_ddrc.flux_wr", + "BriefDescription": "DDRC precharge commands", + "PublicDescription": "DDRC precharge commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x05", + "EventName": "uncore_hisi_ddrc.act_cmd", + "BriefDescription": "DDRC active commands", + "PublicDescription": "DDRC active commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x06", + "EventName": "uncore_hisi_ddrc.rnk_chg", + "BriefDescription": "DDRC rank commands", + "PublicDescription": "DDRC rank commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x07", + "EventName": "uncore_hisi_ddrc.rw_chg", + "BriefDescription": "DDRC read and write changes", + "PublicDescription": "DDRC read and write changes", + "Unit": "hisi_sccl,ddrc", + }, +] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index a1184ea64cc6..d5997741f1d8 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -236,6 +236,7 @@ static struct map { { "CPU-M-CF", "cpum_cf" }, { "CPU-M-SF", "cpum_sf" }, { "UPI LL", "uncore_upi" }, + { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, {} }; From 8f5b703add99473b59b4a38a6b66afbafc29d92e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Jun 2019 22:35:51 +0800 Subject: [PATCH 11/17] perf jevents: Add support for Hisi hip08 HHA PMU aliasing Add support for Hisi hip08 HHA PMU aliasing. 
The kernel driver is in drivers/perf/hisilicon/hisi_uncore_hha_pmu.c Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ben Hutchings Cc: Hendrik Brueckner Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Thomas Richter Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1561732552-143038-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/hisilicon/hip08/uncore-hha.json | 51 +++++++++++++++++++ tools/perf/pmu-events/jevents.c | 1 + 2 files changed, 52 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json new file mode 100644 index 000000000000..447d3064de90 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -0,0 +1,51 @@ +[ + { + "EventCode": "0x00", + "EventName": "uncore_hisi_hha.rx_ops_num", + "BriefDescription": "The number of all operations received by the HHA", + "PublicDescription": "The number of all operations received by the HHA", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_hha.rx_outer", + "BriefDescription": "The number of all operations received by the HHA from another socket", + "PublicDescription": "The number of all operations received by the HHA from another socket", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x02", + "EventName": "uncore_hisi_hha.rx_sccl", + "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", + "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1c", + "EventName": "uncore_hisi_hha.rd_ddr_64b", + 
"BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1d", + "EventName": "uncore_hisi_hha.wr_dr_64b", + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1e", + "EventName": "uncore_hisi_hha.rd_ddr_128b", + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1f", + "EventName": "uncore_hisi_hha.wr_ddr_128b", + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", + "Unit": "hisi_sccl,hha", + }, +] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index d5997741f1d8..3c95affd85a4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -237,6 +237,7 @@ static struct map { { "CPU-M-SF", "cpum_sf" }, { "UPI LL", "uncore_upi" }, { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, + { "hisi_sccl,hha", "hisi_sccl,hha" }, {} }; From edd93a4076cf18ede423c167de6d6fb8e4211e7b Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Jun 2019 22:35:52 +0800 Subject: [PATCH 12/17] perf jevents: Add support for Hisi hip08 L3C PMU aliasing Add support for Hisi hip08 L3C PMU aliasing. 
The kernel driver is in drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ben Hutchings Cc: Hendrik Brueckner Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Thomas Richter Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: http://lkml.kernel.org/r/1561732552-143038-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/hisilicon/hip08/uncore-l3c.json | 37 +++++++++++++++++++ tools/perf/pmu-events/jevents.c | 1 + 2 files changed, 38 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json new file mode 100644 index 000000000000..ca48747642e1 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -0,0 +1,37 @@ +[ + { + "EventCode": "0x00", + "EventName": "uncore_hisi_l3c.rd_cpipe", + "BriefDescription": "Total read accesses", + "PublicDescription": "Total read accesses", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_l3c.wr_cpipe", + "BriefDescription": "Total write accesses", + "PublicDescription": "Total write accesses", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x02", + "EventName": "uncore_hisi_l3c.rd_hit_cpipe", + "BriefDescription": "Total read hits", + "PublicDescription": "Total read hits", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x03", + "EventName": "uncore_hisi_l3c.wr_hit_cpipe", + "BriefDescription": "Total write hits", + "PublicDescription": "Total write hits", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x04", + "EventName": "uncore_hisi_l3c.victim_num", + "BriefDescription": "l3c precharge commands", + "PublicDescription": "l3c precharge commands", + 
"Unit": "hisi_sccl,l3c", + }, +] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 3c95affd85a4..287a6f10ca48 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -238,6 +238,7 @@ static struct map { { "UPI LL", "uncore_upi" }, { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, { "hisi_sccl,hha", "hisi_sccl,hha" }, + { "hisi_sccl,l3c", "hisi_sccl,l3c" }, {} }; From 734ac47e23aee12e1c16a4dd52d7c1cb893eaf6c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 28 Jun 2019 15:09:00 -0700 Subject: [PATCH 13/17] perf tools: Fix typos / broken sentences - Fix a typo in the man page - Fix a tip that doesn't make any sense. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190628220900.13741-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/tips.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8c4372819e11..987261d158d4 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -89,7 +89,7 @@ OPTIONS - socket: processor socket number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. - - srcfile: file name of the source file of the same. Requires dwarf + - srcfile: file name of the source file of the samples. Requires dwarf information. - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. 
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 869965d629ce..825745a645c1 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -38,6 +38,6 @@ To report cacheline events from previous recording: perf c2c report To browse sample contexts use perf report --sample 10 and select in context menu To separate samples by time use perf report --sort time,overhead,sym To set sample time separation other than 100ms with --sort time use --time-quantum -Add -I to perf report to sample register values visible in perf report context. +Add -I to perf record to sample register values, which will be visible in perf report sample context. To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context To show context switches in perf report sample context add --switch-events to perf record. From 4df79ba3eb1b82e2939fb984b36a0e71bbed611b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 28 Jun 2019 15:07:35 -0700 Subject: [PATCH 14/17] perf vendor events intel: Metric fixes for SKX/CLX - Add a missing filter for the DRAM_Latency / DRAM_Parallel_Reads metrics - Remove the useless PMM_* metrics from Skylake Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190628220737.13259-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/cascadelakex/clx-metrics.json | 4 ++-- .../arch/x86/skylakex/skx-metrics.json | 22 ++----------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 1a1a3501180a..a382b115633d 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -314,13 +314,13 @@ "MetricName": "DRAM_BW_Use" }, { - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / 
cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_Lat", "MetricName": "DRAM_Read_Latency" }, { - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_BW", "MetricName": "DRAM_Parallel_Reads" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 56e03ba771f4..35b255fa6a79 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -314,35 +314,17 @@ "MetricName": "DRAM_BW_Use" }, { - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). 
Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_Lat", "MetricName": "DRAM_Read_Latency" }, { - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_BW", "MetricName": "DRAM_Parallel_Reads" }, - { - "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0", - "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", - "MetricGroup": "Memory_Lat", - "MetricName": "MEM_PMM_Read_Latency" - }, - { - "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", - "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", - "MetricGroup": "Memory_BW", - "MetricName": "PMM_Read_BW" - }, - { - "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", - "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", - "MetricGroup": "Memory_BW", - "MetricName": "PMM_Write_BW" - }, { "MetricExpr": "cha_0@event\\=0x0@", "BriefDescription": "Socket actual clocks when any core is active on that socket", From 9c344d15f5783260f57c711f3fce72dd744bebe2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 28 Jun 2019 15:07:36 -0700 Subject: [PATCH 15/17] perf list: Avoid extra : for --raw metrics When printing the metrics raw, don't print : after the metricgroups. This helps the command line completion to complete those too. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190628220737.13259-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index bc25995255ab..7d36435fa84c 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -375,7 +375,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, struct mep *me = container_of(node, struct mep, nd); if (metricgroups) - printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); + printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n"); if (metrics) metricgroup__print_strlist(me->metrics, raw); next = rb_next(node); From 488c3bf7ece89e47887607863207021283e37828 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 28 Jun 2019 15:07:37 -0700 Subject: [PATCH 16/17] perf tools metric: Don't include duration_time in group The Memory_BW metric generates groups including duration_time, which maps to a software event. For some reason this makes the group always not count. Always put duration_time outside a group when generating metrics. It's always the same time, so no need to group it. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190628220737.13259-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 7d36435fa84c..d8164574cb16 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -409,6 +409,7 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, const char **ids; int idnum; struct egroup *eg; + bool no_group = false; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); @@ -419,11 +420,25 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, strbuf_addf(events, ","); for (j = 0; j < idnum; j++) { pr_debug("found event %s\n", ids[j]); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(ids[j], "duration_time")) { + if (j > 0) + strbuf_addf(events, "}:W,"); + strbuf_addf(events, "duration_time"); + no_group = true; + continue; + } strbuf_addf(events, "%s%s", - j == 0 ? "{" : ",", + j == 0 || no_group ? "{" : ",", ids[j]); + no_group = false; } - strbuf_addf(events, "}:W"); + if (!no_group) + strbuf_addf(events, "}:W"); eg = malloc(sizeof(struct egroup)); if (!eg) { From 15a108af1a18b597bfbd7f7b3c7b4823bfbaf8df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 28 Jun 2019 17:16:58 -0300 Subject: [PATCH 17/17] perf script: Allow specifying the files to process guest samples The 'perf kvm' command set up things so that we can record, report, top, etc, but not 'script', so make 'perf script' be able to process samples by allowing to pass guest kallsyms, vmlinux, modules, etc, and if at least one of those is provided, set perf_guest to true so that guest samples get properly resolved. 
Testing it: # perf kvm --guest --guestkallsyms /wb/rhel6.kallsyms --guestmodules /wb/rhel6.modules record -e cycles:Gk ^C[ perf record: Woken up 7 times to write data ] [ perf record: Captured and wrote 3.602 MB perf.data.guest (10492 samples) ] # # perf evlist -i perf.data.guest cycles:Gk # perf evlist -v -i perf.data.guest cycles:Gk: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, exclude_user: 1, exclude_hv: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_host: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 # # perf kvm --guestkallsyms /wb/rhel6.kallsyms --guestmodules /wb/rhel6.modules report --stdio -s sym | head -30 # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 10K of event 'cycles:Gk' # Event count (approx.): 2434201408 # # Overhead Symbol # ........ .............................................. # 11.93% [g] avtab_search_node 3.95% [g] sidtab_context_to_sid 2.41% [g] n_tty_write 2.20% [g] _spin_unlock_irqrestore 1.37% [g] _aesni_dec4 1.33% [g] kmem_cache_alloc 1.07% [g] native_write_cr0 0.99% [g] kfree 0.95% [g] _spin_lock 0.91% [g] __memset 0.87% [g] schedule 0.83% [g] _spin_lock_irqsave 0.76% [g] __kmalloc 0.67% [g] avc_has_perm_noaudit 0.66% [g] kmem_cache_free 0.65% [g] glue_xts_crypt_128bit 0.59% [g] __d_lookup 0.59% [g] __audit_syscall_exit 0.56% [g] __memcpy # Then, when trying to use perf script to generate a python script and then process the events after adding a python hook for non-tracepoint events: # perf script -i perf.data.guest -g python generated Python script: perf-script.py # vim perf-script.py # tail -2 perf-script.py def process_event(param_dict): print(param_dict["symbol"]) # # perf script -i perf.data.guest -s perf-script.py | head in trace_begin vmx_vmexit vmx_vmexit vmx_vmexit vmx_vmexit vmx_vmexit vmx_vmexit vmx_vmexit vmx_vmexit vmx_vmexit 231 # We'd see 
just the vmx_vmexit, i.e. the samples from the guest don't show up. After this patch: # perf script --guestkallsyms /wb/rhel6.kallsyms --guestmodules /wb/rhel6.modules -i perf.data.guest -s perf-script.py 2> /dev/null | head -30 in trace_begin apic_timer_interrupt apic_timer_interrupt apic_timer_interrupt apic_timer_interrupt apic_timer_interrupt save_args do_timer drain_array inode_permission avc_has_perm_noaudit run_timer_softirq apic_timer_interrupt apic_timer_interrupt apic_timer_interrupt apic_timer_interrupt apic_timer_interrupt kvm_guest_apic_eoi_write run_posix_cpu_timers _spin_lock handle_pte_fault rcu_irq_enter delay_tsc delay_tsc native_read_tsc apic_timer_interrupt sys_open internal_add_timer list_del rcu_exit_nohz # Jiri Olsa noticed we need to set 'perf_guest' to true if we want to process guest samples, so I made it be set if any of the guest file settings gets set via the command line options added in this patch, which match those present in the 'perf kvm' command. We probably want to have 'perf record', 'perf report', etc. notice that there are guest samples and do the right thing, which is to look for files with some suffix that associates them with the guest used to collect the samples, i.e. if a vmlinux file is passed, we can get the build-id from it; if not, use some other identifier or simply look for "kallsyms.guest", for instance, in the current directory.
Reported-by: Mariano Pache Tested-by: Mariano Pache Cc: Adrian Hunter Cc: Alexander Yarygin Cc: Ali Raza Cc: Christian Borntraeger Cc: Jiri Olsa Cc: Joe Mario Cc: Larry Woodman Cc: Namhyung Kim Cc: Orran Krieger Cc: Ramkumar Ramachandra Cc: Yunlong Song Link: https://lkml.kernel.org/n/tip-d54gj64rerlxcqsrod05biwn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 520e5b6b9ef9..2f6232f1bfdc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3522,6 +3522,15 @@ int cmd_script(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), + OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", + "guest mount directory under which every guest os" + " instance has a subdir"), + OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name, + "file", "file saving guest os vmlinux"), + OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms, + "file", "file saving guest os /proc/kallsyms"), + OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, + "file", "file saving guest os /proc/modules"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -3541,6 +3550,16 @@ int cmd_script(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (symbol_conf.guestmount || + symbol_conf.default_guest_vmlinux_name || + symbol_conf.default_guest_kallsyms || + symbol_conf.default_guest_modules) { + /* + * Enable guest sample processing. + */ + perf_guest = true; + } + data.path = input_name; data.force = symbol_conf.force;