From 7efbcc8c075c5e9ef69b2379b75b58a699f23eb3 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 11 Sep 2021 10:08:53 +0530 Subject: [PATCH 1/5] perf annotate: Fix fused instr logic for assembly functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some x86 microarchitectures fuse a subset of cmp/test/ALU instructions with branch instructions, and thus perf annotate highlights such valid pairs as fused. When annotated with source, perf uses struct disasm_line to contain either a source line or an instruction line from objdump output. Usually, a C statement generates multiple instructions which include such cmp/test/ALU + branch instruction pairs. But in the case of an assembly function, each individual assembly source line generates one instruction. The 'perf annotate' instruction fusion logic assumes the previous disasm_line is the previous instruction line, which is wrong because, for an assembly function, the previous disasm_line contains a source line. And thus perf fails to highlight valid fused instruction pairs for assembly functions. Fix it by searching backward until we find an instruction line and considering that disasm_line as fused with the current branch instruction. 
Before: │ cmpq %rcx, RIP+8(%rsp) 0.00 │ cmp %rcx,0x88(%rsp) │ je .Lerror_bad_iret <--- Source line 0.14 │ ┌──je b4 <--- Instruction line │ │movl %ecx, %eax After: │ cmpq %rcx, RIP+8(%rsp) 0.00 │ ┌──cmp %rcx,0x88(%rsp) │ │je .Lerror_bad_iret 0.14 │ ├──je b4 │ │movl %ecx, %eax Reviewed-by: Jin Yao Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20210911043854.8373-1-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 33 ++++++++++++++++++++++--------- tools/perf/ui/browser.h | 2 +- tools/perf/ui/browsers/annotate.c | 24 +++++++++++++++------- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 781afe42e90e..fa5bd5c20e96 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, } void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down) + unsigned int row, int diff, bool arrow_down) { - unsigned int end_row; + int end_row; - if (row >= browser->top_idx) - end_row = row - browser->top_idx; - else + if (diff <= 0) return; SLsmg_set_char_set(1); if (arrow_down) { + if (row + diff <= browser->top_idx) + return; + + end_row = row + diff - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); - SLsmg_write_char(SLSMG_ULCORN_CHAR); - ui_browser__gotorc(browser, end_row, column); - SLsmg_draw_hline(2); - ui_browser__gotorc(browser, end_row + 1, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); + + while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_draw_vline(1); + } + + end_row = (int)(row - browser->top_idx); + if (end_row >= 0) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_write_char(SLSMG_ULCORN_CHAR); 
+ ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_hline(2); + } } else { + if (row < browser->top_idx) + return; + + end_row = row - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); ui_browser__gotorc(browser, end_row, column); diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 3678eb88f119..510ce4554050 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down); + unsigned int row, int diff, bool arrow_down); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *browser, const char *title, diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ef4da4295bf7..e81c2493efdf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ab->selection = al; } -static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) +static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; + int diff = 1; + + while (pos && pos->al.offset == -1) { + pos = list_prev_entry(pos, al.node); + if (!ab->opts->hide_src_code) + diff++; + } if (!pos) - return false; + return 0; if (ins__is_lock(&pos->ins)) name = pos->ops.locked.ins.name; @@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) name = pos->ins.name; if (!name || !cursor->ins.name) - return 
false; + return 0; - return ins__is_fused(ab->arch, name, cursor->ins.name); + if (ins__is_fused(ab->arch, name, cursor->ins.name)) + return diff; + return 0; } static void annotate_browser__draw_current_jump(struct ui_browser *browser) @@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotation *notes = symbol__annotation(sym); u8 pcnt_width = annotation__pcnt_width(notes); int width; + int diff = 0; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) pcnt_width + 2 + notes->widths.addr + width, from, to); - if (is_fused(ab, cursor)) { + diff = is_fused(ab, cursor); + if (diff > 0) { ui_browser__mark_fused(browser, pcnt_width + 3 + notes->widths.addr + width, - from - 1, - to > from); + from - diff, diff, to > from); } } From ff6f41fbcee9830f88413cbb08dc45e543243b55 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sat, 11 Sep 2021 16:30:53 +0300 Subject: [PATCH 2/5] perf script: Fix ip display when type != attr->type set_print_ip_opts() was not being called when type != attr->type because there is not a one-to-one relationship between output types and attr->type. That resulted in ip not printing. The attr_type() function is removed, and the match of attr->type to output type is corrected. 
Example on ADL using taskset to select an atom cpu: # perf record -e cpu_atom/cpu-cycles/ taskset 0x1000 uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.003 MB perf.data (7 samples) ] Before: # perf script | head taskset 428 [-01] 10394.179041: 1 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179043: 1 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179044: 11 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179045: 407 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179046: 16789 cpu_atom/cpu-cycles/: taskset 428 [-01] 10394.179052: 676300 cpu_atom/cpu-cycles/: uname 428 [-01] 10394.179278: 4079859 cpu_atom/cpu-cycles/: After: # perf script | head taskset 428 10394.179041: 1 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179043: 1 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179044: 11 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179045: 407 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179046: 16789 cpu_atom/cpu-cycles/: ffffffff95a0bb97 __intel_pmu_enable_all.constprop.48+0x47 ([kernel.kallsyms]) taskset 428 10394.179052: 676300 cpu_atom/cpu-cycles/: 7f829ef73800 cfree+0x0 (/lib/libc-2.32.so) uname 428 10394.179278: 4079859 cpu_atom/cpu-cycles/: ffffffff95bae912 vma_interval_tree_remove+0x1f2 ([kernel.kallsyms]) Signed-off-by: Adrian Hunter Reviewed-by: Kan Liang Cc: Jin Yao Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210911133053.15682-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0e824f7d8b19..6211d0b84b7a 100644 --- 
a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -368,16 +368,6 @@ static inline int output_type(unsigned int type) return OUTPUT_TYPE_OTHER; } -static inline unsigned int attr_type(unsigned int type) -{ - switch (type) { - case OUTPUT_TYPE_SYNTH: - return PERF_TYPE_SYNTH; - default: - return type; - } -} - static bool output_set_by_user(void) { int j; @@ -556,6 +546,18 @@ static void set_print_ip_opts(struct perf_event_attr *attr) output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE; } +static struct evsel *find_first_output_type(struct evlist *evlist, + unsigned int type) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (output_type(evsel->core.attr.type) == (int)type) + return evsel; + } + return NULL; +} + /* * verify all user requested events exist and the samples * have the expected data @@ -567,7 +569,7 @@ static int perf_session__check_output_opt(struct perf_session *session) struct evsel *evsel; for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { - evsel = perf_session__find_first_evtype(session, attr_type(j)); + evsel = find_first_output_type(session->evlist, j); /* * even if fields is set to 0 (ie., show nothing) event must From 57f0ff059e3daa4e70a811cb1d31a49968262d20 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Mon, 19 Jul 2021 16:53:32 +0200 Subject: [PATCH 3/5] perf machine: Initialize srcline string member in add_location struct It's later supposed to be either a correct address or NULL. 
Without the initialization, it may contain an undefined value which results in the following segmentation fault: # perf top --sort comm -g --ignore-callees=do_idle terminates with: #0 0x00007ffff56b7685 in __strlen_avx2 () from /lib64/libc.so.6 #1 0x00007ffff55e3802 in strdup () from /lib64/libc.so.6 #2 0x00005555558cb139 in hist_entry__init (callchain_size=<optimized out>, sample_self=true, template=0x7fffde7fb110, he=0x7fffd801c250) at util/hist.c:489 #3 hist_entry__new (template=template@entry=0x7fffde7fb110, sample_self=sample_self@entry=true) at util/hist.c:564 #4 0x00005555558cb4ba in hists__findnew_entry (hists=hists@entry=0x5555561d9e38, entry=entry@entry=0x7fffde7fb110, al=al@entry=0x7fffde7fb420, sample_self=sample_self@entry=true) at util/hist.c:657 #5 0x00005555558cba1b in __hists__add_entry (hists=hists@entry=0x5555561d9e38, al=0x7fffde7fb420, sym_parent=<optimized out>, bi=bi@entry=0x0, mi=mi@entry=0x0, sample=sample@entry=0x7fffde7fb4b0, sample_self=true, ops=0x0, block_info=0x0) at util/hist.c:288 #6 0x00005555558cbb70 in hists__add_entry (sample_self=true, sample=0x7fffde7fb4b0, mi=0x0, bi=0x0, sym_parent=<optimized out>, al=<optimized out>, hists=0x5555561d9e38) at util/hist.c:1056 #7 iter_add_single_cumulative_entry (iter=0x7fffde7fb460, al=<optimized out>) at util/hist.c:1056 #8 0x00005555558cc8a4 in hist_entry_iter__add (iter=iter@entry=0x7fffde7fb460, al=al@entry=0x7fffde7fb420, max_stack_depth=<optimized out>, arg=arg@entry=0x7fffffff7db0) at util/hist.c:1231 #9 0x00005555557cdc9a in perf_event__process_sample (machine=<optimized out>, sample=0x7fffde7fb4b0, evsel=<optimized out>, event=<optimized out>, tool=0x7fffffff7db0) at builtin-top.c:842 #10 deliver_event (qe=<optimized out>, qevent=<optimized out>) at builtin-top.c:1202 #11 0x00005555558a9318 in do_flush (show_progress=false, oe=0x7fffffff80e0) at util/ordered-events.c:244 #12 __ordered_events__flush (oe=oe@entry=0x7fffffff80e0, how=how@entry=OE_FLUSH__TOP, timestamp=timestamp@entry=0) at util/ordered-events.c:323 #13 0x00005555558a9789 in __ordered_events__flush (timestamp=<optimized out>, how=<optimized out>, oe=<optimized out>) at util/ordered-events.c:339 #14 ordered_events__flush 
(how=OE_FLUSH__TOP, oe=0x7fffffff80e0) at util/ordered-events.c:341 #15 ordered_events__flush (oe=oe@entry=0x7fffffff80e0, how=how@entry=OE_FLUSH__TOP) at util/ordered-events.c:339 #16 0x00005555557cd631 in process_thread (arg=0x7fffffff7db0) at builtin-top.c:1114 #17 0x00007ffff7bb817a in start_thread () from /lib64/libpthread.so.0 #18 0x00007ffff5656dc3 in clone () from /lib64/libc.so.6 If you look at the frame #2, the code is: 488 if (he->srcline) { 489 he->srcline = strdup(he->srcline); 490 if (he->srcline == NULL) 491 goto err_rawdata; 492 } If he->srcline is not NULL (it is not NULL if it is uninitialized rubbish), it gets strdupped and strdupping a rubbish random string causes the problem. Also, if you look at the commit 1fb7d06a509e, it adds the srcline property into the struct, but not initializing it everywhere needed. Committer notes: Now I see, when using --ignore-callees=do_idle we end up here at line 2189 in add_callchain_ip(): 2181 if (al.sym != NULL) { 2182 if (perf_hpp_list.parent && !*parent && 2183 symbol__match_regex(al.sym, &parent_regex)) 2184 *parent = al.sym; 2185 else if (have_ignore_callees && root_al && 2186 symbol__match_regex(al.sym, &ignore_callees_regex)) { 2187 /* Treat this symbol as the root, 2188 forgetting its callees. 
*/ 2189 *root_al = al; 2190 callchain_cursor_reset(cursor); 2191 } 2192 } And the al that doesn't have the ->srcline field initialized will be copied to the root_al, so then, back to: 1211 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 1212 int max_stack_depth, void *arg) 1213 { 1214 int err, err2; 1215 struct map *alm = NULL; 1216 1217 if (al) 1218 alm = map__get(al->map); 1219 1220 err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, 1221 iter->evsel, al, max_stack_depth); 1222 if (err) { 1223 map__put(alm); 1224 return err; 1225 } 1226 1227 err = iter->ops->prepare_entry(iter, al); 1228 if (err) 1229 goto out; 1230 1231 err = iter->ops->add_single_entry(iter, al); 1232 if (err) 1233 goto out; 1234 That al at line 1221 is what sample__resolve_callchain() (called from hist_entry_iter__add()) saw as 'root_al', and then: iter->ops->add_single_entry(iter, al); will go on with al->srcline with a bogus value, I'll add the above sequence to the cset and apply, thanks! Signed-off-by: Michael Petlan CC: Milian Wolff Cc: Jiri Olsa Fixes: 1fb7d06a509e ("perf report Use srcline from callchain for hist entries") Link: https://lore.kernel.org/r/20210719145332.29747-1-mpetlan@redhat.com Reported-by: Juri Lelli Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da19be7da284..44e40bad0e33 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2149,6 +2149,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; + al.srcline = NULL; if (!cpumode) { thread__find_cpumode_addr_location(thread, ip, &al); } else { From aba5daeb645181ee5a046bc00c231fd045882aaa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 Sep 2021 22:44:40 -0700 Subject: [PATCH 4/5] libperf evsel: Make use of FD robust. 
FD uses xyarray__entry(), which may return NULL if an index is out of bounds. If NULL is returned then a segv happens, as FD unconditionally dereferences the pointer. This was happening with perf iostat, as shown below. The fix is to make FD an "int*" rather than an int and handle the NULL case as either invalid input or a closed fd. $ sudo gdb --args perf stat --iostat list ... Breakpoint 1, perf_evsel__alloc_fd (evsel=0x5555560951a0, ncpus=1, nthreads=1) at evsel.c:50 50 { (gdb) bt #0 perf_evsel__alloc_fd (evsel=0x5555560951a0, ncpus=1, nthreads=1) at evsel.c:50 #1 0x000055555585c188 in evsel__open_cpu (evsel=0x5555560951a0, cpus=0x555556093410, threads=0x555556086fb0, start_cpu=0, end_cpu=1) at util/evsel.c:1792 #2 0x000055555585cfb2 in evsel__open (evsel=0x5555560951a0, cpus=0x0, threads=0x555556086fb0) at util/evsel.c:2045 #3 0x000055555585d0db in evsel__open_per_thread (evsel=0x5555560951a0, threads=0x555556086fb0) at util/evsel.c:2065 #4 0x00005555558ece64 in create_perf_stat_counter (evsel=0x5555560951a0, config=0x555555c34700 , target=0x555555c2f1c0 , cpu=0) at util/stat.c:590 #5 0x000055555578e927 in __run_perf_stat (argc=1, argv=0x7fffffffe4a0, run_idx=0) at builtin-stat.c:833 #6 0x000055555578f3c6 in run_perf_stat (argc=1, argv=0x7fffffffe4a0, run_idx=0) at builtin-stat.c:1048 #7 0x0000555555792ee5 in cmd_stat (argc=1, argv=0x7fffffffe4a0) at builtin-stat.c:2534 #8 0x0000555555835ed3 in run_builtin (p=0x555555c3f540 , argc=3, argv=0x7fffffffe4a0) at perf.c:313 #9 0x0000555555836154 in handle_internal_command (argc=3, argv=0x7fffffffe4a0) at perf.c:365 #10 0x000055555583629f in run_argv (argcp=0x7fffffffe2ec, argv=0x7fffffffe2e0) at perf.c:409 #11 0x0000555555836692 in main (argc=3, argv=0x7fffffffe4a0) at perf.c:539 ... (gdb) c Continuing. Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (uncore_iio_0/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/). 
/bin/dmesg | grep -i perf may provide additional information. Program received signal SIGSEGV, Segmentation fault. 0x00005555559b03ea in perf_evsel__close_fd_cpu (evsel=0x5555560951a0, cpu=1) at evsel.c:166 166 if (FD(evsel, cpu, thread) >= 0) v3. fixes a bug in perf_evsel__run_ioctl where the sense of a branch was backward. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210918054440.2350466-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 64 +++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index d8886720e83d..8441e3e1aaac 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,7 +43,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) +#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) #define MMAP(e, x, y) (e->mmap ? 
((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -54,7 +54,10 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) int cpu, thread; for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd) + *fd = -1; } } } @@ -80,7 +83,7 @@ sys_perf_event_open(struct perf_event_attr *attr, static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; - int fd; + int *fd; if (evsel == leader) { *group_fd = -1; @@ -95,10 +98,10 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou return -ENOTCONN; fd = FD(leader, cpu, thread); - if (fd == -1) + if (fd == NULL || *fd == -1) return -EBADF; - *group_fd = fd; + *group_fd = *fd; return 0; } @@ -138,7 +141,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { - int fd, group_fd; + int fd, group_fd, *evsel_fd; + + evsel_fd = FD(evsel, cpu, thread); + if (evsel_fd == NULL) + return -EINVAL; err = get_group_fd(evsel, cpu, thread, &group_fd); if (err < 0) @@ -151,7 +158,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, if (fd < 0) return -errno; - FD(evsel, cpu, thread) = fd; + *evsel_fd = fd; } } @@ -163,9 +170,12 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd && *fd >= 0) { + close(*fd); + *fd = -1; + } } } @@ -209,13 +219,12 @@ void perf_evsel__munmap(struct perf_evsel *evsel) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { 
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); - if (fd < 0) + if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(map); + perf_mmap__munmap(MMAP(evsel, cpu, thread)); } } @@ -239,15 +248,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); + struct perf_mmap *map; - if (fd < 0) + if (fd == NULL || *fd < 0) continue; + map = MMAP(evsel, cpu, thread); perf_mmap__init(map, NULL, false, NULL); - ret = perf_mmap__mmap(map, &mp, fd, cpu); + ret = perf_mmap__mmap(map, &mp, *fd, cpu); if (ret) { perf_evsel__munmap(evsel); return ret; @@ -260,7 +270,9 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) { - if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL) + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) return NULL; return MMAP(evsel, cpu, thread)->base; @@ -295,17 +307,18 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu, thread); memset(count, 0, sizeof(*count)); - if (FD(evsel, cpu, thread) < 0) + if (fd == NULL || *fd < 0) return -EINVAL; if (MMAP(evsel, cpu, thread) && !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) return 0; - if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) + if (readn(*fd, count->values, size) <= 0) return -errno; return 0; @@ -318,8 +331,13 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < 
xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread), - err = ioctl(fd, ioc, arg); + int err; + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0) + return -1; + + err = ioctl(*fd, ioc, arg); if (err) return err; From 219d720e6df71c2607d7120d6b9281614863e5b1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Sep 2021 10:00:04 -0700 Subject: [PATCH 5/5] perf bpf: Ignore deprecation warning when using libbpf's btf__get_from_id() Perf code re-implements libbpf's btf__load_from_kernel_by_id() API as a weak function, presumably to dynamically link against old version of libbpf shared library. Unfortunately this causes compilation warning when perf is compiled against libbpf v0.6+. For now, just ignore deprecation warning, but there might be a better solution, depending on perf's needs. Signed-off-by: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: kernel-team@fb.com LPU-Reference: 20210914170004.4185659-1-andrii@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 683f6d63560e..1a7112a87736 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -24,7 +24,10 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) { struct btf *btf; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" int err = btf__get_from_id(id, &btf); +#pragma GCC diagnostic pop return err ? ERR_PTR(err) : btf; }