From 5839a5506de30c23d4cfaf49755cd294f5c48368 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 21 Aug 2015 22:05:58 +0300 Subject: [PATCH 01/22] perf tools: Fix tarball build broken by pt/bts Fix some include paths and add missing inat_types.h. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/55D77696.60102@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/inat.c | 2 +- tools/perf/util/intel-pt-decoder/inat.h | 2 +- tools/perf/util/intel-pt-decoder/inat_types.h | 29 +++++++++++++++++++ tools/perf/util/intel-pt-decoder/insn.c | 4 +-- tools/perf/util/intel-pt-decoder/insn.h | 2 +- .../intel-pt-decoder/intel-pt-insn-decoder.c | 2 +- 6 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 tools/perf/util/intel-pt-decoder/inat_types.h diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c index feeaa509dfe4..906d94aa0a24 100644 --- a/tools/perf/util/intel-pt-decoder/inat.c +++ b/tools/perf/util/intel-pt-decoder/inat.c @@ -18,7 +18,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#include +#include "insn.h" /* Attribute tables are generated from opcode map */ #include "inat-tables.c" diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h index 74a2e312e8a2..611645e903a8 100644 --- a/tools/perf/util/intel-pt-decoder/inat.h +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -20,7 +20,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */ -#include +#include "inat_types.h" /* * Internal bits. Don't use bitmasks directly, because these bits are diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c index 8f72b334aea0..47314a64399c 100644 --- a/tools/perf/util/intel-pt-decoder/insn.c +++ b/tools/perf/util/intel-pt-decoder/insn.c @@ -23,8 +23,8 @@ #else #include #endif -#include -#include +#include "inat.h" +#include "insn.h" /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h index e7814b74caf8..dd12da0f4593 100644 --- a/tools/perf/util/intel-pt-decoder/insn.h +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -21,7 +21,7 @@ */ /* insn_attr_t is defined in inat.h */ -#include +#include "inat.h" struct insn_field { union { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 46980fc663ac..9e4eb8fcd559 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -20,7 +20,7 @@ #include "event.h" -#include +#include "insn.h" #include "inat.c" #include "insn.c" From c0b4dffbc529244d3e4e3bd392f2bffa2d8531a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Aug 2015 13:33:14 -0300 Subject: [PATCH 02/22] perf annotate: Reset the dso find_symbol cache when removing symbols The 'annotate' tool does some filtering in the entries in a DSO but forgot to reset the cache done in dso__find_symbol(), cauxing a SEGV: [root@zoo ~]# perf annotate netlink_poll perf: Segmentation fault -------- backtrace -------- perf[0x526ceb] /lib64/libc.so.6(+0x34960)[0x7faedfbe0960] perf(rb_erase+0x223)[0x499d63] perf[0x4213e9] perf[0x4bc123] perf[0x4bc621] perf[0x4bf26b] perf[0x4bc855] perf(perf_session__process_events+0x340)[0x4bddc0] perf(cmd_annotate+0x6bb)[0x421b5b] perf[0x479063] perf(main+0x60a)[0x42098a] /lib64/libc.so.6(__libc_start_main+0xf0)[0x7faedfbcbfe0] perf[0x420aa9] [0x0] [root@zoo ~]# Fix it by reseting the find cache when removing symbols. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Fixes: b685ac22b436 ("perf symbols: Add front end cache for DSO symbol lookup") Link: http://lkml.kernel.org/n/tip-b2y9x46y0t8yem1ive41zqyp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + tools/perf/util/dso.h | 2 ++ tools/perf/util/symbol.c | 10 ++++++++++ 3 files changed, 13 insertions(+) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a32a64ef08e2..8edc205ff9a7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, rb_erase(&al->sym->rb_node, &al->map->dso->symbols[al->map->type]); symbol__delete(al->sym); + dso__reset_find_symbol_cache(al->map->dso); } return 0; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c73276db6d6f..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -324,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); +void dso__reset_find_symbol_cache(struct dso *dso); + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 42e98ab5a9bb..46ae0532a8a6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -441,6 +441,16 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, return &s->sym; } +void dso__reset_find_symbol_cache(struct dso *dso) +{ + enum map_type type; + + for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { + dso->last_find_result[type].addr = 0; + dso->last_find_result[type].symbol = NULL; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { From 1e259ad4a25e37eb298bb5772b9a51e70bed7782 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Aug 2015 16:18:26 -0300 Subject: [PATCH 03/22] perf ui tui progress: Implement the ui_progress_ops->finish() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that we can erase the progress bar after we're done with it, avoiding things like: ------------------------------------------------------------------- ┌─Error:──────────────────────────────────────────────────────┐ │Can't annotate unmapped_area_topdown: │ │ │ │No vmlinux file with build id a826726b5ddacfab1f0bade868f1a79│ │was found in the path. │ │ │ │Note that annotation using /proc/kcore requires CAP_SYS_RAWIO│ ┌Processin│ │──┐ │ │Please use: │ │ └─────────│ │──┘ │ perf buildid-cache -vu vmlinux │ │ │ │or: │ │ │ │ --vmlinux vmlinux │ │ │ │ │ │Press any key... │ └─────────────────────────────────────────────────────────────┘ Can't annotate unmapped_area_topdown: ------------------------------------------------------------------- I.e. that finished progress bar behind the error window. It is not a problem when we end up redrawing the whole screen, but its ugly when we present such error windows, provide a TUI method so that code like the above may avoid this situation, as will be done with the annotation code in the next cset. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-qvktnojzwwe37pweging058t@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/tui/progress.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c61d14b101e0..c4b99008e2c9 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p) pthread_mutex_unlock(&ui__lock); } +static void tui_progress__finish(void) +{ + int y; + + if (use_browser <= 0) + return; + + ui__refresh_dimensions(false); + pthread_mutex_lock(&ui__lock); + y = SLtt_Screen_Rows / 2 - 2; + SLsmg_set_color(0); + SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' '); + SLsmg_refresh(); + pthread_mutex_unlock(&ui__lock); +} + static struct ui_progress_ops tui_progress__ops = { - .update = tui_progress__update, + .update = tui_progress__update, + .finish = tui_progress__finish, }; void tui_progress__init(void) From 5c9ce1e644c1919ac4bff4394a4c372f47a89b4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Aug 2015 17:16:22 -0300 Subject: [PATCH 04/22] perf ordered_events: Clear the progress bar at the end of a flush We were depending on the next screen operation after a flush() being one that would redraw the whole screen so that the progress bar would be overwritten, when that didn't happen a screen artifact of, say, a error dialog window would be overlaid on top of the progress bar, fix it by calling ui_browser__finish(), that now has a TUI implementation. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-el0fyw6duemnx62lydjzhs8c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ordered-events.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 52be201b9b25..b1b9e2385f4b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe) else if (last_ts <= limit) oe->last = list_entry(head->prev, struct ordered_event, list); + if (show_progress) + ui_progress__finish(); + return 0; } From 2a21d03686881331b0af0471588674e7e896eeb2 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:48 +0300 Subject: [PATCH 05/22] perf tools: Fix Intel PT 'instructions' sample period The period on synthesized 'instructions' samples was being set to a fixed value, whereas the correct value is the number of instructions since the last sample, which is a value that the decoder can provide. So do it that way. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-14-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 3 +++ tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 + tools/perf/util/intel-pt.c | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f8ac462fec1a..56790ea1e88e 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -108,6 +108,7 @@ struct intel_pt_decoder { uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; + uint64_t tot_insn_cnt; uint64_t period_insn_cnt; uint64_t period_mask; uint64_t period_ticks; @@ -559,6 +560,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, max_insn_cnt, decoder->data); + decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; @@ -1529,6 +1531,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) decoder->state.timestamp = decoder->timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; + decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; if (err) decoder->state.from_ip = decoder->ip; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 4c4880230cc9..cbf57044c385 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -58,6 +58,7 @@ struct intel_pt_state { uint64_t from_ip; uint64_t to_ip; uint64_t cr3; + uint64_t tot_insn_cnt; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index a5acd2fe2447..3b34a64195ea 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -126,6 +126,7 @@ struct intel_pt_queue { u64 timestamp; u32 flags; u16 insn_len; + u64 last_insn_cnt; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -920,11 +921,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.addr = ptq->state->to_ip; sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; - sample.period = ptq->pt->instructions_sample_period; + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; sample.cpu = ptq->cpu; sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; + ptq->last_insn_cnt = ptq->state->tot_insn_cnt; + if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, pt->synth_opts.callchain_sz, sample.ip); From bc9b6bf07c8b3f4e85509f9b3a552c86e567b4ae Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:52 +0300 Subject: [PATCH 06/22] perf tools: Add Intel PT support for PSB periods The PSB packet is a synchronization packet that provides a starting point for decoding or recovery from errors. This patch adds support for a new Intel PT feature that allows the frequency of PSB packets to be specified. Support for this feature is indicated by /sys/bus/event_source/devices/intel_pt/caps/psb_cyc which contains "1" if the feature is supported and "0" otherwise. The PSB period can be specified as a PMU config term e.g. perf record -e intel_pt/psb_period=2/u sleep 1 The default value is 3 or the nearest lower value that is supported. 0 is always supported. Valid values are given by: /sys/bus/event_source/devices/intel_pt/caps/psb_periods which contains a hexadecimal value, the bits of which represent valid values e.g. bit 2 set means value 2 is valid. The value is converted to the approximate number of trace bytes between PSB packets as: 2 ^ (value + 11) e.g. value 3 means 16KiB bytes between PSBs If an invalid value is entered, the error message will give a list of valid values e.g. $ perf record -e intel_pt/psb_period=15/u uname Invalid psb_period for intel_pt. Valid values are: 0-5 tools/perf/Documentation/intel-pt.txt is updated in a later patch as there are a number of new features being added. For more information about PSB periods refer to the Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace from June 2015 or later. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-18-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 217 +++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 7 deletions(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index da7d2c15e611..145975b003a7 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -99,17 +99,121 @@ static int intel_pt_parse_terms(struct list_head *formats, const char *str, return intel_pt_parse_terms_with_default(formats, str, config); } -static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused, - struct perf_evlist *evlist __maybe_unused) +static u64 intel_pt_masked_bits(u64 mask, u64 bits) { - return 256; + const u64 top_bit = 1ULL << 63; + u64 res = 0; + int i; + + for (i = 0; i < 64; i++) { + if (mask & top_bit) { + res <<= 1; + if (bits & top_bit) + res |= 1; + } + mask <<= 1; + bits <<= 1; + } + + return res; +} + +static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, + struct perf_evlist *evlist, u64 *res) +{ + struct perf_evsel *evsel; + u64 mask; + + *res = 0; + + mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); + if (!mask) + return -EINVAL; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type == intel_pt_pmu->type) { + *res = intel_pt_masked_bits(mask, evsel->attr.config); + return 0; + } + } + + return -EINVAL; +} + +static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, + struct perf_evlist *evlist) +{ + u64 val; + int err, topa_multiple_entries; + size_t psb_period; + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", + "%d", &topa_multiple_entries) != 1) + topa_multiple_entries = 0; + + /* + * Use caps/topa_multiple_entries to indicate early hardware that had + * extra frequent PSBs. + */ + if (!topa_multiple_entries) { + psb_period = 256; + goto out; + } + + err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); + if (err) + val = 0; + + psb_period = 1 << (val + 11); +out: + pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); + return psb_period; +} + +static int intel_pt_pick_bit(int bits, int target) +{ + int pos, pick = -1; + + for (pos = 0; bits; bits >>= 1, pos++) { + if (bits & 1) { + if (pos <= target || pick < 0) + pick = pos; + if (pos >= target) + break; + } + } + + return pick; } static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) { + char buf[256]; + int psb_cyc, psb_periods, psb_period; + int pos = 0; u64 config; - intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config); + pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); + + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", + &psb_cyc) != 1) + psb_cyc = 1; + + if (psb_cyc) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", + &psb_periods) != 1) + psb_periods = 0; + if (psb_periods) { + psb_period = intel_pt_pick_bit(psb_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",psb_period=%d", psb_period); + } + } + + pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); + + intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); + return config; } @@ -239,6 +343,103 @@ static int intel_pt_track_switches(struct perf_evlist *evlist) return 0; } +static void intel_pt_valid_str(char *str, size_t len, u64 valid) +{ + unsigned int val, last = 0, state = 1; + int p = 0; + + str[0] = '\0'; + + for (val = 0; val <= 64; val++, valid >>= 1) { + if (valid & 1) { + last = val; + switch (state) { + case 0: + p += scnprintf(str + p, len - p, ","); + /* Fall through */ + case 1: + p += scnprintf(str + p, len - p, "%u", val); + state = 2; + break; + case 2: + state = 3; + break; + case 3: + state = 4; + break; + default: + break; + } + } else { + switch (state) { + case 3: + p += scnprintf(str + p, len - p, ",%u", last); + state = 0; + break; + case 4: + p += scnprintf(str + p, len - p, "-%u", last); + state = 0; + break; + default: + break; + } + if (state != 1) + state = 0; + } + } +} + +static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, + const char *caps, const char *name, + const char *supported, u64 config) +{ + char valid_str[256]; + unsigned int shift; + unsigned long long valid; + u64 bits; + int ok; + + if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) + valid = 0; + + if (supported && + perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) + valid = 0; + + valid |= 1; + + bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); + + config &= bits; + + for (shift = 0; bits && !(bits & 1); shift++) + bits >>= 1; + + config >>= shift; + + if (config > 63) + goto out_err; + + if (valid & (1 << config)) + return 0; +out_err: + intel_pt_valid_str(valid_str, sizeof(valid_str), valid); + pr_err("Invalid %s for %s. Valid values are: %s\n", + name, INTEL_PT_PMU_NAME, valid_str); + return -EINVAL; +} + +static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, + struct perf_evsel *evsel) +{ + if (!evsel) + return 0; + + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", + "psb_period", "caps/psb_cyc", + evsel->attr.config); +} + static int intel_pt_recording_options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts) @@ -251,6 +452,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, const struct cpu_map *cpus = evlist->cpus; bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; u64 tsc_bit; + int err; ptr->evlist = evlist; ptr->snapshot_mode = opts->auxtrace_snapshot_mode; @@ -281,6 +483,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; + err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); + if (err) + return err; + /* Set default sizes for snapshot mode */ if (opts->auxtrace_snapshot_mode) { size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); @@ -366,8 +572,6 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * threads. */ if (have_timing_info && !cpu_map__empty(cpus)) { - int err; - err = intel_pt_track_switches(evlist); if (err == -EPERM) pr_debug2("Unable to select sched:sched_switch\n"); @@ -394,7 +598,6 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { struct perf_evsel *tracking_evsel; - int err; err = parse_events(evlist, "dummy:u", NULL); if (err) From 3d49807870f08d6f3406b77efd94bb3788372162 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:53 +0300 Subject: [PATCH 07/22] perf tools: Add new Intel PT packet definitions New features have been added to Intel PT which include a number of new packet definitions. This patch adds packet definitions for new packets: TMA, MTC, CYC, VMCS, TRACESTOP and MNT. Also another bit in PIP is defined. This patch only adds support for the definitions. Later patches add support for decoding TMA, MTC, CYC and TRACESTOP which is where those packets are explained. VMCS and the newly defined bit in PIP are used with virtualization which is not supported yet. MNT is a maintenance packet which the decoder should ignore. For details, refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-19-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 70 ++++++++- .../intel-pt-decoder/intel-pt-pkt-decoder.c | 142 ++++++++++++++++-- .../intel-pt-decoder/intel-pt-pkt-decoder.h | 6 + 3 files changed, 201 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 56790ea1e88e..4a0e9fb1d173 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -923,6 +923,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: case INTEL_PT_TNT: + case INTEL_PT_TRACESTOP: case INTEL_PT_BAD: case INTEL_PT_PSB: intel_pt_log("ERROR: Unexpected packet\n"); @@ -935,6 +936,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + break; + case INTEL_PT_CBR: decoder->cbr = decoder->packet.payload; break; @@ -944,7 +948,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; case INTEL_PT_FUP: @@ -956,6 +960,12 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) intel_pt_update_in_tx(decoder); break; + case INTEL_PT_MTC: + break; + + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: default: break; @@ -983,8 +993,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) switch (decoder->packet.type) { case INTEL_PT_TNT: case INTEL_PT_FUP: + case INTEL_PT_TRACESTOP: case INTEL_PT_PSB: case INTEL_PT_TSC: + case INTEL_PT_TMA: case INTEL_PT_CBR: case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: @@ -1032,13 +1044,21 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) return 0; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: + break; + + case INTEL_PT_CYC: break; case INTEL_PT_MODE_EXEC: decoder->exec_mode = decoder->packet.payload; break; + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: break; @@ -1122,6 +1142,9 @@ next: } return intel_pt_walk_fup_tip(decoder); + case INTEL_PT_TRACESTOP: + break; + case INTEL_PT_PSB: intel_pt_clear_stack(&decoder->stack); err = intel_pt_walk_psbend(decoder); @@ -1132,13 +1155,22 @@ next: break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); + break; + + case INTEL_PT_MTC: break; case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + break; + + case INTEL_PT_CYC: + break; + case INTEL_PT_CBR: decoder->cbr = decoder->packet.payload; break; @@ -1162,6 +1194,8 @@ next: return intel_pt_bug(decoder); case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: break; @@ -1202,16 +1236,25 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) } break; + case INTEL_PT_MTC: + break; + case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + break; + + case INTEL_PT_CYC: + break; + case INTEL_PT_CBR: decoder->cbr = decoder->packet.payload; break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; case INTEL_PT_MODE_EXEC: @@ -1222,6 +1265,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) intel_pt_update_in_tx(decoder); break; + case INTEL_PT_TRACESTOP: case INTEL_PT_TNT: intel_pt_log("ERROR: Unexpected packet\n"); if (decoder->ip) @@ -1240,6 +1284,8 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) return 0; case INTEL_PT_PSB: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: default: break; @@ -1282,16 +1328,25 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) intel_pt_set_last_ip(decoder); break; + case INTEL_PT_MTC: + break; + case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; + case INTEL_PT_TMA: + break; + + case INTEL_PT_CYC: + break; + case INTEL_PT_CBR: decoder->cbr = decoder->packet.payload; break; case INTEL_PT_PIP: - decoder->cr3 = decoder->packet.payload; + decoder->cr3 = decoder->packet.payload & (BIT63 - 1); break; case INTEL_PT_MODE_EXEC: @@ -1308,6 +1363,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_BAD: /* Does not happen */ return intel_pt_bug(decoder); + case INTEL_PT_TRACESTOP: + break; + case INTEL_PT_PSB: err = intel_pt_walk_psb(decoder); if (err) @@ -1321,6 +1379,8 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TNT: case INTEL_PT_PSBEND: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: case INTEL_PT_PAD: default: break; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 988c82c6652d..b1257c816310 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -24,6 +24,8 @@ #define BIT63 ((uint64_t)1 << 63) +#define NR_FLAG BIT63 + #if __BYTE_ORDER == __BIG_ENDIAN #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 @@ -46,15 +48,21 @@ static const char * const packet_name[] = { [INTEL_PT_TIP_PGD] = "TIP.PGD", [INTEL_PT_TIP_PGE] = "TIP.PGE", [INTEL_PT_TSC] = "TSC", + [INTEL_PT_TMA] = "TMA", [INTEL_PT_MODE_EXEC] = "MODE.Exec", [INTEL_PT_MODE_TSX] = "MODE.TSX", + [INTEL_PT_MTC] = "MTC", [INTEL_PT_TIP] = "TIP", [INTEL_PT_FUP] = "FUP", + [INTEL_PT_CYC] = "CYC", + [INTEL_PT_VMCS] = "VMCS", [INTEL_PT_PSB] = "PSB", [INTEL_PT_PSBEND] = "PSBEND", [INTEL_PT_CBR] = "CBR", + [INTEL_PT_TRACESTOP] = "TraceSTOP", [INTEL_PT_PIP] = "PIP", [INTEL_PT_OVF] = "OVF", + [INTEL_PT_MNT] = "MNT", }; const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) @@ -96,10 +104,18 @@ static int intel_pt_get_pip(const unsigned char *buf, size_t len, packet->type = INTEL_PT_PIP; memcpy_le64(&payload, buf + 2, 6); packet->payload = payload >> 1; + if (payload & 1) + packet->payload |= NR_FLAG; return 8; } +static int intel_pt_get_tracestop(struct intel_pt_pkt *packet) +{ + packet->type = INTEL_PT_TRACESTOP; + return 2; +} + static int intel_pt_get_cbr(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { @@ -110,6 +126,24 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, return 4; } +static int intel_pt_get_vmcs(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + unsigned int count = (52 - 5) >> 3; + + if (count < 1 || count > 7) + return INTEL_PT_BAD_PACKET; + + if (len < count + 2) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_VMCS; + packet->count = count; + memcpy_le64(&packet->payload, buf + 2, count); + + return count + 2; +} + static int intel_pt_get_ovf(struct intel_pt_pkt *packet) { packet->type = INTEL_PT_OVF; @@ -139,12 +173,49 @@ static int intel_pt_get_psbend(struct intel_pt_pkt *packet) return 2; } +static int intel_pt_get_tma(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 7) + return INTEL_PT_NEED_MORE_BYTES; + + packet->type = INTEL_PT_TMA; + packet->payload = buf[2] | (buf[3] << 8); + packet->count = buf[5] | ((buf[6] & BIT(0)) << 8); + return 7; +} + static int intel_pt_get_pad(struct intel_pt_pkt *packet) { packet->type = INTEL_PT_PAD; return 1; } +static int intel_pt_get_mnt(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 11) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MNT; + memcpy_le64(&packet->payload, buf + 3, 8); + return 11 +; +} + +static int intel_pt_get_3byte(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 3) + return INTEL_PT_NEED_MORE_BYTES; + + switch (buf[2]) { + case 0x88: /* MNT */ + return intel_pt_get_mnt(buf, len, packet); + default: + return INTEL_PT_BAD_PACKET; + } +} + static int intel_pt_get_ext(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { @@ -156,14 +227,22 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len, return intel_pt_get_long_tnt(buf, len, packet); case 0x43: /* PIP */ return intel_pt_get_pip(buf, len, packet); + case 0x83: /* TraceStop */ + return intel_pt_get_tracestop(packet); case 0x03: /* CBR */ return intel_pt_get_cbr(buf, len, packet); + case 0xc8: /* VMCS */ + return intel_pt_get_vmcs(buf, len, packet); case 0xf3: /* OVF */ return intel_pt_get_ovf(packet); case 0x82: /* PSB */ return intel_pt_get_psb(buf, len, packet); case 0x23: /* PSBEND */ return intel_pt_get_psbend(packet); + case 0x73: /* TMA */ + return intel_pt_get_tma(buf, len, packet); + case 0xC3: /* 3-byte header */ + return intel_pt_get_3byte(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -187,6 +266,28 @@ static int intel_pt_get_short_tnt(unsigned int byte, return 1; } +static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, + size_t len, struct intel_pt_pkt *packet) +{ + unsigned int offs = 1, shift; + uint64_t payload = byte >> 3; + + byte >>= 2; + len -= 1; + for (shift = 5; byte & 1; shift += 7) { + if (offs > 9) + return INTEL_PT_BAD_PACKET; + if (len < offs) + return INTEL_PT_NEED_MORE_BYTES; + byte = buf[offs++]; + payload |= (byte >> 1) << shift; + } + + packet->type = INTEL_PT_CYC; + packet->payload = payload; + return offs; +} + static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) @@ -269,6 +370,16 @@ static int intel_pt_get_tsc(const unsigned char *buf, size_t len, return 8; } +static int intel_pt_get_mtc(const unsigned char *buf, size_t len, + struct intel_pt_pkt *packet) +{ + if (len < 2) + return INTEL_PT_NEED_MORE_BYTES; + packet->type = INTEL_PT_MTC; + packet->payload = buf[1]; + return 2; +} + static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { @@ -288,6 +399,9 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, return intel_pt_get_short_tnt(byte, packet); } + if ((byte & 2)) + return intel_pt_get_cyc(byte, buf, len, packet); + switch (byte & 0x1f) { case 0x0D: return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet); @@ -305,6 +419,8 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, return intel_pt_get_mode(buf, len, packet); case 0x19: return intel_pt_get_tsc(buf, len, packet); + case 0x59: + return intel_pt_get_mtc(buf, len, packet); default: return INTEL_PT_BAD_PACKET; } @@ -329,7 +445,7 @@ int intel_pt_get_packet(const unsigned char *buf, size_t len, int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t buf_len) { - int ret, i; + int ret, i, nr; unsigned long long payload = packet->payload; const char *name = intel_pt_pkt_name(packet->type); @@ -338,6 +454,7 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, case INTEL_PT_PAD: case INTEL_PT_PSB: case INTEL_PT_PSBEND: + case INTEL_PT_TRACESTOP: case INTEL_PT_OVF: return snprintf(buf, buf_len, "%s", name); case INTEL_PT_TNT: { @@ -371,17 +488,16 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, case INTEL_PT_FUP: if (!(packet->count)) return snprintf(buf, buf_len, "%s no ip", name); + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_MTC: + case INTEL_PT_MNT: case INTEL_PT_CBR: - return snprintf(buf, buf_len, "%s 0x%llx", name, payload); case INTEL_PT_TSC: - if (packet->count) - return snprintf(buf, buf_len, - "%s 0x%llx CTC 0x%x FC 0x%x", - name, payload, packet->count & 0xffff, - (packet->count >> 16) & 0x1ff); - else - return snprintf(buf, buf_len, "%s 0x%llx", - name, payload); + return snprintf(buf, buf_len, "%s 0x%llx", name, payload); + case INTEL_PT_TMA: + return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name, + (unsigned)payload, packet->count); case INTEL_PT_MODE_EXEC: return snprintf(buf, buf_len, "%s %lld", name, payload); case INTEL_PT_MODE_TSX: @@ -389,8 +505,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, name, (unsigned)(payload >> 1) & 1, (unsigned)payload & 1); case INTEL_PT_PIP: - ret = snprintf(buf, buf_len, "%s 0x%llx", - name, payload); + nr = packet->payload & NR_FLAG ? 1 : 0; + payload &= ~NR_FLAG; + ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", + name, payload, nr); return ret; default: break; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h index 53404fa942b3..781bb79883bd 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h @@ -37,15 +37,21 @@ enum intel_pt_pkt_type { INTEL_PT_TIP_PGD, INTEL_PT_TIP_PGE, INTEL_PT_TSC, + INTEL_PT_TMA, INTEL_PT_MODE_EXEC, INTEL_PT_MODE_TSX, + INTEL_PT_MTC, INTEL_PT_TIP, INTEL_PT_FUP, + INTEL_PT_CYC, + INTEL_PT_VMCS, INTEL_PT_PSB, INTEL_PT_PSBEND, INTEL_PT_CBR, + INTEL_PT_TRACESTOP, INTEL_PT_PIP, INTEL_PT_OVF, + INTEL_PT_MNT, }; struct intel_pt_pkt { From 11fa7cb86b56d3610043ba2ac6cbd81feab4b7c4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:54 +0300 Subject: [PATCH 08/22] perf tools: Pass Intel PT information for decoding MTC and CYC Record additional information in the AUXTRACE_INFO event in preparation for decoding MTC and CYC packets. Pass the information to the decoder. The AUXTRACE_INFO record can be extended by using the size to indicate the presence of new members. The additional information includes PMU config bit positions and the TSC to CTC (hardware crystal clock) ratio needed to decode MTC packets. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-20-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 24 ++++++- .../util/intel-pt-decoder/intel-pt-decoder.h | 3 + tools/perf/util/intel-pt.c | 62 ++++++++++++++++--- tools/perf/util/intel-pt.h | 5 ++ 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 145975b003a7..faae9289bcf6 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../../perf.h" #include "../../util/session.h" @@ -261,6 +262,15 @@ static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused return INTEL_PT_AUXTRACE_PRIV_SIZE; } +static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); + *n = ebx; + *d = eax; +} + static int intel_pt_info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -272,7 +282,8 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, struct perf_event_mmap_page *pc; struct perf_tsc_conversion tc = { .time_mult = 0, }; bool cap_user_time_zero = false, per_cpu_mmaps; - u64 tsc_bit, noretcomp_bit; + u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; + u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; int err; if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) @@ -281,6 +292,12 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", &noretcomp_bit); + intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); + mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, + "mtc_period"); + intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); + + intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); if (!session->evlist->nr_mmaps) return -EINVAL; @@ -311,6 +328,11 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; + auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; + auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; + auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; + auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; + auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; return 0; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index cbf57044c385..56cc47baca11 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -87,6 +87,9 @@ struct intel_pt_params { uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; + unsigned int mtc_period; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 3b34a64195ea..bb41c20e6005 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -91,6 +91,11 @@ struct intel_pt { bool synth_needs_swap; u64 tsc_bit; + u64 mtc_bit; + u64 mtc_freq_bits; + u32 tsc_ctc_ratio_n; + u32 tsc_ctc_ratio_d; + u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; }; @@ -568,6 +573,25 @@ static bool intel_pt_return_compression(struct intel_pt *pt) return true; } +static unsigned int intel_pt_mtc_period(struct intel_pt *pt) +{ + struct perf_evsel *evsel; + unsigned int shift; + u64 config; + + if (!pt->mtc_freq_bits) + return 0; + + for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) + config >>= 1; + + evlist__for_each(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->attr, &config)) + return (config & pt->mtc_freq_bits) >> shift; + } + return 0; +} + static bool intel_pt_timeless_decoding(struct intel_pt *pt) { struct perf_evsel *evsel; @@ -668,6 +692,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; + params.mtc_period = intel_pt_mtc_period(pt); + params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; + params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; if (pt->synth_opts.instructions) { if (pt->synth_opts.period) { @@ -1751,16 +1778,20 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) } static const char * const intel_pt_info_fmts[] = { - [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", - [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", - [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", - [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", - [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", - [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", - [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", - [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", - [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", + [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", + [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", + [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", + [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", + [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", + [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", + [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", + [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", + [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", + [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", }; static void intel_pt_print_info(u64 *arr, int start, int finish) @@ -1812,6 +1843,17 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, INTEL_PT_PER_CPU_MMAPS); + if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + + (sizeof(u64) * INTEL_PT_CYC_BIT)) { + pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; + pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; + pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; + pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; + pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; + intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, + INTEL_PT_CYC_BIT); + } + pt->timeless_decoding = intel_pt_timeless_decoding(pt); pt->have_tsc = intel_pt_have_tsc(pt); pt->sampling_mode = false; diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h index a1bfe93473ba..0065949df693 100644 --- a/tools/perf/util/intel-pt.h +++ b/tools/perf/util/intel-pt.h @@ -29,6 +29,11 @@ enum { INTEL_PT_HAVE_SCHED_SWITCH, INTEL_PT_SNAPSHOT_MODE, INTEL_PT_PER_CPU_MMAPS, + INTEL_PT_MTC_BIT, + INTEL_PT_MTC_FREQ_BITS, + INTEL_PT_TSC_CTC_N, + INTEL_PT_TSC_CTC_D, + INTEL_PT_CYC_BIT, INTEL_PT_AUXTRACE_PRIV_MAX, }; From 79b58424b821c651a4b4df9018a14684e3670f42 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:55 +0300 Subject: [PATCH 09/22] perf tools: Add Intel PT support for decoding MTC packets MTC packets provide finer grain timestamp information than TSC packets. MTC packets record time using the hardware crystal clock (CTC) which is related to TSC packets using a TMA packet. This patch just adds decoder support. Support for a default value and validation of values is provided by a later patch. Also documentation is updated in a separate patch. For details refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-21-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 162 +++++++++++++++++- .../util/intel-pt-decoder/intel-pt-decoder.h | 1 + 2 files changed, 159 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 4a0e9fb1d173..f7119a11a4b6 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -85,7 +85,9 @@ struct intel_pt_decoder { const unsigned char *buf; size_t len; bool return_compression; + bool mtc_insn; bool pge; + bool have_tma; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -94,6 +96,15 @@ struct intel_pt_decoder { uint64_t tsc_timestamp; uint64_t ref_timestamp; uint64_t ret_addr; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint32_t last_mtc; + uint32_t tsc_ctc_ratio_n; + uint32_t tsc_ctc_ratio_d; + uint32_t tsc_ctc_mult; + uint32_t tsc_slip; + uint32_t ctc_rem_mask; + int mtc_shift; struct intel_pt_stack stack; enum intel_pt_pkt_state pkt_state; struct intel_pt_pkt packet; @@ -149,6 +160,13 @@ static void intel_pt_setup_period(struct intel_pt_decoder *decoder) } } +static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d) +{ + if (!d) + return 0; + return (t / d) * n + ((t % d) * n) / d; +} + struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) { struct intel_pt_decoder *decoder; @@ -175,6 +193,39 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) intel_pt_setup_period(decoder); + decoder->mtc_shift = params->mtc_period; + decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1; + + decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n; + decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d; + + if (!decoder->tsc_ctc_ratio_n) + decoder->tsc_ctc_ratio_d = 0; + + if (decoder->tsc_ctc_ratio_d) { + if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) + decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / + decoder->tsc_ctc_ratio_d; + + /* + * Allow for timestamps appearing to backwards because a TSC + * packet has slipped past a MTC packet, so allow 2 MTC ticks + * or ... + */ + decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + /* ... or 0x100 paranoia */ + if (decoder->tsc_slip < 0x100) + decoder->tsc_slip = 0x100; + + intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); + intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); + intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); + intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); + intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + return decoder; } @@ -368,6 +419,7 @@ static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) { intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; decoder->pkt_len = 1; decoder->pkt_step = 1; intel_pt_decoder_log_packet(decoder); @@ -400,6 +452,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_NO_PSB; decoder->ref_timestamp = buffer.ref_timestamp; decoder->timestamp = 0; + decoder->have_tma = false; decoder->state.trace_nr = buffer.trace_nr; intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", decoder->ref_timestamp); @@ -523,6 +576,7 @@ static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) case INTEL_PT_PERIOD_TICKS: return intel_pt_next_period(decoder); case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: default: return 0; } @@ -542,6 +596,7 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) decoder->last_masked_timestamp = masked_timestamp; break; case INTEL_PT_PERIOD_NONE: + case INTEL_PT_PERIOD_MTC: default: break; } @@ -555,6 +610,9 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, uint64_t max_insn_cnt, insn_cnt = 0; int err; + if (!decoder->mtc_insn) + decoder->mtc_insn = true; + max_insn_cnt = intel_pt_next_sample(decoder); err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, @@ -861,6 +919,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t timestamp; + decoder->have_tma = false; + if (decoder->ref_timestamp) { timestamp = decoder->packet.payload | (decoder->ref_timestamp & (0xffULL << 56)); @@ -878,17 +938,18 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) } else if (decoder->timestamp) { timestamp = decoder->packet.payload | (decoder->timestamp & (0xffULL << 56)); + decoder->tsc_timestamp = timestamp; if (timestamp < decoder->timestamp && - decoder->timestamp - timestamp < 0x100) { - intel_pt_log_to("ERROR: Suppressing backwards timestamp", + decoder->timestamp - timestamp < decoder->tsc_slip) { + intel_pt_log_to("Suppressing backwards timestamp", timestamp); timestamp = decoder->timestamp; } while (timestamp < decoder->timestamp) { intel_pt_log_to("Wraparound timestamp", timestamp); timestamp += (1ULL << 56); + decoder->tsc_timestamp = timestamp; } - decoder->tsc_timestamp = timestamp; decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; } @@ -900,11 +961,73 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; return -EOVERFLOW; } +static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) +{ + uint32_t ctc = decoder->packet.payload; + uint32_t fc = decoder->packet.count; + uint32_t ctc_rem = ctc & decoder->ctc_rem_mask; + + if (!decoder->tsc_ctc_ratio_d) + return; + + decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + decoder->ctc_timestamp = decoder->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + decoder->ctc_timestamp -= multdiv(ctc_rem, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + decoder->ctc_delta = 0; + decoder->have_tma = true; + intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", + decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); +} + +static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp; + uint32_t mtc, mtc_delta; + + if (!decoder->have_tma) + return; + + mtc = decoder->packet.payload; + + if (mtc > decoder->last_mtc) + mtc_delta = mtc - decoder->last_mtc; + else + mtc_delta = mtc + 256 - decoder->last_mtc; + + decoder->ctc_delta += mtc_delta << decoder->mtc_shift; + + if (decoder->tsc_ctc_mult) { + timestamp = decoder->ctc_timestamp + + decoder->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = decoder->ctc_timestamp + + multdiv(decoder->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; + + decoder->timestamp_insn_cnt = 0; + decoder->last_mtc = mtc; +} + /* Walk PSB+ packets when already in sync. */ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) { @@ -926,6 +1049,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_TRACESTOP: case INTEL_PT_BAD: case INTEL_PT_PSB: + decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); return -EAGAIN; @@ -937,6 +1061,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); break; case INTEL_PT_CBR: @@ -961,6 +1086,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; break; case INTEL_PT_CYC: @@ -1048,6 +1176,9 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) break; case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type == INTEL_PT_PERIOD_MTC) + decoder->state.type |= INTEL_PT_INSTRUCTION; break; case INTEL_PT_CYC: @@ -1159,13 +1290,31 @@ next: break; case INTEL_PT_MTC: - break; + intel_pt_calc_mtc_timestamp(decoder); + if (decoder->period_type != INTEL_PT_PERIOD_MTC) + break; + /* + * Ensure that there has been an instruction since the + * last MTC. + */ + if (!decoder->mtc_insn) + break; + decoder->mtc_insn = false; + /* Ensure that there is a timestamp */ + if (!decoder->timestamp) + break; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + decoder->mtc_insn = false; + return 0; case INTEL_PT_TSC: intel_pt_calc_tsc_timestamp(decoder); break; case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); break; case INTEL_PT_CYC: @@ -1237,6 +1386,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); break; case INTEL_PT_TSC: @@ -1244,6 +1394,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); break; case INTEL_PT_CYC: @@ -1267,6 +1418,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_TRACESTOP: case INTEL_PT_TNT: + decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); if (decoder->ip) decoder->pkt_state = INTEL_PT_STATE_ERR4; @@ -1329,6 +1481,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); break; case INTEL_PT_TSC: @@ -1336,6 +1489,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); break; case INTEL_PT_CYC: diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 56cc47baca11..02c38fec1c37 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -36,6 +36,7 @@ enum intel_pt_period_type { INTEL_PT_PERIOD_NONE, INTEL_PT_PERIOD_INSTRUCTIONS, INTEL_PT_PERIOD_TICKS, + INTEL_PT_PERIOD_MTC, }; enum { From b45fc0bfaf4a0b60ce2deda222f8ef2a23b89a5f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:56 +0300 Subject: [PATCH 10/22] perf tools: Add Intel PT support for using MTC packets MTC packets are a new Intel PT feature. MTC packets provide finer grain timestamp information than TSC packets. Support for this feature is indicated by: /sys/bus/event_source/devices/intel_pt/caps/mtc which contains "1" if the feature is supported and "0" otherwise. MTC packets can be requested using a PMU config term e.g. perf record -e intel_pt/mtc/u sleep 1 The frequency of MTC packets can also be specified. e.g. perf record -e intel_pt/mtc,mtc_period=2/u sleep 1 The default value is 3 or the nearest lower value that is supported. 0 is always supported. Valid values are given by: /sys/bus/event_source/devices/intel_pt/caps/mtc_periods which contains a hexadecimal value, the bits of which represent valid values e.g. bit 2 set means value 2 is valid. The value is converted to the MTC frequency as: CTC-frequency / (2 ^ value) e.g. value 3 means one eighth of CTC-frequency Where CTC is the hardware crystal clock, the frequency of which can be related to TSC via values provided in cpuid leaf 0x15. If an invalid value is entered, the error message will give a list of valid values e.g. $ perf record -e intel_pt/mtc_period=15/u uname Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 tools/perf/Documentation/intel-pt.txt is updated in a later patch as there are a number of new features being added. For more information refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-22-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index faae9289bcf6..a5de01dad868 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -190,17 +190,33 @@ static int intel_pt_pick_bit(int bits, int target) static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) { char buf[256]; + int mtc, mtc_periods = 0, mtc_period; int psb_cyc, psb_periods, psb_period; int pos = 0; u64 config; pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", + &mtc) != 1) + mtc = 1; + + if (mtc) { + if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", + &mtc_periods) != 1) + mtc_periods = 0; + if (mtc_periods) { + mtc_period = intel_pt_pick_bit(mtc_periods, 3); + pos += scnprintf(buf + pos, sizeof(buf) - pos, + ",mtc,mtc_period=%d", mtc_period); + } + } + if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", &psb_cyc) != 1) psb_cyc = 1; - if (psb_cyc) { + if (psb_cyc && mtc_periods) { if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", &psb_periods) != 1) psb_periods = 0; @@ -454,9 +470,17 @@ out_err: static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, struct perf_evsel *evsel) { + int err; + if (!evsel) return 0; + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", + "mtc_period", "caps/mtc", + evsel->attr.config); + if (err) + return err; + return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", "psb_period", "caps/psb_cyc", evsel->attr.config); From cc33618619cefc6d730cca3bb8e15311016a4da7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:57 +0300 Subject: [PATCH 11/22] perf tools: Add Intel PT support for decoding CYC packets CYC packets provide even finer grain timestamp information than MTC and TSC packets. A CYC packet contains the number of CPU cycles since the last CYC packet. This patch just adds decoder support. The CPU frequency can be related to TSC using the Maximum Non-Turbo Ratio in combination with the CBR (core-to-bus ratio) packet. However more accuracy is achieved by simply interpolating the number of cycles between other timing packets like MTC or TSC. This patch takes the latter approach. Support for a default value and validation of values is provided by a later patch. Also documentation is updated in a separate patch. For details refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-23-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 311 +++++++++++++++++- 1 file changed, 306 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f7119a11a4b6..0845c5e6ad1d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -88,6 +88,7 @@ struct intel_pt_decoder { bool mtc_insn; bool pge; bool have_tma; + bool have_cyc; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -98,6 +99,8 @@ struct intel_pt_decoder { uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; + uint64_t cycle_cnt; + uint64_t cyc_ref_timestamp; uint32_t last_mtc; uint32_t tsc_ctc_ratio_n; uint32_t tsc_ctc_ratio_d; @@ -111,8 +114,13 @@ struct intel_pt_decoder { struct intel_pt_pkt tnt; int pkt_step; int pkt_len; + int last_packet_type; unsigned int cbr; unsigned int max_non_turbo_ratio; + double max_non_turbo_ratio_fp; + double cbr_cyc_to_tsc; + double calc_cyc_to_tsc; + bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; uint64_t sign_bit; @@ -189,7 +197,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->period = params->period; decoder->period_type = params->period_type; - decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; + decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; intel_pt_setup_period(decoder); @@ -514,10 +523,247 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) return ret; } +struct intel_pt_pkt_info { + struct intel_pt_decoder *decoder; + struct intel_pt_pkt packet; + uint64_t pos; + int pkt_len; + int last_packet_type; + void *data; +}; + +typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); + +/* Lookahead packets in current buffer */ +static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, + intel_pt_pkt_cb_t cb, void *data) +{ + struct intel_pt_pkt_info pkt_info; + const unsigned char *buf = decoder->buf; + size_t len = decoder->len; + int ret; + + pkt_info.decoder = decoder; + pkt_info.pos = decoder->pos; + pkt_info.pkt_len = decoder->pkt_step; + pkt_info.last_packet_type = decoder->last_packet_type; + pkt_info.data = data; + + while (1) { + do { + pkt_info.pos += pkt_info.pkt_len; + buf += pkt_info.pkt_len; + len -= pkt_info.pkt_len; + + if (!len) + return INTEL_PT_NEED_MORE_BYTES; + + ret = intel_pt_get_packet(buf, len, &pkt_info.packet); + if (!ret) + return INTEL_PT_NEED_MORE_BYTES; + if (ret < 0) + return ret; + + pkt_info.pkt_len = ret; + } while (pkt_info.packet.type == INTEL_PT_PAD); + + ret = cb(&pkt_info); + if (ret) + return 0; + + pkt_info.last_packet_type = pkt_info.packet.type; + } +} + +struct intel_pt_calc_cyc_to_tsc_info { + uint64_t cycle_cnt; + unsigned int cbr; + uint32_t last_mtc; + uint64_t ctc_timestamp; + uint64_t ctc_delta; + uint64_t tsc_timestamp; + uint64_t timestamp; + bool have_tma; + bool from_mtc; + double cbr_cyc_to_tsc; +}; + +static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_decoder *decoder = pkt_info->decoder; + struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data; + uint64_t timestamp; + double cyc_to_tsc; + unsigned int cbr; + uint32_t mtc, mtc_delta, ctc, fc, ctc_rem; + + switch (pkt_info->packet.type) { + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TIP: + case INTEL_PT_FUP: + case INTEL_PT_PSB: + case INTEL_PT_PIP: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_PSBEND: + case INTEL_PT_PAD: + case INTEL_PT_VMCS: + case INTEL_PT_MNT: + return 0; + + case INTEL_PT_MTC: + if (!data->have_tma) + return 0; + + mtc = pkt_info->packet.payload; + if (mtc > data->last_mtc) + mtc_delta = mtc - data->last_mtc; + else + mtc_delta = mtc + 256 - data->last_mtc; + data->ctc_delta += mtc_delta << decoder->mtc_shift; + data->last_mtc = mtc; + + if (decoder->tsc_ctc_mult) { + timestamp = data->ctc_timestamp + + data->ctc_delta * decoder->tsc_ctc_mult; + } else { + timestamp = data->ctc_timestamp + + multdiv(data->ctc_delta, + decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + if (timestamp < data->timestamp) + return 1; + + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + data->timestamp = timestamp; + return 0; + } + + break; + + case INTEL_PT_TSC: + timestamp = pkt_info->packet.payload | + (data->timestamp & (0xffULL << 56)); + if (data->from_mtc && timestamp < data->timestamp && + data->timestamp - timestamp < decoder->tsc_slip) + return 1; + while (timestamp < data->timestamp) + timestamp += (1ULL << 56); + if (pkt_info->last_packet_type != INTEL_PT_CYC) { + if (data->from_mtc) + return 1; + data->tsc_timestamp = timestamp; + data->timestamp = timestamp; + return 0; + } + break; + + case INTEL_PT_TMA: + if (data->from_mtc) + return 1; + + if (!decoder->tsc_ctc_ratio_d) + return 0; + + ctc = pkt_info->packet.payload; + fc = pkt_info->packet.count; + ctc_rem = ctc & decoder->ctc_rem_mask; + + data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + + data->ctc_timestamp = data->tsc_timestamp - fc; + if (decoder->tsc_ctc_mult) { + data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; + } else { + data->ctc_timestamp -= + multdiv(ctc_rem, decoder->tsc_ctc_ratio_n, + decoder->tsc_ctc_ratio_d); + } + + data->ctc_delta = 0; + data->have_tma = true; + + return 0; + + case INTEL_PT_CYC: + data->cycle_cnt += pkt_info->packet.payload; + return 0; + + case INTEL_PT_CBR: + cbr = pkt_info->packet.payload; + if (data->cbr && data->cbr != cbr) + return 1; + data->cbr = cbr; + data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + return 0; + + case INTEL_PT_TIP_PGD: + case INTEL_PT_TRACESTOP: + case INTEL_PT_OVF: + case INTEL_PT_BAD: /* Does not happen */ + default: + return 1; + } + + if (!data->cbr && decoder->cbr) { + data->cbr = decoder->cbr; + data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc; + } + + if (!data->cycle_cnt) + return 1; + + cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; + + if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc && + cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + return 1; + } + + decoder->calc_cyc_to_tsc = cyc_to_tsc; + decoder->have_calc_cyc_to_tsc = true; + + if (data->cbr) { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n", + cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); + } else { + intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n", + cyc_to_tsc, pkt_info->pos); + } + + return 1; +} + +static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, + bool from_mtc) +{ + struct intel_pt_calc_cyc_to_tsc_info data = { + .cycle_cnt = 0, + .cbr = 0, + .last_mtc = decoder->last_mtc, + .ctc_timestamp = decoder->ctc_timestamp, + .ctc_delta = decoder->ctc_delta, + .tsc_timestamp = decoder->tsc_timestamp, + .timestamp = decoder->timestamp, + .have_tma = decoder->have_tma, + .from_mtc = from_mtc, + .cbr_cyc_to_tsc = 0, + }; + + intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); +} + static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) { int ret; + decoder->last_packet_type = decoder->packet.type; + do { decoder->pos += decoder->pkt_step; decoder->buf += decoder->pkt_step; @@ -954,6 +1200,13 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp_insn_cnt = 0; } + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, false); + } + intel_pt_log_to("Setting timestamp", decoder->timestamp); } @@ -962,6 +1215,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); decoder->have_tma = false; + decoder->cbr = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; return -EOVERFLOW; @@ -1026,6 +1280,49 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp_insn_cnt = 0; decoder->last_mtc = mtc; + + if (decoder->last_packet_type == INTEL_PT_CYC) { + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; + decoder->have_calc_cyc_to_tsc = false; + intel_pt_calc_cyc_to_tsc(decoder, true); + } +} + +static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) +{ + unsigned int cbr = decoder->packet.payload; + + if (decoder->cbr == cbr) + return; + + decoder->cbr = cbr; + decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; +} + +static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) +{ + uint64_t timestamp = decoder->cyc_ref_timestamp; + + decoder->have_cyc = true; + + decoder->cycle_cnt += decoder->packet.payload; + + if (!decoder->cyc_ref_timestamp) + return; + + if (decoder->have_calc_cyc_to_tsc) + timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc; + else if (decoder->cbr) + timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc; + else + return; + + if (timestamp < decoder->timestamp) + intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", + timestamp, decoder->timestamp); + else + decoder->timestamp = timestamp; } /* Walk PSB+ packets when already in sync. */ @@ -1065,7 +1362,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_MODE_EXEC: @@ -1182,6 +1479,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) break; case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); break; case INTEL_PT_MODE_EXEC: @@ -1318,10 +1616,11 @@ next: break; case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); break; case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_MODE_EXEC: @@ -1398,10 +1697,11 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); break; case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_PIP: @@ -1493,10 +1793,11 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) break; case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); break; case INTEL_PT_CBR: - decoder->cbr = decoder->packet.payload; + intel_pt_calc_cbr(decoder); break; case INTEL_PT_PIP: From 0de802abd14abdf8cbbba28b421a1a00fa0939d5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:58 +0300 Subject: [PATCH 12/22] perf tools: Add Intel PT support for using CYC packets CYC packets are a new Intel PT feature. CYC packets provide even finer grain timestamp information than MTC and TSC packets. A CYC packet contains the number of CPU cycles since the last CYC packet. Unlike MTC and TSC packets, CYC packets are only sent when another packet is also sent. Support for this feature is indicated by: /sys/bus/event_source/devices/intel_pt/caps/psb_cyc which contains "1" if the feature is supported and "0" otherwise. CYC packets can be requested using a PMU config term e.g. perf record -e intel_pt/cyc/u sleep 1 The frequency of CYC packets can also be specified. e.g. perf record -e intel_pt/cyc,cyc_thresh=2/u sleep 1 CYC packets are not requested by default. Valid cyc_thresh values are given by: /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds which contains a hexadecimal value, the bits of which represent valid values e.g. bit 2 set means value 2 is valid. The value represents the minimum number of CPU cycles that must have passed before a CYC packet can be sent. The number of CPU cycles is: 2 ^ (value - 1) e.g. value 4 means 8 CPU cycles must pass before a CYC packet can be sent. Note a CYC packet is still only sent when another packet is sent, not at, e.g. every 8 CPU cycles. If an invalid value is entered, the error message will give a list of valid values e.g. $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname Invalid cyc_thresh for intel_pt. Valid values are: 0-12 tools/perf/Documentation/intel-pt.txt is updated in a later patch as there are a number of new features being added. For more information refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-24-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index a5de01dad868..2ca10d796c0b 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -475,6 +475,12 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, if (!evsel) return 0; + err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", + "cyc_thresh", "caps/psb_cyc", + evsel->attr.config); + if (err) + return err; + err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", "mtc_period", "caps/mtc", evsel->attr.config); From 7eacca3ebb03a4ee7bb41284aafeb19a54242621 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:33:59 +0300 Subject: [PATCH 13/22] perf tools: Add Intel PT support for decoding TRACESTOP packets A TRACESTOP packet is produced when an Intel PT trace enters a defined region of the address space at which point the tracing stops. This patch just adds decoder support. Support for specifying TRACESTOP regions is left until later. For details refer to the June 2015 or later Intel 64 and IA-32 Architectures SDM Chapter 36 Intel Processor Trace. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-25-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 0845c5e6ad1d..22ba50224319 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1572,6 +1572,10 @@ next: return intel_pt_walk_fup_tip(decoder); case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; break; case INTEL_PT_PSB: @@ -1717,6 +1721,9 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) break; case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); case INTEL_PT_TNT: decoder->have_tma = false; intel_pt_log("ERROR: Unexpected packet\n"); @@ -1819,6 +1826,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) return intel_pt_bug(decoder); case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->continuous_period = false; + intel_pt_clear_tx_flags(decoder); + decoder->have_tma = false; break; case INTEL_PT_PSB: From 9d1bf02ac3d41367896b38793db6f8f30bb9a295 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 17 Jul 2015 19:34:00 +0300 Subject: [PATCH 14/22] perf tools: Update Intel PT documentation Update Intel PT documentation to describe new features. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1437150840-31811-26-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 194 ++++++++++++++++++++++++-- 1 file changed, 186 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 2866b62eb293..4a0501d7a3b4 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -142,19 +142,21 @@ which is the same as -e intel_pt/tsc=1,noretcomp=0/ +Note there are now new config terms - see section 'config terms' further below. + The config terms are listed in /sys/devices/intel_pt/format. They are bit fields within the config member of the struct perf_event_attr which is passed to the kernel by the perf_event_open system call. They correspond to bit fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: - $ for f in `ls /sys/devices/intel_pt/format`;do - > echo $f - > cat /sys/devices/intel_pt/format/$f - > done - noretcomp - config:11 - tsc - config:10 + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 Note that the default config must be overridden for each term i.e. @@ -209,9 +211,185 @@ perf_event_attr is displayed if the -vv option is used e.g. ------------------------------------------------------------ +config terms +------------ + +The June 2015 version of Intel 64 and IA-32 Architectures Software Developer +Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. +Some of the features are reflect in new config terms. All the config terms are +described below. + +tsc Always supported. Produces TSC timestamp packets to provide + timing information. In some cases it is possible to decode + without timing information, for example a per-thread context + that does not overlap executable memory maps. + + The default config selects tsc (i.e. tsc=1). + +noretcomp Always supported. Disables "return compression" so a TIP packet + is produced when a function returns. Causes more packets to be + produced but might make decoding more reliable. + + The default config does not select noretcomp (i.e. noretcomp=0). + +psb_period Allows the frequency of PSB packets to be specified. + + The PSB packet is a synchronization packet that provides a + starting point for decoding or recovery from errors. + + Support for psb_period is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and "0" + otherwise. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The psb_period value is converted to the approximate number of + trace bytes between PSB packets as: + + 2 ^ (value + 11) + + e.g. value 3 means 16KiB bytes between PSBs + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/psb_period=15/u uname + Invalid psb_period for intel_pt. Valid values are: 0-5 + + If MTC packets are selected, the default config selects a value + of 3 (i.e. psb_period=3) or the nearest lower value that is + supported (0 is always supported). Otherwise the default is 0. + + If decoding is expected to be reliable and the buffer is large + then a large PSB period can be used. + + Because a TSC packet is produced with PSB, the PSB period can + also affect the granularity to timing information in the absence + of MTC or CYC. + +mtc Produces MTC timing packets. + + MTC packets provide finer grain timestamp information than TSC + packets. MTC packets record time using the hardware crystal + clock (CTC) which is related to TSC packets using a TMA packet. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc + + which contains "1" if the feature is supported and + "0" otherwise. + + The frequency of MTC packets can also be specified - see + mtc_period below. + +mtc_period Specifies how frequently MTC packets are produced - see mtc + above for how to determine if MTC packets are supported. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + +no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR. + + It stops the driver resetting the byte count to zero whenever + enabling the trace (for example on context switches) which in + turn results in no PSB being forced. However some processors + will produce a PSB anyway. + + In any case, there is still a PSB when the trace is enabled for + the first time. + + no_force_psb can be used to slightly decrease the trace size but + may make it harder for the decoder to recover from errors. + + no_force_psb is not selected by default. + + new snapshot option ------------------- +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. + To select snapshot mode a new option has been added: -S From e486367f0101d43a044404ed2fa35aa043b19667 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 25 Aug 2015 13:27:35 +0000 Subject: [PATCH 15/22] perf probe: Prevent segfault when reading probe point with absolute address 'perf probe -l' panic if there is a manually inserted probing point with absolute address. For example: # echo 'p:probe/abs_ffffffff811e6615 0xffffffff811e6615' > /sys/kernel/debug/tracing/kprobe_events # perf probe -l Segmentation fault (core dumped) This patch fix this problem by considering the situation that "tp->symbol == NULL" in find_perf_probe_point_from_dwarf() and find_perf_probe_point_from_map(). After this patch: # perf probe -l probe:abs_ffffffff811e6615 (on SyS_write+5@fs/read_write.c) And when debug info is missing: # rm -rf ~/.debug # mv /lib/modules/4.2.0-rc1+/build/vmlinux /lib/modules/4.2.0-rc1+/build/vmlinux.bak # perf probe -l probe:abs_ffffffff811e6615 (on sys_write+5) Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440509256-193590-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f07374bc9c5a..6c7e538c9b8b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -515,7 +515,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, if (ret < 0) goto error; addr += stext; - } else { + } else if (tp->symbol) { addr = kernel_get_symbol_address_by_name(tp->symbol, false); if (addr == 0) goto error; @@ -1815,17 +1815,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, { struct symbol *sym = NULL; struct map *map; - u64 addr; + u64 addr = tp->address; int ret = -ENOENT; if (!is_kprobe) { map = dso__new_map(tp->module); if (!map) goto out; - addr = tp->address; sym = map__find_symbol(map, addr, NULL); } else { - addr = kernel_get_symbol_address_by_name(tp->symbol, true); + if (tp->symbol) + addr = kernel_get_symbol_address_by_name(tp->symbol, true); if (addr) { addr += tp->offset; sym = __find_kernel_function(addr, &map); From 18b9a05868391e8f617febb4528bc1765dc921cf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Aug 2015 10:07:50 +0200 Subject: [PATCH 16/22] perf tools: Remove export.h from MANIFEST We don't carry an export.h wrapper anymore, remove it from the MANIFEST file to avoid breaking the make perf-tar targets. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20150826080750.GD22670@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f31f15a5f873..af009bd6e6b7 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -41,7 +41,6 @@ tools/include/asm-generic/bitops.h tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h -tools/include/linux/export.h tools/include/linux/hash.h tools/include/linux/kernel.h tools/include/linux/list.h From 0bdede8a3e4ff7710622a2e6713d04b3243c24a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Aug 2015 15:01:03 +0200 Subject: [PATCH 17/22] tools build: Allow duplicate objects in the object list It's sometimes useful to specify the object affiliation to multiple config options like: libperf-$(CONFIG_X86) += tsc.o libperf-$(CONFIG_AUXTRACE) += tsc.o while the object itself is linked only once. Adding the support for this and ignoring duplicate objects in the object list. Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20150826130103.GF22670@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Documentation/Build.txt | 1 + tools/build/Makefile.build | 2 +- tools/build/tests/ex/Build | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index 00ad2d608727..aa5e092c4352 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -66,6 +66,7 @@ To follow the above example, the user provides following 'Build' files: ex/Build: ex-y += a.o ex-y += b.o + ex-y += b.o # duplicates in the lists are allowed libex-y += c.o libex-y += d.o diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 8120af9c0341..0c5f485521d6 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -63,7 +63,7 @@ quiet_cmd_gen = GEN $@ # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ - $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) # Build rules $(OUTPUT)%.o: %.c FORCE diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build index 70d876237c57..429c7d452101 100644 --- a/tools/build/tests/ex/Build +++ b/tools/build/tests/ex/Build @@ -1,6 +1,7 @@ ex-y += ex.o ex-y += a.o ex-y += b.o +ex-y += b.o ex-y += empty/ ex-y += empty2/ From 614e2fdbd79b47ddf63a1ccd3f2343e98c3077fb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 26 Aug 2015 10:57:42 +0000 Subject: [PATCH 18/22] perf probe: Fix list result when symbol can't be found 'perf probe -l' reports error if it is unable find symbol through address. Here is an example. # echo 'p:probe_libc/abs_5 /lib64/libc.so.6:0x5' > /sys/kernel/debug/tracing/uprobe_events # cat /sys/kernel/debug/tracing/uprobe_events p:probe_libc/abs_5 /lib64/libc.so.6:0x0000000000000005 # perf probe -l Error: Failed to show event list Also, this situation triggers a logical inconsistency in convert_to_perf_probe_point() that, it returns ENOMEM but actually it never try strdup(). This patch removes !tp->module && !is_kprobe condition, so it always uses address to build function name if symbol not found. Test result: # perf probe -l probe_libc:abs_5 (on 0x5 in /lib64/libc.so.6) Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Steven Rostedt Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440586666-235233-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6c7e538c9b8b..f7bacbb98865 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1866,7 +1866,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, if (tp->symbol) { pp->function = strdup(tp->symbol); pp->offset = tp->offset; - } else if (!tp->module && !is_kprobe) { + } else { ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); if (ret < 0) return ret; From be07afe92a09638db9159d2c0794487d66a437a2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 26 Aug 2015 10:57:43 +0000 Subject: [PATCH 19/22] perf probe: Fix list result when address is zero When manually added uprobe point with zero address, 'perf probe -l' reports error. For example: # echo p:probe_libc/abs_0 /path/to/lib.bin:0x0 arg1=%ax > \ /sys/kernel/debug/tracing/uprobe_events # perf probe -l Error: Failed to show event list. Probing at 0x0 is possible and useful when lib.bin is not a normal shared object but is manually mapped. However, in this case kernel report: # cat /sys/kernel/debug/tracing/uprobe_events p:probe_libc/abs_0 /path/to/lib.bin:0x (null) arg1=%ax This patch supports the above kernel output. Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Steven Rostedt Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440586666-235233-5-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f7bacbb98865..926bcecc4a44 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1519,9 +1519,31 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) } else p = argv[1]; fmt1_str = strtok_r(p, "+", &fmt); - if (fmt1_str[0] == '0') /* only the address started with 0x */ - tp->address = strtoul(fmt1_str, NULL, 0); - else { + /* only the address started with 0x */ + if (fmt1_str[0] == '0') { + /* + * Fix a special case: + * if address == 0, kernel reports something like: + * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax + * Newer kernel may fix that, but we want to + * support old kernel also. + */ + if (strcmp(fmt1_str, "0x") == 0) { + if (!argv[2] || strcmp(argv[2], "(null)")) { + ret = -EINVAL; + goto out; + } + tp->address = 0; + + free(argv[2]); + for (i = 2; argv[i + 1] != NULL; i++) + argv[i] = argv[i + 1]; + + argv[i] = NULL; + argc -= 1; + } else + tp->address = strtoul(fmt1_str, NULL, 0); + } else { /* Only the symbol-based probe has offset */ tp->symbol = strdup(fmt1_str); if (tp->symbol == NULL) { From 6c6e024f0a62a6a08c06002fd3caa2307cc54fd0 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 26 Aug 2015 10:57:44 +0000 Subject: [PATCH 20/22] perf probe: Fix error reported when offset without function This patch fixes a bug that, when offset is provided but function is lost, parse_perf_probe_point() will give a "" string as function name, so the checking code at the end of parse_perf_probe_point() become useless. For example: # perf probe +0x1234 Failed to find symbol in kernel Error: Failed to add events. After this patch: # perf probe +0x1234 Semantic error :Offset requires an entry function. Error: Command Parse Error. Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Steven Rostedt Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440586666-235233-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 926bcecc4a44..eaacb58b9b36 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1194,9 +1194,13 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) *ptr++ = '\0'; } - tmp = strdup(arg); - if (tmp == NULL) - return -ENOMEM; + if (arg[0] == '\0') + tmp = NULL; + else { + tmp = strdup(arg); + if (tmp == NULL) + return -ENOMEM; + } if (file_spec) pp->file = tmp; From da15bd9df4afd2f9f78cf29f85f013e3a38402b5 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 26 Aug 2015 10:57:45 +0000 Subject: [PATCH 21/22] perf probe: Support probing at absolute address It should be useful to allow 'perf probe' probe at absolute offset of a target. For example, when (u)probing at a instruction of a shared object in a embedded system where debuginfo is not avaliable but we know the offset of that instruction by manually digging. This patch enables following perf probe command syntax: # perf probe 0xffffffff811e6615 And # perf probe /lib/x86_64-linux-gnu/libc-2.19.so 0xeb860 In the above example, we don't need a anchor symbol, so it is possible to compute absolute addresses using other methods and then use 'perf probe' to create the probing points. v1 -> v2: Drop the leading '+' in cmdline; Allow uprobing at offset 0x0; Improve 'perf probe -l' result when uprobe at area without debuginfo. v2 -> v3: Split bugfix to a separated patch. Test result: # perf probe 0xffffffff8119d175 %ax # perf probe sys_write %ax # perf probe /lib64/libc-2.18.so 0x0 %ax # perf probe /lib64/libc-2.18.so 0x5 %ax # perf probe /lib64/libc-2.18.so 0xd8e40 %ax # perf probe /lib64/libc-2.18.so __write %ax # perf probe /lib64/libc-2.18.so 0xd8e49 %ax # cat /sys/kernel/debug/tracing/uprobe_events p:probe_libc/abs_0 /lib64/libc-2.18.so:0x (null) arg1=%ax p:probe_libc/abs_5 /lib64/libc-2.18.so:0x0000000000000005 arg1=%ax p:probe_libc/abs_d8e40 /lib64/libc-2.18.so:0x00000000000d8e40 arg1=%ax p:probe_libc/__write /lib64/libc-2.18.so:0x00000000000d8e40 arg1=%ax p:probe_libc/abs_d8e49 /lib64/libc-2.18.so:0x00000000000d8e49 arg1=%ax # cat /sys/kernel/debug/tracing/kprobe_events p:probe/abs_ffffffff8119d175 0xffffffff8119d175 arg1=%ax p:probe/sys_write _text+1692016 arg1=%ax # perf probe -l Failed to find debug information for address 5 probe:abs_ffffffff8119d175 (on sys_write+5 with arg1) probe:sys_write (on sys_write with arg1) probe_libc:__write (on @unix/syscall-template.S:81 in /lib64/libc-2.18.so with arg1) probe_libc:abs_0 (on 0x0 in /lib64/libc-2.18.so with arg1) probe_libc:abs_5 (on 0x5 in /lib64/libc-2.18.so with arg1) probe_libc:abs_d8e40 (on @unix/syscall-template.S:81 in /lib64/libc-2.18.so with arg1) probe_libc:abs_d8e49 (on __GI___libc_write+9 in /lib64/libc-2.18.so with arg1) Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Steven Rostedt Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440586666-235233-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 162 +++++++++++++++++++++++++++++++-- tools/perf/util/probe-event.h | 4 + tools/perf/util/probe-finder.c | 21 +---- 3 files changed, 163 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eaacb58b9b36..eb5f18b75402 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1204,9 +1204,27 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (file_spec) pp->file = tmp; - else + else { pp->function = tmp; + /* + * Keep pp->function even if this is absolute address, + * so it can mark whether abs_address is valid. + * Which make 'perf probe lib.bin 0x0' possible. + * + * Note that checking length of tmp is not needed + * because when we access tmp[1] we know tmp[0] is '0', + * so tmp[1] should always valid (but could be '\0'). + */ + if (tmp && !strncmp(tmp, "0x", 2)) { + pp->abs_address = strtoul(pp->function, &tmp, 0); + if (*tmp != '\0') { + semantic_error("Invalid absolute address.\n"); + return -EINVAL; + } + } + } + /* Parse other options */ while (ptr) { arg = ptr; @@ -1804,14 +1822,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (len <= 0) goto error; - /* Uprobes must have tp->address and tp->module */ - if (tev->uprobes && (!tp->address || !tp->module)) + /* Uprobes must have tp->module */ + if (tev->uprobes && !tp->module) goto error; + /* + * If tp->address == 0, then this point must be a + * absolute address uprobe. + * try_to_find_absolute_address() should have made + * tp->symbol to "0x0". + */ + if (tev->uprobes && !tp->address) { + if (!tp->symbol || strcmp(tp->symbol, "0x0")) + goto error; + } /* Use the tp->address for uprobes */ if (tev->uprobes) ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", tp->module, tp->address); + else if (!strncmp(tp->symbol, "0x", 2)) + /* Absolute address. See try_to_find_absolute_address() */ + ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", + tp->module ?: "", tp->module ? ":" : "", + tp->address); else ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", @@ -1874,8 +1907,8 @@ out: } static int convert_to_perf_probe_point(struct probe_trace_point *tp, - struct perf_probe_point *pp, - bool is_kprobe) + struct perf_probe_point *pp, + bool is_kprobe) { char buf[128]; int ret; @@ -2331,7 +2364,9 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, if (pev->event) event = pev->event; else - if (pev->point.function && !strisglob(pev->point.function)) + if (pev->point.function && + (strncmp(pev->point.function, "0x", 2) != 0) && + !strisglob(pev->point.function)) event = pev->point.function; else event = tev->point.realname; @@ -2598,6 +2633,98 @@ err_out: goto out; } +static int try_to_find_absolute_address(struct perf_probe_event *pev, + struct probe_trace_event **tevs) +{ + struct perf_probe_point *pp = &pev->point; + struct probe_trace_event *tev; + struct probe_trace_point *tp; + int i, err; + + if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2))) + return -EINVAL; + if (perf_probe_event_need_dwarf(pev)) + return -EINVAL; + + /* + * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at + * absolute address. + * + * Only one tev can be generated by this. + */ + *tevs = zalloc(sizeof(*tev)); + if (!*tevs) + return -ENOMEM; + + tev = *tevs; + tp = &tev->point; + + /* + * Don't use tp->offset, use address directly, because + * in synthesize_probe_trace_command() address cannot be + * zero. + */ + tp->address = pev->point.abs_address; + tp->retprobe = pp->retprobe; + tev->uprobes = pev->uprobes; + + err = -ENOMEM; + /* + * Give it a '0x' leading symbol name. + * In __add_probe_trace_events, a NULL symbol is interpreted as + * invalud. + */ + if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) + goto errout; + + /* For kprobe, check range */ + if ((!tev->uprobes) && + (kprobe_warn_out_range(tev->point.symbol, + tev->point.address))) { + err = -EACCES; + goto errout; + } + + if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) + goto errout; + + if (pev->target) { + tp->module = strdup(pev->target); + if (!tp->module) + goto errout; + } + + if (tev->group) { + tev->group = strdup(pev->group); + if (!tev->group) + goto errout; + } + + if (pev->event) { + tev->event = strdup(pev->event); + if (!tev->event) + goto errout; + } + + tev->nargs = pev->nargs; + tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); + if (!tev->args) { + err = -ENOMEM; + goto errout; + } + for (i = 0; i < tev->nargs; i++) + copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]); + + return 1; + +errout: + if (*tevs) { + clear_probe_trace_events(*tevs, 1); + *tevs = NULL; + } + return err; +} + bool __weak arch__prefers_symtab(void) { return false; } static int convert_to_probe_trace_events(struct perf_probe_event *pev, @@ -2614,6 +2741,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, } } + ret = try_to_find_absolute_address(pev, tevs); + if (ret > 0) + return ret; + if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { ret = find_probe_trace_events_from_map(pev, tevs); if (ret > 0) @@ -2784,3 +2915,22 @@ end: return ret; } +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar) +{ + tvar->value = strdup(pvar->var); + if (tvar->value == NULL) + return -ENOMEM; + if (pvar->type) { + tvar->type = strdup(pvar->type); + if (tvar->type == NULL) + return -ENOMEM; + } + if (pvar->name) { + tvar->name = strdup(pvar->name); + if (tvar->name == NULL) + return -ENOMEM; + } else + tvar->name = NULL; + return 0; +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 83ee95e9743b..6e7ec68a4aa8 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -59,6 +59,7 @@ struct perf_probe_point { bool retprobe; /* Return probe flag */ char *lazy_line; /* Lazy matching pattern */ unsigned long offset; /* Offset from function entry */ + unsigned long abs_address; /* Absolute address of the point */ }; /* Perf probe probing argument field chain */ @@ -156,4 +157,7 @@ int e_snprintf(char *str, size_t size, const char *format, ...) /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 +int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, + struct perf_probe_arg *pvar); + #endif /*_PROBE_EVENT_H */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 7b80f8cb62b9..29c43c0680a8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -553,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) char buf[32], *ptr; int ret = 0; - if (!is_c_varname(pf->pvar->var)) { - /* Copy raw parameters */ - pf->tvar->value = strdup(pf->pvar->var); - if (pf->tvar->value == NULL) - return -ENOMEM; - if (pf->pvar->type) { - pf->tvar->type = strdup(pf->pvar->type); - if (pf->tvar->type == NULL) - return -ENOMEM; - } - if (pf->pvar->name) { - pf->tvar->name = strdup(pf->pvar->name); - if (pf->tvar->name == NULL) - return -ENOMEM; - } else - pf->tvar->name = NULL; - return 0; - } + /* Copy raw parameters */ + if (!is_c_varname(pf->pvar->var)) + return copy_to_probe_trace_arg(pf->tvar, pf->pvar); if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); From a2fb3382edbea83c6f2bf6ac15e3673b2e254aad Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 26 Aug 2015 10:57:46 +0000 Subject: [PATCH 22/22] tracing/uprobes: Do not print '0x (null)' when offset is 0 When manually added uprobe point with zero address, 'uprobe_events' output '(null)' instead of 0x00000000: # echo p:probe_libc/abs_0 /path/to/lib.bin:0x0 arg1=%ax > \ /sys/kernel/debug/tracing/uprobe_events # cat /sys/kernel/debug/tracing/uprobe_events p:probe_libc/abs_0 /path/to/lib.bin:0x (null) arg1=%ax This patch fixes this behavior: # cat /sys/kernel/debug/tracing/uprobe_events p:probe_libc/abs_0 /path/to/lib.bin:0x0000000000000000 Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Steven Rostedt Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1440586666-235233-8-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/trace/trace_uprobe.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f97479f1ce35..d2f6d0be3503 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -601,7 +601,22 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, trace_event_name(&tu->tp.call)); - seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); + seq_printf(m, " %s:", tu->filename); + + /* Don't print "0x (null)" when offset is 0 */ + if (tu->offset) { + seq_printf(m, "0x%p", (void *)tu->offset); + } else { + switch (sizeof(void *)) { + case 4: + seq_printf(m, "0x00000000"); + break; + case 8: + default: + seq_printf(m, "0x0000000000000000"); + break; + } + } for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);