perf fixes for v5.13: 2nd batch
- Fix 'perf script' decoding of Intel PT traces for abort handling and
  sample instruction bytes.

- Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit to the Intel PT
  'perf script' decoder.

- Fixes for the python-based Intel PT trace viewer GUI.

- Sync UAPI copies (unwire quotactl_path, some comment fixes).

- Fix handling of missing kernel software events, such as the recently added
  'cgroup-switches', and add the trivial glue for it on the tooling side,
  since it was added in this merge window.

- Add missing initialization of zstd_data in 'perf buildid-list', detected
  with valgrind's memcheck.

- Remove needless event enable/disable when all events use BPF.

- Fix libpfm4 support (63) test error for nested event groups.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-----BEGIN PGP SIGNATURE-----

iHUEABYIAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYKwENwAKCRCyPKLppCJ+
JxBWAP0UQ2Mm/STKDz4GpqJl1WsHF5oUUr8mFv+17ucyk4vdYgD/Xd5BaFNm6Y7E
/PgbNW9qze1ltWvHWGDpP/rFJfoNqg8=
=YzL3
-----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-for-v5.13-2021-05-24' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tool fixes from Arnaldo Carvalho de Melo:

 - Fix 'perf script' decoding of Intel PT traces for abort handling and
   sample instruction bytes.

 - Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit to the Intel PT
   'perf script' decoder.

 - Fixes for the python-based Intel PT trace viewer GUI.

 - Sync UAPI copies (unwire quotactl_path, some comment fixes).

 - Fix handling of missing kernel software events, such as the recently
   added 'cgroup-switches', and add the trivial glue for it on the tooling
   side, since it was added in this merge window (a usage sketch follows
   the commit reference below).

 - Add missing initialization of zstd_data in 'perf buildid-list', detected
   with valgrind's memcheck.

 - Remove needless event enable/disable when all events use BPF.

 - Fix libpfm4 support (63) test error for nested event groups.

* tag 'perf-tools-fixes-for-v5.13-2021-05-24' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf stat: Skip evlist__[enable|disable] when all events uses BPF
  perf script: Add missing PERF_IP_FLAG_CHARS for VM-Entry and VM-Exit
  perf scripts python: exported-sql-viewer.py: Fix warning display
  perf scripts python: exported-sql-viewer.py: Fix Array TypeError
  perf scripts python: exported-sql-viewer.py: Fix copy to clipboard from Top Calls by elapsed Time report
  tools headers UAPI: Sync files changed by the quotactl_path unwiring
  tools headers UAPI: Sync linux/perf_event.h with the kernel sources
  tools headers UAPI: Sync linux/fs.h with the kernel sources
  perf parse-events: Check if the software events array slots are populated
  perf tools: Add 'cgroup-switches' software event
  perf intel-pt: Remove redundant setting of ptq->insn_len
  perf intel-pt: Fix sample instruction bytes
  perf intel-pt: Fix transaction abort handling
  perf test: Fix libpfm4 support (63) test error for nested event groups
  tools arch kvm: Sync kvm headers with the kernel sources
  perf buildid-list: Initialize zstd_data
commit a050a6d2b7
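A note on the new 'cgroup-switches' event mentioned above: the tooling glue
(see the parse-events hunks below) only teaches perf the event name; the
counter itself was added on the kernel side earlier in this merge window. As
a rough standalone sketch, not part of this series, the counter can also be
opened directly via perf_event_open(2), assuming installed headers that
already define PERF_COUNT_SW_CGROUP_SWITCHES:

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>
	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		/* New software counter, needs a v5.13+ kernel. */
		attr.config = PERF_COUNT_SW_CGROUP_SWITCHES;

		/* Count cgroup switches for the current task on any CPU. */
		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}

		sleep(1);

		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("cgroup-switches: %llu\n", (unsigned long long)count);

		close(fd);
		return 0;
	}

With the glue below in place, the same counter is reachable by name, e.g.
'perf stat -e cgroup-switches -a sleep 1'.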
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 		__u16 flags;
 	} smm;
 
+	__u16 pad;
+
 	__u32 flags;
 	__u64 preemption_timer_deadline;
 };
@@ -185,7 +185,7 @@ struct fsxattr {
 #define BLKROTATIONAL	_IO(0x12,126)
 #define BLKZEROOUT	_IO(0x12,127)
 /*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
  */
@@ -464,7 +464,7 @@ struct perf_event_attr {
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
 	 */
 	__u64 sig_data;
 };
@@ -108,9 +108,9 @@ displayed as follows:
 
 	perf script --itrace=ibxwpe -F+flags
 
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
+The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end,
+in transaction, VM-entry, and VM-exit respectively.
 
 perf script also supports higher level ways to dump instruction traces:
 
@@ -183,14 +183,15 @@ OPTIONS
 	At this point usage is displayed, and perf-script exits.
 
 	The flags field is synthesized and may have a value when Instruction
-	Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+	Trace decoding. The flags are "bcrosyiABExgh" which stand for branch,
 	call, return, conditional, system, asynchronous, interrupt,
-	transaction abort, trace begin, trace end, and in transaction,
+	transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit
 	respectively. Known combinations of flags are printed more nicely e.g.
 	"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
 	"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
 	"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
-	"tr end" for "bE". However the "x" flag will be display separately in those
+	"tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch".
+	However the "x" flag will be displayed separately in those
 	cases e.g. "jcc (x)" for a condition branch within a transaction.
 
 	The callindent field is synthesized and may have a value when
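A side note on the flag strings documented above: the mapping is purely
positional, each character of PERF_IP_FLAG_CHARS (see the hunk further down
that extends it to "bcrosyiABExgh") corresponds to one PERF_IP_FLAG_* bit.
A minimal sketch of that rendering, not perf's own printing code:

	#include <stdio.h>

	#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"

	/* Emit one character per set bit, lowest bit first. */
	static void render_flags(unsigned long long flags, char *buf)
	{
		const char *chars = PERF_IP_FLAG_CHARS;
		int i, n = 0;

		for (i = 0; chars[i]; i++) {
			if (flags & (1ULL << i))
				buf[n++] = chars[i];
		}
		buf[n] = '\0';
	}

	int main(void)
	{
		char buf[16];

		/* branch (bit 0) + call (bit 1) + VM-Entry (bit 11) -> "bcg", shown as "vmentry" */
		render_flags((1ULL << 0) | (1ULL << 1) | (1ULL << 11), buf);
		printf("%s\n", buf);
		return 0;
	}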
@@ -357,7 +357,7 @@
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
 442	n64	mount_setattr			sys_mount_setattr
-443	n64	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
@@ -522,7 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
@@ -445,7 +445,7 @@
 440	common	process_madvise		sys_process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr		sys_mount_setattr
-443	common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
@@ -364,7 +364,7 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
-443	common	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
 		with_hits = true;
 
+	if (zstd_init(&(session->zstd_data), 0) < 0)
+		pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
 	/*
 	 * in pipe-mode, the only way to get the buildids is to parse
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
@@ -572,7 +572,8 @@ static int enable_counters(void)
 	 * - we have initial delay configured
 	 */
 	if (!target__none(&target) || stat_config.initial_delay) {
-		evlist__enable(evsel_list);
+		if (!all_counters_use_bpf)
+			evlist__enable(evsel_list);
 		if (stat_config.initial_delay > 0)
 			pr_info(EVLIST_ENABLED_MSG);
 	}
@@ -581,13 +582,19 @@ static int enable_counters(void)
 
 static void disable_counters(void)
 {
+	struct evsel *counter;
+
 	/*
 	 * If we don't have tracee (attaching to task or cpu), counters may
 	 * still be running. To get accurate group ratios, we must stop groups
 	 * from counting before reading their constituent counters.
 	 */
-	if (!target__none(&target))
-		evlist__disable(evsel_list);
+	if (!target__none(&target)) {
+		evlist__for_each_entry(evsel_list, counter)
+			bpf_counter__disable(counter);
+		if (!all_counters_use_bpf)
+			evlist__disable(evsel_list);
+	}
 }
 
 static volatile int workload_exec_errno;
@@ -91,6 +91,11 @@
 from __future__ import print_function
 
 import sys
+# Only change warnings if the python -W option was not used
+if not sys.warnoptions:
+	import warnings
+	# PySide2 causes deprecation warnings, ignore them.
+	warnings.filterwarnings("ignore", category=DeprecationWarning)
 import argparse
 import weakref
 import threading
@@ -125,8 +130,9 @@ if pyside_version_1:
 	from PySide.QtGui import *
 	from PySide.QtSql import *
 
-from decimal import *
-from ctypes import *
+from decimal import Decimal, ROUND_HALF_UP
+from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
+		   c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
 from multiprocessing import Process, Array, Value, Event
 
 # xrange is range in Python3
@@ -3868,7 +3874,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
 	if with_hdr:
 		model = indexes[0].model()
 		for col in range(min_col, max_col + 1):
-			val = model.headerData(col, Qt.Horizontal)
+			val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
 			if as_csv:
 				text += sep + ToCSValue(val)
 				sep = ","
@@ -131,8 +131,8 @@ static int test__pfm_group(void)
 		},
 		{
 			.events = "{},{instructions}",
-			.nr_events = 0,
-			.nr_groups = 0,
+			.nr_events = 1,
+			.nr_groups = 1,
 		},
 		{
 			.events = "{instructions},{instructions}",
@@ -100,7 +100,7 @@ enum {
 	PERF_IP_FLAG_VMEXIT		= 1ULL << 12,
 };
 
-#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
 
 #define PERF_BRANCH_MASK		(\
 	PERF_IP_FLAG_BRANCH		|\
@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
 	if (affinity__setup(&affinity) < 0)
 		return;
 
-	evlist__for_each_entry(evlist, pos)
-		bpf_counter__disable(pos);
-
 	/* Disable 'immediate' events last */
 	for (imm = 0; imm <= 1; imm++) {
 		evlist__for_each_cpu(evlist, i, cpu) {
@@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
 		decoder->set_fup_tx_flags = false;
 		decoder->tx_flags = decoder->fup_tx_flags;
 		decoder->state.type = INTEL_PT_TRANSACTION;
+		if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+			decoder->state.type |= INTEL_PT_BRANCH;
 		decoder->state.from_ip = decoder->ip;
 		decoder->state.to_ip = 0;
 		decoder->state.flags = decoder->fup_tx_flags;
@@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 			return 0;
 		if (err == -EAGAIN ||
 		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+			bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
-			if (intel_pt_fup_event(decoder))
+			if (intel_pt_fup_event(decoder) && no_tip)
 				return 0;
 			return -EAGAIN;
 		}
@@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 
 			*ip += intel_pt_insn->length;
 
-			if (to_ip && *ip == to_ip)
+			if (to_ip && *ip == to_ip) {
+				intel_pt_insn->length = 0;
 				goto out_no_cache;
+			}
 
 			if (*ip >= al.map->end)
 				break;
@@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
 
 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 {
+	ptq->insn_len = 0;
 	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
 		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
 	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
@@ -1211,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
 			     PERF_IP_FLAG_ASYNC |
 			     PERF_IP_FLAG_INTERRUPT;
-		ptq->insn_len = 0;
 	} else {
 		if (ptq->state->from_ip)
 			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "bpf-output",
 		.alias = "",
 	},
+	[PERF_COUNT_SW_CGROUP_SWITCHES] = {
+		.symbol = "cgroup-switches",
+		.alias = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
@@ -2928,9 +2932,14 @@ restart:
 	}
 
 	for (i = 0; i < max; i++, syms++) {
+		/*
+		 * New attr.config still not supported here, the latest
+		 * example was PERF_COUNT_SW_CGROUP_SWITCHES
+		 */
+		if (syms->symbol == NULL)
+			continue;
 
-		if (event_glob != NULL && syms->symbol != NULL &&
-		    !(strglobmatch(syms->symbol, event_glob) ||
+		if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
 		      (syms->alias && strglobmatch(syms->alias, event_glob))))
 			continue;
 
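The NULL check added above matters because event_symbols_sw[] is a
designated-initializer array indexed by the PERF_COUNT_SW_* enum, so any
counter the table does not yet name leaves a zeroed slot; the latest example
is PERF_COUNT_SW_CGROUP_SWITCHES. A standalone sketch of that pattern, with
hypothetical names rather than the perf table itself:

	#include <stdio.h>
	#include <stddef.h>

	enum sw_id { SW_CPU_CLOCK, SW_TASK_CLOCK, SW_CGROUP_SWITCHES, SW_MAX };

	struct event_symbol { const char *symbol; const char *alias; };

	static const struct event_symbol symbols[SW_MAX] = {
		[SW_CPU_CLOCK]  = { .symbol = "cpu-clock",  .alias = "" },
		[SW_TASK_CLOCK] = { .symbol = "task-clock", .alias = "" },
		/* SW_CGROUP_SWITCHES deliberately left unpopulated. */
	};

	int main(void)
	{
		for (size_t i = 0; i < SW_MAX; i++) {
			if (!symbols[i].symbol)	/* the check the fix adds */
				continue;
			printf("%s\n", symbols[i].symbol);
		}
		return 0;
	}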
@@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 duration_time					{ return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
 bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
 		}
 
 		/* no event */
-		if (*q == '\0')
+		if (*q == '\0') {
+			if (*sep == '}') {
+				if (grp_evt < 0) {
+					ui__error("cannot close a non-existing event group\n");
+					goto error;
+				}
+				grp_evt--;
+			}
 			continue;
+		}
 
 		memset(&attr, 0, sizeof(attr));
 		event_attr_init(&attr);
@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
 			grp_evt = -1;
 		}
 	}
+	free(p_orig);
 	return 0;
 error:
 	free(p_orig);