linux/tools/perf/arch/arm64/util/arm-spe.c

227 lines
5.7 KiB
C
Raw Normal View History

perf tools: Add ARM Statistical Profiling Extensions (SPE) support 'perf record' and 'perf report --dump-raw-trace' supported in this release. Example usage: # perf record -e arm_spe/ts_enable=1,pa_enable=1/ dd if=/dev/zero of=/dev/null count=10000 # perf report --dump-raw-trace Note that the perf.data file is portable, so the report can be run on another architecture host if necessary. Output will contain raw SPE data and its textual representation, such as: 0x5c8 [0x30]: PERF_RECORD_AUXTRACE size: 0x200000 offset: 0 ref: 0x1891ad0e idx: 1 tid: 2227 cpu: 1 . . ... ARM SPE data: size 2097152 bytes . 00000000: 49 00 LD . 00000002: b2 c0 3b 29 0f 00 00 ff ff VA 0xffff00000f293bc0 . 0000000b: b3 c0 eb 24 fb 00 00 00 80 PA 0xfb24ebc0 ns=1 . 00000014: 9a 00 00 LAT 0 XLAT . 00000017: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000019: b0 00 c4 15 08 00 00 ff ff PC 0xff00000815c400 el3 ns=1 . 00000022: 98 00 00 LAT 0 TOT . 00000025: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000002e: 49 00 LD . 00000030: b2 80 3c 29 0f 00 00 ff ff VA 0xffff00000f293c80 . 00000039: b3 80 ec 24 fb 00 00 00 80 PA 0xfb24ec80 ns=1 . 00000042: 9a 00 00 LAT 0 XLAT . 00000045: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000047: b0 f4 11 16 08 00 00 ff ff PC 0xff0000081611f4 el3 ns=1 . 00000050: 98 00 00 LAT 0 TOT . 00000053: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000005c: 48 00 INSN-OTHER . 0000005e: 42 02 EV RETIRED . 00000060: b0 2c ef 7f 08 00 00 ff ff PC 0xff0000087fef2c el3 ns=1 . 00000069: 98 00 00 LAT 0 TOT . 0000006c: 71 d1 6f 21 2c 09 00 00 00 TS 39395094481 ... Other release notes: - applies to acme's perf/{core,urgent} branches, likely elsewhere - Report is self-contained within the tool. Record requires enabling the kernel SPE driver by setting CONFIG_ARM_SPE_PMU. - The intel-bts implementation was used as a starting point; its min/default/max buffer sizes and power of 2 pages granularity need to be revisited for ARM SPE - Recording across multiple SPE clusters/domains not supported - Snapshot support (record -S), and conversion to native perf events (e.g., via 'perf inject --itrace'), are also not supported - Technically both cs-etm and spe can be used simultaneously, however disabled for simplicity in this release Signed-off-by: Kim Phillips <kim.phillips@arm.com> Reviewed-by: Dongjiu Geng <gengdongjiu@huawei.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rob Herring <robh@kernel.org> Cc: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wang Nan <wangnan0@huawei.com> Cc: Will Deacon <will.deacon@arm.com> Link: http://lkml.kernel.org/r/20180114132850.0b127434b704a26bad13268f@arm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-01-14 19:28:50 +00:00
// SPDX-License-Identifier: GPL-2.0
/*
* Arm Statistical Profiling Extensions (SPE) support
* Copyright (c) 2017-2018, Arm Ltd.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <time.h>
#include "../../util/cpumap.h"
#include "../../util/evsel.h"
#include "../../util/evlist.h"
#include "../../util/session.h"
#include "../../util/util.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/arm-spe.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
struct arm_spe_recording {
struct auxtrace_record itr;
struct perf_pmu *arm_spe_pmu;
struct perf_evlist *evlist;
};
static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct perf_evlist *evlist __maybe_unused)
{
return ARM_SPE_AUXTRACE_PRIV_SIZE;
}
static int arm_spe_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct auxtrace_info_event *auxtrace_info,
size_t priv_size)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
return -EINVAL;
if (!session->evlist->nr_mmaps)
return -EINVAL;
auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
return 0;
}
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct perf_evlist *evlist,
struct record_opts *opts)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
struct perf_evsel *evsel, *arm_spe_evsel = NULL;
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
struct perf_evsel *tracking_evsel;
int err;
sper->evlist = evlist;
evlist__for_each_entry(evlist, evsel) {
if (evsel->attr.type == arm_spe_pmu->type) {
if (arm_spe_evsel) {
pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
return -EINVAL;
}
evsel->attr.freq = 0;
evsel->attr.sample_period = 1;
arm_spe_evsel = evsel;
opts->full_auxtrace = true;
}
}
if (!opts->full_auxtrace)
return 0;
/* We are in full trace mode but '-m,xyz' wasn't specified */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
}
/* Validate auxtrace_mmap_pages */
if (opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
size_t min_sz = KiB(8);
if (sz < min_sz || !is_power_of_2(sz)) {
pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
min_sz / 1024);
return -EINVAL;
}
}
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace event
* must come first.
*/
perf_evlist__to_front(evlist, arm_spe_evsel);
perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
perf_evsel__set_sample_bit(arm_spe_evsel, TID);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)
return err;
tracking_evsel = perf_evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, tracking_evsel);
tracking_evsel->attr.freq = 0;
tracking_evsel->attr.sample_period = 1;
perf_evsel__set_sample_bit(tracking_evsel, TIME);
perf_evsel__set_sample_bit(tracking_evsel, CPU);
perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
return 0;
}
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
return ts.tv_sec ^ ts.tv_nsec;
}
static void arm_spe_recording_free(struct auxtrace_record *itr)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
free(sper);
}
static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
{
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
struct perf_evsel *evsel;
evlist__for_each_entry(sper->evlist, evsel) {
if (evsel->attr.type == sper->arm_spe_pmu->type)
return perf_evlist__enable_event_idx(sper->evlist,
evsel, idx);
}
return -EINVAL;
}
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu)
{
struct arm_spe_recording *sper;
if (!arm_spe_pmu) {
*err = -ENODEV;
return NULL;
}
sper = zalloc(sizeof(struct arm_spe_recording));
if (!sper) {
*err = -ENOMEM;
return NULL;
}
sper->arm_spe_pmu = arm_spe_pmu;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
sper->itr.free = arm_spe_recording_free;
sper->itr.reference = arm_spe_reference;
sper->itr.read_finish = arm_spe_read_finish;
sper->itr.alignment = 0;
*err = 0;
perf tools: Add ARM Statistical Profiling Extensions (SPE) support 'perf record' and 'perf report --dump-raw-trace' supported in this release. Example usage: # perf record -e arm_spe/ts_enable=1,pa_enable=1/ dd if=/dev/zero of=/dev/null count=10000 # perf report --dump-raw-trace Note that the perf.data file is portable, so the report can be run on another architecture host if necessary. Output will contain raw SPE data and its textual representation, such as: 0x5c8 [0x30]: PERF_RECORD_AUXTRACE size: 0x200000 offset: 0 ref: 0x1891ad0e idx: 1 tid: 2227 cpu: 1 . . ... ARM SPE data: size 2097152 bytes . 00000000: 49 00 LD . 00000002: b2 c0 3b 29 0f 00 00 ff ff VA 0xffff00000f293bc0 . 0000000b: b3 c0 eb 24 fb 00 00 00 80 PA 0xfb24ebc0 ns=1 . 00000014: 9a 00 00 LAT 0 XLAT . 00000017: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000019: b0 00 c4 15 08 00 00 ff ff PC 0xff00000815c400 el3 ns=1 . 00000022: 98 00 00 LAT 0 TOT . 00000025: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000002e: 49 00 LD . 00000030: b2 80 3c 29 0f 00 00 ff ff VA 0xffff00000f293c80 . 00000039: b3 80 ec 24 fb 00 00 00 80 PA 0xfb24ec80 ns=1 . 00000042: 9a 00 00 LAT 0 XLAT . 00000045: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000047: b0 f4 11 16 08 00 00 ff ff PC 0xff0000081611f4 el3 ns=1 . 00000050: 98 00 00 LAT 0 TOT . 00000053: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000005c: 48 00 INSN-OTHER . 0000005e: 42 02 EV RETIRED . 00000060: b0 2c ef 7f 08 00 00 ff ff PC 0xff0000087fef2c el3 ns=1 . 00000069: 98 00 00 LAT 0 TOT . 0000006c: 71 d1 6f 21 2c 09 00 00 00 TS 39395094481 ... Other release notes: - applies to acme's perf/{core,urgent} branches, likely elsewhere - Report is self-contained within the tool. Record requires enabling the kernel SPE driver by setting CONFIG_ARM_SPE_PMU. - The intel-bts implementation was used as a starting point; its min/default/max buffer sizes and power of 2 pages granularity need to be revisited for ARM SPE - Recording across multiple SPE clusters/domains not supported - Snapshot support (record -S), and conversion to native perf events (e.g., via 'perf inject --itrace'), are also not supported - Technically both cs-etm and spe can be used simultaneously, however disabled for simplicity in this release Signed-off-by: Kim Phillips <kim.phillips@arm.com> Reviewed-by: Dongjiu Geng <gengdongjiu@huawei.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Rob Herring <robh@kernel.org> Cc: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wang Nan <wangnan0@huawei.com> Cc: Will Deacon <will.deacon@arm.com> Link: http://lkml.kernel.org/r/20180114132850.0b127434b704a26bad13268f@arm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-01-14 19:28:50 +00:00
return &sper->itr;
}
struct perf_event_attr
*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
{
struct perf_event_attr *attr;
attr = zalloc(sizeof(struct perf_event_attr));
if (!attr) {
pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
return NULL;
}
/*
* If kernel driver doesn't advertise a minimum,
* use max allowable by PMSIDR_EL1.INTERVAL
*/
if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
&attr->sample_period) != 1) {
pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
attr->sample_period = 4096;
}
arm_spe_pmu->selectable = true;
arm_spe_pmu->is_uncore = false;
return attr;
}