a54ca19498
After the commit ffd3d18c20
("perf tools: Add ARM Statistical
Profiling Extensions (SPE) support") was merged, perf can output
raw trace data with the option "--dump-raw-trace". However, it lacks
support for synthetic events, so it cannot output any statistical info.
This patch improves the "perf report" support for ARM SPE by adding four
types of synthetic events:
First level cache synthetic events, including L1 data cache access
and miss events;
Last level cache synthetic events, including last level cache
access and miss events;
TLB synthetic events, including TLB access and miss events;
Remote access events, which are used to account for load/store operations
that access another socket.
Example usage:
$ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
$ perf report --stdio
# Samples: 59 of event 'l1d-miss'
# Event count (approx.): 59
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages
5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap
5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg
5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range
3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge
3.39% 3.39% dd [kernel.kallsyms] [k] release_pages
3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c
1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd
[...]
# Samples: 3K of event 'l1d-access'
# Event count (approx.): 3980
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user
10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify
7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read
4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read
4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write
3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light
3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area
3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission
2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent
2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write
2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero
2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero
1.81% 1.81% dd dd [.] 0x0000000000002960
1.78% 1.78% dd dd [.] 0x0000000000002980
[...]
# Samples: 35 of event 'llc-miss'
# Event count (approx.): 35
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages
8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg
8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range
5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge
5.71% 5.71% dd [kernel.kallsyms] [k] release_pages
5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c
2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work
2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup
2.86% 2.86% dd [kernel.kallsyms] [k] copy_page
[...]
# Samples: 2 of event 'llc-access'
# Event count (approx.): 2
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .............
#
50.00% 50.00% dd [kernel.kallsyms] [k] copy_page
50.00% 50.00% dd libc-2.28.so [.] _dl_addr
# Samples: 48 of event 'tlb-miss'
# Event count (approx.): 48
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ..................................
#
20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user
10.42% 10.42% dd [kernel.kallsyms] [k] clear_page
4.17% 4.17% dd [kernel.kallsyms] [k] copy_page
4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages
2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd
2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70
2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work
2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock
2.08% 2.08% dd [kernel.kallsyms] [k] d_path
2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode
2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open
[...]
# Samples: 9K of event 'tlb-access'
# Event count (approx.): 9573
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ......................................
#
25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user
11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user
8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify
4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read
3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2
3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent
2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write
2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read
2.52% 2.52% dd libc-2.28.so [.] write
2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission
2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write
1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area
1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero
[...]
# Samples: 9 of event 'branch-miss'
# Event count (approx.): 9
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. .........................
#
22.22% 22.22% dd libc-2.28.so [.] _dl_addr
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user
11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill
11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy
11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c
11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980
11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340
# Samples: 29 of event 'remote-access'
# Event count (approx.): 29
#
# Children Self Command Shared Object Symbol
# ........ ........ ....... ................. ...........................
#
41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages
10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg
10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range
6.90% 6.90% dd [kernel.kallsyms] [k] release_pages
3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge
3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work
3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge
3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap
3.45% 3.45% dd [kernel.kallsyms] [k] xas_start
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c
3.45% 3.45% dd ld-2.28.so [.] 0x00000000000093cc
Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
220 lines
4.6 KiB
C
220 lines
4.6 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* arm_spe_decoder.c: ARM SPE support
|
|
*/
|
|
|
|
#ifndef _GNU_SOURCE
|
|
#define _GNU_SOURCE
|
|
#endif
|
|
#include <errno.h>
|
|
#include <inttypes.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/zalloc.h>
|
|
|
|
#include "../auxtrace.h"
|
|
#include "../debug.h"
|
|
#include "../util.h"
|
|
|
|
#include "arm-spe-decoder.h"
|
|
|
|
/* Fallback single-bit mask helper, only defined when no header provided one. */
#if !defined(BIT)
# define BIT(n) (1UL << (n))
#endif
|
|
|
|
/*
 * arm_spe_calc_ip() - fix up the top byte of an SPE address packet payload.
 * @index:   address packet header index (one of SPE_ADDR_PKT_HDR_INDEX_*)
 * @payload: 64-bit address packet payload
 *
 * Bits [63:56] of the payload do not hold address bits, so they are rewritten
 * to either 0xff or 0x00 depending on the kind of address:
 *
 *  - instruction / branch target address: 0xff when the NS flag is set and the
 *    exception level field is EL1 or EL2, otherwise 0x00;
 *  - data virtual address: 0xff when bits [55:48] are 0xff, otherwise 0x00;
 *  - data physical address: always 0x00.
 *
 * The fix-up is done with shifts and masks on the integer value rather than
 * through a byte pointer, so the result does not depend on the byte order of
 * the host running the decoder.
 *
 * Return: the payload with bits [63:56] rewritten.  For any other @index an
 * error is logged and the payload is returned unchanged.
 */
static u64 arm_spe_calc_ip(int index, u64 payload)
{
	const u64 top_byte_mask = 0xffULL << 56;
	const u8 top_byte = (u8)(payload >> 56);
	const u8 byte6 = (u8)(payload >> 48);
	int ns, el;

	/* Instruction virtual address or Branch target address */
	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
		ns = top_byte & SPE_ADDR_PKT_NS;
		el = (top_byte & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;

		/* Fill highest byte for EL1 or EL2 (VHE) mode */
		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
			payload |= top_byte_mask;
		/* Clean highest byte for other cases */
		else
			payload &= ~top_byte_mask;

	/* Data access virtual address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {

		/* Fill highest byte if bits [48..55] is 0xff */
		if (byte6 == 0xff)
			payload |= top_byte_mask;
		/* Otherwise, cleanup tags */
		else
			payload &= ~top_byte_mask;

	/* Data access physical address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
		/* Cleanup byte 7 */
		payload &= ~top_byte_mask;
	} else {
		pr_err("unsupported address packet index: 0x%x\n", index);
	}

	return payload;
}
|
|
|
|
struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
|
|
{
|
|
struct arm_spe_decoder *decoder;
|
|
|
|
if (!params->get_trace)
|
|
return NULL;
|
|
|
|
decoder = zalloc(sizeof(struct arm_spe_decoder));
|
|
if (!decoder)
|
|
return NULL;
|
|
|
|
decoder->get_trace = params->get_trace;
|
|
decoder->data = params->data;
|
|
|
|
return decoder;
|
|
}
|
|
|
|
/*
 * arm_spe_decoder_free() - release a decoder.
 * @decoder: decoder to release; passed straight to free()
 *
 * Only the decoder structure itself is freed; the buffer it points at is not
 * touched here.
 */
void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
{
	free(decoder);
}
|
|
|
|
static int arm_spe_get_data(struct arm_spe_decoder *decoder)
|
|
{
|
|
struct arm_spe_buffer buffer = { .buf = 0, };
|
|
int ret;
|
|
|
|
pr_debug("Getting more data\n");
|
|
ret = decoder->get_trace(&buffer, decoder->data);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
decoder->buf = buffer.buf;
|
|
decoder->len = buffer.len;
|
|
|
|
if (!decoder->len)
|
|
pr_debug("No more data\n");
|
|
|
|
return decoder->len;
|
|
}
|
|
|
|
static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
|
|
{
|
|
int ret;
|
|
|
|
do {
|
|
if (!decoder->len) {
|
|
ret = arm_spe_get_data(decoder);
|
|
|
|
/* Failed to read out trace data */
|
|
if (ret <= 0)
|
|
return ret;
|
|
}
|
|
|
|
ret = arm_spe_get_packet(decoder->buf, decoder->len,
|
|
&decoder->packet);
|
|
if (ret <= 0) {
|
|
/* Move forward for 1 byte */
|
|
decoder->buf += 1;
|
|
decoder->len -= 1;
|
|
return -EBADMSG;
|
|
}
|
|
|
|
decoder->buf += ret;
|
|
decoder->len -= ret;
|
|
} while (decoder->packet.type == ARM_SPE_PAD);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int arm_spe_read_record(struct arm_spe_decoder *decoder)
|
|
{
|
|
int err;
|
|
int idx;
|
|
u64 payload, ip;
|
|
|
|
memset(&decoder->record, 0x0, sizeof(decoder->record));
|
|
|
|
while (1) {
|
|
err = arm_spe_get_next_packet(decoder);
|
|
if (err <= 0)
|
|
return err;
|
|
|
|
idx = decoder->packet.index;
|
|
payload = decoder->packet.payload;
|
|
|
|
switch (decoder->packet.type) {
|
|
case ARM_SPE_TIMESTAMP:
|
|
decoder->record.timestamp = payload;
|
|
return 1;
|
|
case ARM_SPE_END:
|
|
return 1;
|
|
case ARM_SPE_ADDRESS:
|
|
ip = arm_spe_calc_ip(idx, payload);
|
|
if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
|
|
decoder->record.from_ip = ip;
|
|
else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
|
|
decoder->record.to_ip = ip;
|
|
break;
|
|
case ARM_SPE_COUNTER:
|
|
break;
|
|
case ARM_SPE_CONTEXT:
|
|
break;
|
|
case ARM_SPE_OP_TYPE:
|
|
break;
|
|
case ARM_SPE_EVENTS:
|
|
if (payload & BIT(EV_L1D_REFILL))
|
|
decoder->record.type |= ARM_SPE_L1D_MISS;
|
|
|
|
if (payload & BIT(EV_L1D_ACCESS))
|
|
decoder->record.type |= ARM_SPE_L1D_ACCESS;
|
|
|
|
if (payload & BIT(EV_TLB_WALK))
|
|
decoder->record.type |= ARM_SPE_TLB_MISS;
|
|
|
|
if (payload & BIT(EV_TLB_ACCESS))
|
|
decoder->record.type |= ARM_SPE_TLB_ACCESS;
|
|
|
|
if ((idx == 1 || idx == 2 || idx == 3) &&
|
|
(payload & BIT(EV_LLC_MISS)))
|
|
decoder->record.type |= ARM_SPE_LLC_MISS;
|
|
|
|
if ((idx == 1 || idx == 2 || idx == 3) &&
|
|
(payload & BIT(EV_LLC_ACCESS)))
|
|
decoder->record.type |= ARM_SPE_LLC_ACCESS;
|
|
|
|
if ((idx == 1 || idx == 2 || idx == 3) &&
|
|
(payload & BIT(EV_REMOTE_ACCESS)))
|
|
decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
|
|
|
|
if (payload & BIT(EV_MISPRED))
|
|
decoder->record.type |= ARM_SPE_BRANCH_MISS;
|
|
|
|
break;
|
|
case ARM_SPE_DATA_SOURCE:
|
|
break;
|
|
case ARM_SPE_BAD:
|
|
break;
|
|
case ARM_SPE_PAD:
|
|
break;
|
|
default:
|
|
pr_err("Get packet error!\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * arm_spe_decode() - public entry point for decoding the next record.
 * @decoder: decoder created by arm_spe_decoder_new()
 *
 * Return: whatever arm_spe_read_record() returns; the decoded data is left in
 * decoder->record.
 */
int arm_spe_decode(struct arm_spe_decoder *decoder)
{
	int ret = arm_spe_read_record(decoder);

	return ret;
}
|