mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 21:51:40 +00:00
perf cs-etm: Handle PERF_RECORD_AUX_OUTPUT_HW_ID packet
When using dynamically assigned CoreSight trace IDs the drivers can output the ID / CPU association as a PERF_RECORD_AUX_OUTPUT_HW_ID packet. Update cs-etm decoder to handle this packet by setting the CPU/Trace ID mapping. Reviewed-by: James Clark <james.clark@arm.com> Signed-off-by: Mike Leach <mike.leach@linaro.org> Acked-by: Suzuki Poulouse <suzuki.poulose@arm.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Darren Hart <darren@os.amperecomputing.com> Cc: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230331055645.26918-2-mike.leach@linaro.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
e5fa5b4110
commit
b6521ea2a0
@ -31,6 +31,9 @@
|
||||
*/
|
||||
#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
|
||||
|
||||
/* Value to set for unused trace ID values */
|
||||
#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
|
||||
|
||||
/*
|
||||
* Below are the definition of bit offsets for perf option, and works as
|
||||
* arbitrary values for all ETM versions.
|
||||
@ -55,4 +58,16 @@
|
||||
#define ETM4_CFG_BIT_RETSTK 12
|
||||
#define ETM4_CFG_BIT_VMID_OPT 15
|
||||
|
||||
/*
|
||||
* Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
|
||||
* Used to associate a CPU with the CoreSight Trace ID.
|
||||
* [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
|
||||
* [59:08] - Unused (SBZ)
|
||||
* [63:60] - Version
|
||||
*/
|
||||
#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
|
||||
#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
|
||||
|
||||
#define CS_AUX_HW_ID_CURR_VERSION 0
|
||||
|
||||
#endif
|
||||
|
@ -668,6 +668,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
|
||||
switch (t_params->protocol) {
|
||||
case CS_ETM_PROTO_ETMV3:
|
||||
case CS_ETM_PROTO_PTM:
|
||||
csid = (t_params->etmv3.reg_idr & CORESIGHT_TRACE_ID_VAL_MASK);
|
||||
cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
|
||||
decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
|
||||
OCSD_BUILTIN_DCD_ETMV3 :
|
||||
@ -675,11 +676,13 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
|
||||
trace_config = &config_etmv3;
|
||||
break;
|
||||
case CS_ETM_PROTO_ETMV4i:
|
||||
csid = (t_params->etmv4.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
|
||||
cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
|
||||
decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
|
||||
trace_config = &trace_config_etmv4;
|
||||
break;
|
||||
case CS_ETM_PROTO_ETE:
|
||||
csid = (t_params->ete.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
|
||||
cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete);
|
||||
decoder->decoder_name = OCSD_BUILTIN_DCD_ETE;
|
||||
trace_config = &trace_config_ete;
|
||||
@ -688,6 +691,10 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* if the CPU has no trace ID associated, no decoder needed */
|
||||
if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
|
||||
return 0;
|
||||
|
||||
if (d_params->operation == CS_ETM_OPERATION_DECODE) {
|
||||
if (ocsd_dt_create_decoder(decoder->dcd_tree,
|
||||
decoder->decoder_name,
|
||||
|
@ -220,6 +220,143 @@ static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Read the trace ID currently stored in a CPU's metadata block.
 *
 * The register slot that holds the ID depends on the metadata magic number:
 * ETMv3 metadata keeps it in CS_ETM_ETMTRACEIDR, ETMv4 and ETE metadata keep
 * it in CS_ETMV4_TRCTRACEIDR. The stored value is masked with
 * CORESIGHT_TRACE_ID_VAL_MASK before being written to *trace_chan_id.
 *
 * Returns 0 on success, or -EINVAL if the magic number is not recognised (in
 * which case *trace_chan_id is left untouched).
 */
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
	const u64 magic = cpu_metadata[CS_ETM_MAGIC];
	u64 traceidr;

	if (magic == __perf_cs_etmv3_magic)
		traceidr = cpu_metadata[CS_ETM_ETMTRACEIDR];
	else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic)
		traceidr = cpu_metadata[CS_ETMV4_TRCTRACEIDR];
	else
		return -EINVAL;

	*trace_chan_id = (u8)(traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
	return 0;
}
|
||||
|
||||
/*
|
||||
* update metadata trace ID from the value found in the AUX_HW_INFO packet.
|
||||
* This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
|
||||
*/
|
||||
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
|
||||
{
|
||||
u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
|
||||
|
||||
switch (cs_etm_magic) {
|
||||
case __perf_cs_etmv3_magic:
|
||||
cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
|
||||
break;
|
||||
case __perf_cs_etmv4_magic:
|
||||
case __perf_cs_ete_magic:
|
||||
cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * FIELD_GET (linux/bitfield.h) is not available outside kernel code, and the
 * header contains too many dependencies to just copy over, so roll our own
 * based on the original.
 *
 * __bf_shf(x):            bit position of the least significant set bit of x.
 *                         x must be non-zero.
 * FIELD_GET(_mask, _reg): extract the field of _reg selected by the non-zero
 *                         bitmask _mask and shift it down to bit 0. The result
 *                         has the type of _mask. _mask is expanded more than
 *                         once, so it must be free of side effects.
 *
 * __typeof__ is used rather than typeof so that the macro also builds in
 * strict ISO C modes, and the expansion is a plain expression rather than a
 * statement expression.
 */
#define __bf_shf(x) (__builtin_ffsll(x) - 1)
#define FIELD_GET(_mask, _reg)						\
	((__typeof__(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)))
|
||||
|
||||
/*
|
||||
* Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
|
||||
*
|
||||
* The payload associates the Trace ID and the CPU.
|
||||
* The routine is tolerant of seeing multiple packets with the same association,
|
||||
* but a CPU / Trace ID association changing during a session is an error.
|
||||
*/
|
||||
static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
|
||||
union perf_event *event)
|
||||
{
|
||||
struct cs_etm_auxtrace *etm;
|
||||
struct perf_sample sample;
|
||||
struct int_node *inode;
|
||||
struct evsel *evsel;
|
||||
u64 *cpu_data;
|
||||
u64 hw_id;
|
||||
int cpu, version, err;
|
||||
u8 trace_chan_id, curr_chan_id;
|
||||
|
||||
/* extract and parse the HW ID */
|
||||
hw_id = event->aux_output_hw_id.hw_id;
|
||||
version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
|
||||
trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
|
||||
|
||||
/* check that we can handle this version */
|
||||
if (version > CS_AUX_HW_ID_CURR_VERSION)
|
||||
return -EINVAL;
|
||||
|
||||
/* get access to the etm metadata */
|
||||
etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
|
||||
if (!etm || !etm->metadata)
|
||||
return -EINVAL;
|
||||
|
||||
/* parse the sample to get the CPU */
|
||||
evsel = evlist__event2evsel(session->evlist, event);
|
||||
if (!evsel)
|
||||
return -EINVAL;
|
||||
err = evsel__parse_sample(evsel, event, &sample);
|
||||
if (err)
|
||||
return err;
|
||||
cpu = sample.cpu;
|
||||
if (cpu == -1) {
|
||||
/* no CPU in the sample - possibly recorded with an old version of perf */
|
||||
pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* See if the ID is mapped to a CPU, and it matches the current CPU */
|
||||
inode = intlist__find(traceid_list, trace_chan_id);
|
||||
if (inode) {
|
||||
cpu_data = inode->priv;
|
||||
if ((int)cpu_data[CS_ETM_CPU] != cpu) {
|
||||
pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* check that the mapped ID matches */
|
||||
err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
|
||||
if (err)
|
||||
return err;
|
||||
if (curr_chan_id != trace_chan_id) {
|
||||
pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* mapped and matched - return OK */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* not one we've seen before - lets map it */
|
||||
cpu_data = etm->metadata[cpu];
|
||||
err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* if we are picking up the association from the packet, need to plug
|
||||
* the correct trace ID into the metadata for setting up decoders later.
|
||||
*/
|
||||
err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
|
||||
return err;
|
||||
}
|
||||
|
||||
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
|
||||
u8 trace_chan_id)
|
||||
{
|
||||
@ -2668,11 +2805,16 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
|
||||
}
|
||||
|
||||
/*
|
||||
* In per-thread mode, CPU is set to -1, but TID will be set instead. See
|
||||
* auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match.
|
||||
* In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
|
||||
* auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
|
||||
* CPU as we set this always for the AUX_OUTPUT_HW_ID event.
|
||||
* So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
|
||||
* Return 'not found' if mismatch.
|
||||
*/
|
||||
if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) ||
|
||||
auxtrace_event->cpu != sample->cpu)
|
||||
if (auxtrace_event->cpu == (__u32) -1) {
|
||||
if (auxtrace_event->tid != sample->tid)
|
||||
return 1;
|
||||
} else if (auxtrace_event->cpu != sample->cpu)
|
||||
return 1;
|
||||
|
||||
if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
|
||||
@ -2721,6 +2863,17 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
|
||||
u64 offset __maybe_unused, void *data __maybe_unused)
|
||||
{
|
||||
/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
|
||||
if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
|
||||
(*(int *)data)++; /* increment found count */
|
||||
return cs_etm__process_aux_output_hw_id(session, event);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
|
||||
u64 offset __maybe_unused, void *data __maybe_unused)
|
||||
{
|
||||
@ -2839,13 +2992,13 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
|
||||
cs_etm_magic = metadata[i][CS_ETM_MAGIC];
|
||||
switch (cs_etm_magic) {
|
||||
case __perf_cs_etmv3_magic:
|
||||
trace_chan_id = (u8)((metadata[i][CS_ETM_ETMTRACEIDR]) &
|
||||
CORESIGHT_TRACE_ID_VAL_MASK);
|
||||
metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
|
||||
trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
|
||||
break;
|
||||
case __perf_cs_etmv4_magic:
|
||||
case __perf_cs_ete_magic:
|
||||
trace_chan_id = (u8)((metadata[i][CS_ETMV4_TRCTRACEIDR]) &
|
||||
CORESIGHT_TRACE_ID_VAL_MASK);
|
||||
metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
|
||||
trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
|
||||
break;
|
||||
default:
|
||||
/* unknown magic number */
|
||||
@ -2858,6 +3011,35 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we found AUX_HW_ID packets, then set any metadata marked as unused to the
|
||||
* unused value to reduce the number of unneeded decoders created.
|
||||
*/
|
||||
static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
|
||||
{
|
||||
u64 cs_etm_magic;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_cpu; i++) {
|
||||
cs_etm_magic = metadata[i][CS_ETM_MAGIC];
|
||||
switch (cs_etm_magic) {
|
||||
case __perf_cs_etmv3_magic:
|
||||
if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
|
||||
metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
|
||||
break;
|
||||
case __perf_cs_etmv4_magic:
|
||||
case __perf_cs_ete_magic:
|
||||
if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
|
||||
metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
|
||||
break;
|
||||
default:
|
||||
/* unknown magic number */
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
struct perf_session *session)
|
||||
{
|
||||
@ -2869,6 +3051,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
int priv_size = 0;
|
||||
int num_cpu;
|
||||
int err = 0;
|
||||
int aux_hw_id_found;
|
||||
int i, j;
|
||||
u64 *ptr = NULL;
|
||||
u64 **metadata = NULL;
|
||||
@ -3017,8 +3200,43 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
if (err)
|
||||
goto err_delete_thread;
|
||||
|
||||
/* before aux records are queued, need to map metadata to trace IDs */
|
||||
/*
|
||||
* Map Trace ID values to CPU metadata.
|
||||
*
|
||||
* Trace metadata will always contain Trace ID values from the legacy algorithm. If the
|
||||
* file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
|
||||
* ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
|
||||
*
|
||||
* The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
|
||||
* the same IDs as the old algorithm as far as is possible, unless there are clashes
|
||||
* in which case a different value will be used. This means an older perf may still
|
||||
* be able to record and read files generated on a newer system.
|
||||
*
|
||||
* For a perf able to interpret AUX_HW_ID packets we first check for the presence of
|
||||
* those packets. If they are there then the values will be mapped and plugged into
|
||||
* the metadata. We then set any remaining metadata values with the unused flag to a
|
||||
* value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
|
||||
*
|
||||
* If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
|
||||
* then we map Trace ID values to CPU directly from the metadata - clearing any unused
|
||||
* flags if present.
|
||||
*/
|
||||
|
||||
/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
|
||||
aux_hw_id_found = 0;
|
||||
err = perf_session__peek_events(session, session->header.data_offset,
|
||||
session->header.data_size,
|
||||
cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
|
||||
if (err)
|
||||
goto err_delete_thread;
|
||||
|
||||
/* if HW ID found then clear any unused metadata ID values */
|
||||
if (aux_hw_id_found)
|
||||
err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
|
||||
/* otherwise, this is a file with metadata values only, map from metadata */
|
||||
else
|
||||
err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
|
||||
|
||||
if (err)
|
||||
goto err_delete_thread;
|
||||
|
||||
@ -3027,14 +3245,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
|
||||
goto err_delete_thread;
|
||||
|
||||
etm->data_queued = etm->queues.populated;
|
||||
/*
|
||||
* Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
|
||||
* cs_etm__queue_aux_fragment() for details relating to limitations.
|
||||
*/
|
||||
if (!etm->data_queued)
|
||||
pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
|
||||
"Continuing with best effort decoding in piped mode.\n\n");
|
||||
|
||||
return 0;
|
||||
|
||||
err_delete_thread:
|
||||
|
Loading…
Reference in New Issue
Block a user