The 'perf stat' subcommand supports the request for a summary of the interval counter readings. But the summary lines break the CSV output so it's hard for scripts to parse the result. Before: # perf stat -x, -I1000 --interval-count 1 --summary 1.001323097,8013.48,msec,cpu-clock,8013483384,100.00,8.013,CPUs utilized 1.001323097,270,,context-switches,8013513297,100.00,0.034,K/sec 1.001323097,13,,cpu-migrations,8013530032,100.00,0.002,K/sec 1.001323097,184,,page-faults,8013546992,100.00,0.023,K/sec 1.001323097,20574191,,cycles,8013551506,100.00,0.003,GHz 1.001323097,10562267,,instructions,8013564958,100.00,0.51,insn per cycle 1.001323097,2019244,,branches,8013575673,100.00,0.252,M/sec 1.001323097,106152,,branch-misses,8013585776,100.00,5.26,of all branches 8013.48,msec,cpu-clock,8013483384,100.00,7.984,CPUs utilized 270,,context-switches,8013513297,100.00,0.034,K/sec 13,,cpu-migrations,8013530032,100.00,0.002,K/sec 184,,page-faults,8013546992,100.00,0.023,K/sec 20574191,,cycles,8013551506,100.00,0.003,GHz 10562267,,instructions,8013564958,100.00,0.51,insn per cycle 2019244,,branches,8013575673,100.00,0.252,M/sec 106152,,branch-misses,8013585776,100.00,5.26,of all branches The summary line loses the timestamp column, which breaks the CSV output. We add a column at the original 'timestamp' position and it just says 'summary' for the summary line. 
After: # perf stat -x, -I1000 --interval-count 1 --summary 1.001196053,8012.72,msec,cpu-clock,8012722903,100.00,8.013,CPUs utilized 1.001196053,218,,context-switches,8012753271,100.00,0.027,K/sec 1.001196053,9,,cpu-migrations,8012769767,100.00,0.001,K/sec 1.001196053,0,,page-faults,8012786257,100.00,0.000,K/sec 1.001196053,15004518,,cycles,8012790637,100.00,0.002,GHz 1.001196053,7954691,,instructions,8012804027,100.00,0.53,insn per cycle 1.001196053,1590259,,branches,8012814766,100.00,0.198,M/sec 1.001196053,82601,,branch-misses,8012824365,100.00,5.19,of all branches summary,8012.72,msec,cpu-clock,8012722903,100.00,7.986,CPUs utilized summary,218,,context-switches,8012753271,100.00,0.027,K/sec summary,9,,cpu-migrations,8012769767,100.00,0.001,K/sec summary,0,,page-faults,8012786257,100.00,0.000,K/sec summary,15004518,,cycles,8012790637,100.00,0.002,GHz summary,7954691,,instructions,8012804027,100.00,0.53,insn per cycle summary,1590259,,branches,8012814766,100.00,0.198,M/sec summary,82601,,branch-misses,8012824365,100.00,5.19,of all branches Now it's easy for scripts to analyse the summary lines. To avoid breaking existing scripts that depend on the old output, a new '--no-csv-summary' option keeps the previous (broken) CSV summary format. 
# perf stat -x, -I1000 --interval-count 1 --summary --no-csv-summary 1.001213261,8012.67,msec,cpu-clock,8012672327,100.00,8.013,CPUs utilized 1.001213261,197,,context-switches,8012703742,100.00,24.586,/sec 1.001213261,9,,cpu-migrations,8012720902,100.00,1.123,/sec 1.001213261,644,,page-faults,8012738266,100.00,80.373,/sec 1.001213261,18350698,,cycles,8012744109,100.00,0.002,GHz 1.001213261,12745021,,instructions,8012759001,100.00,0.69,insn per cycle 1.001213261,2458033,,branches,8012770864,100.00,306.768,K/sec 1.001213261,102107,,branch-misses,8012781751,100.00,4.15,of all branches 8012.67,msec,cpu-clock,8012672327,100.00,7.985,CPUs utilized 197,,context-switches,8012703742,100.00,24.586,/sec 9,,cpu-migrations,8012720902,100.00,1.123,/sec 644,,page-faults,8012738266,100.00,80.373,/sec 18350698,,cycles,8012744109,100.00,0.002,GHz 12745021,,instructions,8012759001,100.00,0.69,insn per cycle 2458033,,branches,8012770864,100.00,306.768,K/sec 102107,,branch-misses,8012781751,100.00,4.15,of all branches This option can be enabled in perf config by setting the variable 'stat.no-csv-summary'. 
# perf config stat.no-csv-summary=true # perf config -l stat.no-csv-summary=true # perf stat -x, -I1000 --interval-count 1 --summary 1.001330198,8013.28,msec,cpu-clock,8013279201,100.00,8.013,CPUs utilized 1.001330198,205,,context-switches,8013308394,100.00,25.583,/sec 1.001330198,10,,cpu-migrations,8013324681,100.00,1.248,/sec 1.001330198,0,,page-faults,8013340926,100.00,0.000,/sec 1.001330198,8027742,,cycles,8013344503,100.00,0.001,GHz 1.001330198,2871717,,instructions,8013356501,100.00,0.36,insn per cycle 1.001330198,553564,,branches,8013366204,100.00,69.081,K/sec 1.001330198,54021,,branch-misses,8013375952,100.00,9.76,of all branches 8013.28,msec,cpu-clock,8013279201,100.00,7.985,CPUs utilized 205,,context-switches,8013308394,100.00,25.583,/sec 10,,cpu-migrations,8013324681,100.00,1.248,/sec 0,,page-faults,8013340926,100.00,0.000,/sec 8027742,,cycles,8013344503,100.00,0.001,GHz 2871717,,instructions,8013356501,100.00,0.36,insn per cycle 553564,,branches,8013366204,100.00,69.081,K/sec 54021,,branch-misses,8013375952,100.00,9.76,of all branches Signed-off-by: Jin Yao <yao.jin@linux.intel.com> Acked-by: Andi Kleen <ak@linux.intel.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jin Yao <yao.jin@intel.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lore.kernel.org/lkml/20210319070156.20394-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
258 lines
6.5 KiB
C
258 lines
6.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __PERF_STATS_H
|
|
#define __PERF_STATS_H
|
|
|
|
#include <linux/types.h>
|
|
#include <stdio.h>
|
|
#include <sys/types.h>
|
|
#include <sys/resource.h>
|
|
#include "cpumap.h"
|
|
#include "rblist.h"
|
|
|
|
/*
 * Forward declarations for types that this header refers to through
 * pointers before (or without) seeing their full definition.
 */
struct perf_cpu_map;
struct perf_stat_config;
struct timespec;
|
|
|
|
struct stats {
|
|
double n, mean, M2;
|
|
u64 max, min;
|
|
};
|
|
|
|
/*
 * Identifiers attached to events via struct perf_stat_evsel::id and tested
 * with perf_stat_evsel__is().
 *
 * PERF_STAT_EVSEL_ID__NONE is 0 and the remaining values are assigned
 * sequentially, so PERF_STAT_EVSEL_ID__MAX equals the number of identifiers
 * and must stay last.
 */
enum perf_stat_evsel_id {
	PERF_STAT_EVSEL_ID__NONE = 0,
	PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
	PERF_STAT_EVSEL_ID__TRANSACTION_START,
	PERF_STAT_EVSEL_ID__ELISION_START,
	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
	PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
	PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
	PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
	PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
	PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
	PERF_STAT_EVSEL_ID__TOPDOWN_HEAVY_OPS,
	PERF_STAT_EVSEL_ID__TOPDOWN_BR_MISPREDICT,
	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_LAT,
	PERF_STAT_EVSEL_ID__TOPDOWN_MEM_BOUND,
	PERF_STAT_EVSEL_ID__SMI_NUM,
	PERF_STAT_EVSEL_ID__APERF,
	PERF_STAT_EVSEL_ID__MAX,
};
|
|
|
|
struct perf_stat_evsel {
|
|
struct stats res_stats[3];
|
|
enum perf_stat_evsel_id id;
|
|
u64 *group_data;
|
|
};
|
|
|
|
/*
 * Aggregation mode stored in perf_stat_config::aggr_mode.
 *
 * Values are assigned sequentially from 0. Note that AGGR_NODE comes after
 * AGGR_UNSET; NOTE(review): keep the existing order unless it is confirmed
 * that nothing depends on the numeric values.
 */
enum aggr_mode {
	AGGR_NONE,
	AGGR_GLOBAL,
	AGGR_SOCKET,
	AGGR_DIE,
	AGGR_CORE,
	AGGR_THREAD,
	AGGR_UNSET,
	AGGR_NODE,
};
|
|
|
|
/*
 * Context bit flags. Each flag is a distinct power of two so they can be
 * OR-ed together; CTX_BIT_MAX is one bit above the highest flag, i.e. the
 * number of possible flag combinations.
 *
 * NOTE(review): presumably these describe the privilege/context an event
 * counts in (user, kernel, hypervisor, host, idle) - confirm with users.
 */
enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

/* Number of distinct context values (every combination of the flags above). */
#define NUM_CTX CTX_BIT_MAX
|
|
|
|
/*
 * Kinds of statistics tracked by the stat code.
 *
 * STAT_NONE is 0 and the remaining values are assigned sequentially, so
 * STAT_MAX equals the number of kinds and must stay last.
 *
 * NOTE(review): presumably used as a key for the saved values in
 * struct runtime_stat - confirm against the shadow-stat implementation.
 */
enum stat_type {
	STAT_NONE = 0,
	STAT_NSECS,
	STAT_CYCLES,
	STAT_STALLED_CYCLES_FRONT,
	STAT_STALLED_CYCLES_BACK,
	STAT_BRANCHES,
	STAT_CACHEREFS,
	STAT_L1_DCACHE,
	STAT_L1_ICACHE,
	STAT_LL_CACHE,
	STAT_ITLB_CACHE,
	STAT_DTLB_CACHE,
	STAT_CYCLES_IN_TX,
	STAT_TRANSACTION,
	STAT_ELISION,
	STAT_TOPDOWN_TOTAL_SLOTS,
	STAT_TOPDOWN_SLOTS_ISSUED,
	STAT_TOPDOWN_SLOTS_RETIRED,
	STAT_TOPDOWN_FETCH_BUBBLES,
	STAT_TOPDOWN_RECOVERY_BUBBLES,
	STAT_TOPDOWN_RETIRING,
	STAT_TOPDOWN_BAD_SPEC,
	STAT_TOPDOWN_FE_BOUND,
	STAT_TOPDOWN_BE_BOUND,
	STAT_TOPDOWN_HEAVY_OPS,
	STAT_TOPDOWN_BR_MISPREDICT,
	STAT_TOPDOWN_FETCH_LAT,
	STAT_TOPDOWN_MEM_BOUND,
	STAT_SMI_NUM,
	STAT_APERF,
	STAT_MAX
};
|
|
|
|
struct runtime_stat {
|
|
struct rblist value_list;
|
|
};
|
|
|
|
/*
 * Callback type stored in perf_stat_config::aggr_get_id.
 *
 * Takes the stat configuration, a CPU map and an int @cpu and returns a
 * struct aggr_cpu_id by value.
 * NOTE(review): whether @cpu is a CPU number or an index into @m is not
 * visible here - confirm with the implementations.
 */
typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
					    struct perf_cpu_map *m, int cpu);
|
|
|
|
struct perf_stat_config {
|
|
enum aggr_mode aggr_mode;
|
|
bool scale;
|
|
bool no_inherit;
|
|
bool identifier;
|
|
bool csv_output;
|
|
bool interval_clear;
|
|
bool metric_only;
|
|
bool null_run;
|
|
bool ru_display;
|
|
bool big_num;
|
|
bool no_merge;
|
|
bool walltime_run_table;
|
|
bool all_kernel;
|
|
bool all_user;
|
|
bool percore_show_thread;
|
|
bool summary;
|
|
bool no_csv_summary;
|
|
bool metric_no_group;
|
|
bool metric_no_merge;
|
|
bool stop_read_counter;
|
|
bool quiet;
|
|
FILE *output;
|
|
unsigned int interval;
|
|
unsigned int timeout;
|
|
int initial_delay;
|
|
unsigned int unit_width;
|
|
unsigned int metric_only_len;
|
|
int times;
|
|
int run_count;
|
|
int print_free_counters_hint;
|
|
int print_mixed_hw_group_error;
|
|
struct runtime_stat *stats;
|
|
int stats_num;
|
|
const char *csv_sep;
|
|
struct stats *walltime_nsecs_stats;
|
|
struct rusage ru_data;
|
|
struct cpu_aggr_map *aggr_map;
|
|
aggr_get_id_t aggr_get_id;
|
|
struct cpu_aggr_map *cpus_aggr_map;
|
|
u64 *walltime_run;
|
|
struct rblist metric_events;
|
|
int ctl_fd;
|
|
int ctl_fd_ack;
|
|
bool ctl_fd_close;
|
|
const char *cgroup_list;
|
|
unsigned int topdown_level;
|
|
};
|
|
|
|
void perf_stat__set_big_num(int set);
|
|
void perf_stat__set_no_csv_summary(int set);
|
|
|
|
void update_stats(struct stats *stats, u64 val);
|
|
double avg_stats(struct stats *stats);
|
|
double stddev_stats(struct stats *stats);
|
|
double rel_stddev_stats(double stddev, double avg);
|
|
|
|
static inline void init_stats(struct stats *stats)
|
|
{
|
|
stats->n = 0.0;
|
|
stats->mean = 0.0;
|
|
stats->M2 = 0.0;
|
|
stats->min = (u64) -1;
|
|
stats->max = 0;
|
|
}
|
|
|
|
struct evsel;
|
|
struct evlist;
|
|
|
|
struct perf_aggr_thread_value {
|
|
struct evsel *counter;
|
|
struct aggr_cpu_id id;
|
|
double uval;
|
|
u64 val;
|
|
u64 run;
|
|
u64 ena;
|
|
};
|
|
|
|
/*
 * Helper behind perf_stat_evsel__is().
 * NOTE(review): presumably reports whether @evsel carries the stat
 * identifier @id - confirm in the definition.
 */
bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);

/*
 * Convenience wrapper that pastes the PERF_STAT_EVSEL_ID__ prefix onto @id,
 * e.g. perf_stat_evsel__is(evsel, APERF) calls
 * __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__APERF).
 */
#define perf_stat_evsel__is(evsel, id) \
	__perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
|
|
|
|
/* Global instances defined elsewhere; only declared here. */
extern struct runtime_stat rt_stat;
extern struct stats walltime_nsecs_stats;
|
|
|
|
/*
 * Output callbacks carried by struct perf_stat_output_ctx.
 *
 * print_metric_t receives the configuration, an opaque @ctx, and the
 * @color, @unit, printf-style @fmt and value @val of one metric.
 * new_line_t receives the configuration and the same opaque @ctx.
 * NOTE(review): whether @color/@fmt/@unit may be NULL is not visible here.
 */
typedef void (*print_metric_t)(struct perf_stat_config *config,
			       void *ctx, const char *color, const char *unit,
			       const char *fmt, double val);
typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
|
|
|
|
void runtime_stat__init(struct runtime_stat *st);
|
|
void runtime_stat__exit(struct runtime_stat *st);
|
|
void perf_stat__init_shadow_stats(void);
|
|
void perf_stat__reset_shadow_stats(void);
|
|
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
|
|
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
|
|
int cpu, struct runtime_stat *st);
|
|
struct perf_stat_output_ctx {
|
|
void *ctx;
|
|
print_metric_t print_metric;
|
|
new_line_t new_line;
|
|
bool force_header;
|
|
};
|
|
|
|
/*
 * Print the shadow statistics for @evsel through the callbacks in @out.
 * NOTE(review): @avg is presumably the averaged counter value and @cpu the
 * CPU the value belongs to - confirm in the definition.
 */
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st);
/* Parameter name added for readability; the type is unchanged. */
void perf_stat__collect_metric_expr(struct evlist *evlist);
|
|
|
|
/*
 * Management of the per-event stat data of an evlist.
 * NOTE(review): evlist__alloc_stats() presumably returns 0 on success and a
 * negative value on failure, and @alloc_raw presumably requests the
 * previous-raw-count buffers used by the prev_raw_counts helpers - confirm.
 */
int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
void evlist__free_stats(struct evlist *evlist);
void evlist__reset_stats(struct evlist *evlist);
void evlist__reset_prev_raw_counts(struct evlist *evlist);
void evlist__copy_prev_raw_counts(struct evlist *evlist);
void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
|
|
|
|
/* Process the readings of @counter according to @config. */
int perf_stat_process_counter(struct perf_stat_config *config,
			      struct evsel *counter);

/* Only used through pointers in the declarations that follow. */
struct perf_tool;
union perf_event;
struct perf_session;
struct target;

/* Handler for a stat @event belonging to @session. */
int perf_event__process_stat_event(struct perf_session *session,
				   union perf_event *event);
|
|
|
|
/*
 * Dump a stat-related @event to @fp.
 * NOTE(review): the size_t result is presumably the number of characters
 * written - confirm in the definitions.
 */
size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
|
|
|
|
/*
 * Create the counter for @evsel using @config and @target.
 * NOTE(review): the meaning of @cpu and of the return value is not visible
 * in this header - confirm in the definition.
 */
int create_perf_stat_counter(struct evsel *evsel,
			     struct perf_stat_config *config,
			     struct target *target,
			     int cpu);

/*
 * Print the counters of @evlist in the format selected by @config.
 * NOTE(review): @ts is presumably the interval timestamp (it becomes the
 * first CSV column of interval lines) and @argc/@argv the command line of
 * the measured workload - confirm in the definition.
 */
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
			    struct target *_target, struct timespec *ts, int argc, const char **argv);
|
|
|
|
/* Only used through a pointer below. */
struct metric_expr;

/*
 * NOTE(review): judging by the name this is a hook for the test suite that
 * evaluates @mexp for @cpu against @st and returns the result - confirm.
 */
double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
|
|
#endif
|