linux/tools/perf/util/stat.h
Jin Yao 0bdad97801 perf stat: Align CSV output for summary mode
The 'perf stat' subcommand supports the request for a summary of the
interval counter readings.  But the summary lines break the CSV output
so it's hard for scripts to parse the result.

Before:

  # perf stat -x, -I1000 --interval-count 1 --summary
       1.001323097,8013.48,msec,cpu-clock,8013483384,100.00,8.013,CPUs utilized
       1.001323097,270,,context-switches,8013513297,100.00,0.034,K/sec
       1.001323097,13,,cpu-migrations,8013530032,100.00,0.002,K/sec
       1.001323097,184,,page-faults,8013546992,100.00,0.023,K/sec
       1.001323097,20574191,,cycles,8013551506,100.00,0.003,GHz
       1.001323097,10562267,,instructions,8013564958,100.00,0.51,insn per cycle
       1.001323097,2019244,,branches,8013575673,100.00,0.252,M/sec
       1.001323097,106152,,branch-misses,8013585776,100.00,5.26,of all branches
  8013.48,msec,cpu-clock,8013483384,100.00,7.984,CPUs utilized
  270,,context-switches,8013513297,100.00,0.034,K/sec
  13,,cpu-migrations,8013530032,100.00,0.002,K/sec
  184,,page-faults,8013546992,100.00,0.023,K/sec
  20574191,,cycles,8013551506,100.00,0.003,GHz
  10562267,,instructions,8013564958,100.00,0.51,insn per cycle
  2019244,,branches,8013575673,100.00,0.252,M/sec
  106152,,branch-misses,8013585776,100.00,5.26,of all branches

The summary line loses the timestamp column, which breaks the CSV
output.

We add a column at the original 'timestamp' position and it just says
'summary' for the summary line.

After:

  # perf stat -x, -I1000 --interval-count 1 --summary
       1.001196053,8012.72,msec,cpu-clock,8012722903,100.00,8.013,CPUs utilized
       1.001196053,218,,context-switches,8012753271,100.00,0.027,K/sec
       1.001196053,9,,cpu-migrations,8012769767,100.00,0.001,K/sec
       1.001196053,0,,page-faults,8012786257,100.00,0.000,K/sec
       1.001196053,15004518,,cycles,8012790637,100.00,0.002,GHz
       1.001196053,7954691,,instructions,8012804027,100.00,0.53,insn per cycle
       1.001196053,1590259,,branches,8012814766,100.00,0.198,M/sec
       1.001196053,82601,,branch-misses,8012824365,100.00,5.19,of all branches
           summary,8012.72,msec,cpu-clock,8012722903,100.00,7.986,CPUs utilized
           summary,218,,context-switches,8012753271,100.00,0.027,K/sec
           summary,9,,cpu-migrations,8012769767,100.00,0.001,K/sec
           summary,0,,page-faults,8012786257,100.00,0.000,K/sec
           summary,15004518,,cycles,8012790637,100.00,0.002,GHz
           summary,7954691,,instructions,8012804027,100.00,0.53,insn per cycle
           summary,1590259,,branches,8012814766,100.00,0.198,M/sec
           summary,82601,,branch-misses,8012824365,100.00,5.19,of all branches

Now it's easy for script to analyse the summary lines.

Of course, we also consider not to break possible existing scripts which
can continue to use the broken CSV format by using a new '--no-csv-summary.'
option.

  # perf stat -x, -I1000 --interval-count 1 --summary --no-csv-summary
       1.001213261,8012.67,msec,cpu-clock,8012672327,100.00,8.013,CPUs utilized
       1.001213261,197,,context-switches,8012703742,100.00,24.586,/sec
       1.001213261,9,,cpu-migrations,8012720902,100.00,1.123,/sec
       1.001213261,644,,page-faults,8012738266,100.00,80.373,/sec
       1.001213261,18350698,,cycles,8012744109,100.00,0.002,GHz
       1.001213261,12745021,,instructions,8012759001,100.00,0.69,insn per cycle
       1.001213261,2458033,,branches,8012770864,100.00,306.768,K/sec
       1.001213261,102107,,branch-misses,8012781751,100.00,4.15,of all branches
  8012.67,msec,cpu-clock,8012672327,100.00,7.985,CPUs utilized
  197,,context-switches,8012703742,100.00,24.586,/sec
  9,,cpu-migrations,8012720902,100.00,1.123,/sec
  644,,page-faults,8012738266,100.00,80.373,/sec
  18350698,,cycles,8012744109,100.00,0.002,GHz
  12745021,,instructions,8012759001,100.00,0.69,insn per cycle
  2458033,,branches,8012770864,100.00,306.768,K/sec
  102107,,branch-misses,8012781751,100.00,4.15,of all branches

This option can be enabled in perf config by setting the variable
'stat.no-csv-summary'.

  # perf config stat.no-csv-summary=true

  # perf config -l
  stat.no-csv-summary=true

  # perf stat -x, -I1000 --interval-count 1 --summary
       1.001330198,8013.28,msec,cpu-clock,8013279201,100.00,8.013,CPUs utilized
       1.001330198,205,,context-switches,8013308394,100.00,25.583,/sec
       1.001330198,10,,cpu-migrations,8013324681,100.00,1.248,/sec
       1.001330198,0,,page-faults,8013340926,100.00,0.000,/sec
       1.001330198,8027742,,cycles,8013344503,100.00,0.001,GHz
       1.001330198,2871717,,instructions,8013356501,100.00,0.36,insn per cycle
       1.001330198,553564,,branches,8013366204,100.00,69.081,K/sec
       1.001330198,54021,,branch-misses,8013375952,100.00,9.76,of all branches
  8013.28,msec,cpu-clock,8013279201,100.00,7.985,CPUs utilized
  205,,context-switches,8013308394,100.00,25.583,/sec
  10,,cpu-migrations,8013324681,100.00,1.248,/sec
  0,,page-faults,8013340926,100.00,0.000,/sec
  8027742,,cycles,8013344503,100.00,0.001,GHz
  2871717,,instructions,8013356501,100.00,0.36,insn per cycle
  553564,,branches,8013366204,100.00,69.081,K/sec
  54021,,branch-misses,8013375952,100.00,9.76,of all branches

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jin Yao <yao.jin@intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210319070156.20394-1-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-03-24 10:21:49 -03:00

258 lines
6.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_STATS_H
#define __PERF_STATS_H
#include <linux/types.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/resource.h>
#include "cpumap.h"
#include "rblist.h"
struct perf_cpu_map;
struct perf_stat_config;
struct timespec;
struct stats {
double n, mean, M2;
u64 max, min;
};
enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__NONE = 0,
PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
PERF_STAT_EVSEL_ID__TRANSACTION_START,
PERF_STAT_EVSEL_ID__ELISION_START,
PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING,
PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC,
PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND,
PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND,
PERF_STAT_EVSEL_ID__TOPDOWN_HEAVY_OPS,
PERF_STAT_EVSEL_ID__TOPDOWN_BR_MISPREDICT,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_LAT,
PERF_STAT_EVSEL_ID__TOPDOWN_MEM_BOUND,
PERF_STAT_EVSEL_ID__SMI_NUM,
PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX,
};
struct perf_stat_evsel {
struct stats res_stats[3];
enum perf_stat_evsel_id id;
u64 *group_data;
};
enum aggr_mode {
AGGR_NONE,
AGGR_GLOBAL,
AGGR_SOCKET,
AGGR_DIE,
AGGR_CORE,
AGGR_THREAD,
AGGR_UNSET,
AGGR_NODE,
};
enum {
CTX_BIT_USER = 1 << 0,
CTX_BIT_KERNEL = 1 << 1,
CTX_BIT_HV = 1 << 2,
CTX_BIT_HOST = 1 << 3,
CTX_BIT_IDLE = 1 << 4,
CTX_BIT_MAX = 1 << 5,
};
#define NUM_CTX CTX_BIT_MAX
enum stat_type {
STAT_NONE = 0,
STAT_NSECS,
STAT_CYCLES,
STAT_STALLED_CYCLES_FRONT,
STAT_STALLED_CYCLES_BACK,
STAT_BRANCHES,
STAT_CACHEREFS,
STAT_L1_DCACHE,
STAT_L1_ICACHE,
STAT_LL_CACHE,
STAT_ITLB_CACHE,
STAT_DTLB_CACHE,
STAT_CYCLES_IN_TX,
STAT_TRANSACTION,
STAT_ELISION,
STAT_TOPDOWN_TOTAL_SLOTS,
STAT_TOPDOWN_SLOTS_ISSUED,
STAT_TOPDOWN_SLOTS_RETIRED,
STAT_TOPDOWN_FETCH_BUBBLES,
STAT_TOPDOWN_RECOVERY_BUBBLES,
STAT_TOPDOWN_RETIRING,
STAT_TOPDOWN_BAD_SPEC,
STAT_TOPDOWN_FE_BOUND,
STAT_TOPDOWN_BE_BOUND,
STAT_TOPDOWN_HEAVY_OPS,
STAT_TOPDOWN_BR_MISPREDICT,
STAT_TOPDOWN_FETCH_LAT,
STAT_TOPDOWN_MEM_BOUND,
STAT_SMI_NUM,
STAT_APERF,
STAT_MAX
};
struct runtime_stat {
struct rblist value_list;
};
typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
struct perf_cpu_map *m, int cpu);
struct perf_stat_config {
enum aggr_mode aggr_mode;
bool scale;
bool no_inherit;
bool identifier;
bool csv_output;
bool interval_clear;
bool metric_only;
bool null_run;
bool ru_display;
bool big_num;
bool no_merge;
bool walltime_run_table;
bool all_kernel;
bool all_user;
bool percore_show_thread;
bool summary;
bool no_csv_summary;
bool metric_no_group;
bool metric_no_merge;
bool stop_read_counter;
bool quiet;
FILE *output;
unsigned int interval;
unsigned int timeout;
int initial_delay;
unsigned int unit_width;
unsigned int metric_only_len;
int times;
int run_count;
int print_free_counters_hint;
int print_mixed_hw_group_error;
struct runtime_stat *stats;
int stats_num;
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
struct cpu_aggr_map *aggr_map;
aggr_get_id_t aggr_get_id;
struct cpu_aggr_map *cpus_aggr_map;
u64 *walltime_run;
struct rblist metric_events;
int ctl_fd;
int ctl_fd_ack;
bool ctl_fd_close;
const char *cgroup_list;
unsigned int topdown_level;
};
void perf_stat__set_big_num(int set);
void perf_stat__set_no_csv_summary(int set);
void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
double stddev_stats(struct stats *stats);
double rel_stddev_stats(double stddev, double avg);
static inline void init_stats(struct stats *stats)
{
stats->n = 0.0;
stats->mean = 0.0;
stats->M2 = 0.0;
stats->min = (u64) -1;
stats->max = 0;
}
struct evsel;
struct evlist;
struct perf_aggr_thread_value {
struct evsel *counter;
struct aggr_cpu_id id;
double uval;
u64 val;
u64 run;
u64 ena;
};
bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
#define perf_stat_evsel__is(evsel, id) \
__perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
extern struct runtime_stat rt_stat;
extern struct stats walltime_nsecs_stats;
typedef void (*print_metric_t)(struct perf_stat_config *config,
void *ctx, const char *color, const char *unit,
const char *fmt, double val);
typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx);
void runtime_stat__init(struct runtime_stat *st);
void runtime_stat__exit(struct runtime_stat *st);
void perf_stat__init_shadow_stats(void);
void perf_stat__reset_shadow_stats(void);
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
int cpu, struct runtime_stat *st);
struct perf_stat_output_ctx {
void *ctx;
print_metric_t print_metric;
new_line_t new_line;
bool force_header;
};
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out,
struct rblist *metric_events,
struct runtime_stat *st);
void perf_stat__collect_metric_expr(struct evlist *);
int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
void evlist__free_stats(struct evlist *evlist);
void evlist__reset_stats(struct evlist *evlist);
void evlist__reset_prev_raw_counts(struct evlist *evlist);
void evlist__copy_prev_raw_counts(struct evlist *evlist);
void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter);
struct perf_tool;
union perf_event;
struct perf_session;
struct target;
int perf_event__process_stat_event(struct perf_session *session,
union perf_event *event);
size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target,
int cpu);
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts, int argc, const char **argv);
struct metric_expr;
double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
#endif