perf stat: Refactor aggregation code

Refactor aggregation code by introducing a single aggr_mode variable and an
enum for aggregation.

Also refactor the cpumap code that handles cpu to socket mappings. All of
this is in preparation for extended modes, such as cpu -> core.

Also fix socket aggregation and ensure that sockets are printed in increasing
order.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360846649-6411-2-git-send-email-eranian@google.com
[ committer note: Fixup conflicts with a7e191c "--repeat forever" and
  acf2892 "Use perf_evlist__prepare/start_workload()" ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Stephane Eranian 2013-02-14 13:57:27 +01:00 committed by Arnaldo Carvalho de Melo
parent ebf3c675d7
commit 86ee6e18f6
2 changed files with 150 additions and 103 deletions

View File

@ -68,7 +68,7 @@
static void print_stat(int argc, const char **argv); static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix); static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix); static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr_socket(char *prefix); static void print_aggr(char *prefix);
static struct perf_evlist *evsel_list; static struct perf_evlist *evsel_list;
@ -76,11 +76,16 @@ static struct perf_target target = {
.uid = UINT_MAX, .uid = UINT_MAX,
}; };
/*
 * How per-cpu counts are combined before being printed.  The explicit
 * values are the same 0, 1, 2 the enumerators would get implicitly.
 */
enum aggr_mode {
	AGGR_NONE   = 0,	/* -A/--no-aggr: disable CPU count aggregation */
	AGGR_GLOBAL = 1,	/* initial value of the aggr_mode variable */
	AGGR_SOCKET = 2,	/* --aggr-socket: aggregate counts per processor socket */
};
static int run_count = 1; static int run_count = 1;
static bool no_inherit = false; static bool no_inherit = false;
static bool scale = true; static bool scale = true;
static bool no_aggr = false; static enum aggr_mode aggr_mode = AGGR_GLOBAL;
static bool aggr_socket = false;
static pid_t child_pid = -1; static pid_t child_pid = -1;
static bool null_run = false; static bool null_run = false;
static int detailed_run = 0; static int detailed_run = 0;
@ -96,7 +101,8 @@ static bool sync_run = false;
static unsigned int interval = 0; static unsigned int interval = 0;
static bool forever = false; static bool forever = false;
static struct timespec ref_time; static struct timespec ref_time;
static struct cpu_map *sock_map; static struct cpu_map *aggr_map;
static int (*aggr_get_id)(struct cpu_map *m, int cpu);
static volatile int done = 0; static volatile int done = 0;
@ -355,41 +361,51 @@ static void print_interval(void)
struct timespec ts, rs; struct timespec ts, rs;
char prefix[64]; char prefix[64];
if (no_aggr) { if (aggr_mode == AGGR_GLOBAL) {
list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats));
read_counter(counter);
}
} else {
list_for_each_entry(counter, &evsel_list->entries, node) { list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv; ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats)); memset(ps->res_stats, 0, sizeof(ps->res_stats));
read_counter_aggr(counter); read_counter_aggr(counter);
} }
} else {
list_for_each_entry(counter, &evsel_list->entries, node) {
ps = counter->priv;
memset(ps->res_stats, 0, sizeof(ps->res_stats));
read_counter(counter);
}
} }
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time); diff_timespec(&rs, &ts, &ref_time);
sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
if (num_print_interval == 0 && !csv_output) { if (num_print_interval == 0 && !csv_output) {
if (aggr_socket) switch (aggr_mode) {
case AGGR_SOCKET:
fprintf(output, "# time socket cpus counts events\n"); fprintf(output, "# time socket cpus counts events\n");
else if (no_aggr) break;
case AGGR_NONE:
fprintf(output, "# time CPU counts events\n"); fprintf(output, "# time CPU counts events\n");
else break;
case AGGR_GLOBAL:
default:
fprintf(output, "# time counts events\n"); fprintf(output, "# time counts events\n");
}
} }
if (++num_print_interval == 25) if (++num_print_interval == 25)
num_print_interval = 0; num_print_interval = 0;
if (aggr_socket) switch (aggr_mode) {
print_aggr_socket(prefix); case AGGR_SOCKET:
else if (no_aggr) { print_aggr(prefix);
break;
case AGGR_NONE:
list_for_each_entry(counter, &evsel_list->entries, node) list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, prefix); print_counter(counter, prefix);
} else { break;
case AGGR_GLOBAL:
default:
list_for_each_entry(counter, &evsel_list->entries, node) list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter, prefix); print_counter_aggr(counter, prefix);
} }
@ -412,12 +428,6 @@ static int __run_perf_stat(int argc, const char **argv)
ts.tv_nsec = 0; ts.tv_nsec = 0;
} }
if (aggr_socket
&& cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
perror("cannot build socket map");
return -1;
}
if (forks) { if (forks) {
if (perf_evlist__prepare_workload(evsel_list, &target, argv, if (perf_evlist__prepare_workload(evsel_list, &target, argv,
false, false) < 0) { false, false) < 0) {
@ -493,17 +503,17 @@ static int __run_perf_stat(int argc, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0); update_stats(&walltime_nsecs_stats, t1 - t0);
if (no_aggr) { if (aggr_mode == AGGR_GLOBAL) {
list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter(counter);
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
}
} else {
list_for_each_entry(counter, &evsel_list->entries, node) { list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter_aggr(counter); read_counter_aggr(counter);
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
thread_map__nr(evsel_list->threads)); thread_map__nr(evsel_list->threads));
} }
} else {
list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter(counter);
perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
}
} }
return WEXITSTATUS(status); return WEXITSTATUS(status);
@ -556,26 +566,37 @@ static void print_noise(struct perf_evsel *evsel, double avg)
print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
} }
static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr)
{ {
double msecs = avg / 1e6; switch (aggr_mode) {
char cpustr[16] = { '\0', }; case AGGR_SOCKET:
const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; fprintf(output, "S%*d%s%*d%s",
if (aggr_socket)
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5, csv_output ? 0 : -5,
cpu, cpu,
csv_sep, csv_sep,
csv_output ? 0 : 4, csv_output ? 0 : 4,
nr, nr,
csv_sep); csv_sep);
else if (no_aggr) break;
sprintf(cpustr, "CPU%*d%s", case AGGR_NONE:
fprintf(output, "CPU%*d%s",
csv_output ? 0 : -4, csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep); perf_evsel__cpus(evsel)->map[cpu], csv_sep);
break;
case AGGR_GLOBAL:
default:
break;
}
}
fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel)); static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
double msecs = avg / 1e6;
const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
aggr_printout(evsel, cpu, nr);
fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
if (evsel->cgrp) if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@ -772,32 +793,21 @@ static void print_ll_cache_misses(int cpu,
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{ {
double total, ratio = 0.0; double total, ratio = 0.0;
char cpustr[16] = { '\0', };
const char *fmt; const char *fmt;
if (csv_output) if (csv_output)
fmt = "%s%.0f%s%s"; fmt = "%.0f%s%s";
else if (big_num) else if (big_num)
fmt = "%s%'18.0f%s%-25s"; fmt = "%'18.0f%s%-25s";
else else
fmt = "%s%18.0f%s%-25s"; fmt = "%18.0f%s%-25s";
if (aggr_socket) aggr_printout(evsel, cpu, nr);
sprintf(cpustr, "S%*d%s%*d%s",
csv_output ? 0 : -5, if (aggr_mode == AGGR_GLOBAL)
cpu,
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep);
else if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[cpu], csv_sep);
else
cpu = 0; cpu = 0;
fprintf(output, fmt, cpustr, avg, csv_sep, perf_evsel__name(evsel)); fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
if (evsel->cgrp) if (evsel->cgrp)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
@ -896,23 +906,23 @@ static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
} }
} }
static void print_aggr_socket(char *prefix) static void print_aggr(char *prefix)
{ {
struct perf_evsel *counter; struct perf_evsel *counter;
int cpu, s, s2, id, nr;
u64 ena, run, val; u64 ena, run, val;
int cpu, s, s2, sock, nr;
if (!sock_map) if (!(aggr_map || aggr_get_id))
return; return;
for (s = 0; s < sock_map->nr; s++) { for (s = 0; s < aggr_map->nr; s++) {
sock = cpu_map__socket(sock_map, s); id = aggr_map->map[s];
list_for_each_entry(counter, &evsel_list->entries, node) { list_for_each_entry(counter, &evsel_list->entries, node) {
val = ena = run = 0; val = ena = run = 0;
nr = 0; nr = 0;
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
s2 = cpu_map__get_socket(evsel_list->cpus, cpu); s2 = aggr_get_id(evsel_list->cpus, cpu);
if (s2 != sock) if (s2 != id)
continue; continue;
val += counter->counts->cpu[cpu].val; val += counter->counts->cpu[cpu].val;
ena += counter->counts->cpu[cpu].ena; ena += counter->counts->cpu[cpu].ena;
@ -923,18 +933,15 @@ static void print_aggr_socket(char *prefix)
fprintf(output, "%s", prefix); fprintf(output, "%s", prefix);
if (run == 0 || ena == 0) { if (run == 0 || ena == 0) {
fprintf(output, "S%*d%s%*d%s%*s%s%*s", aggr_printout(counter, cpu, nr);
csv_output ? 0 : -5,
s, fprintf(output, "%*s%s%*s",
csv_sep,
csv_output ? 0 : 4,
nr,
csv_sep,
csv_output ? 0 : 18, csv_output ? 0 : 18,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep, csv_sep,
csv_output ? 0 : -24, csv_output ? 0 : -24,
perf_evsel__name(counter)); perf_evsel__name(counter));
if (counter->cgrp) if (counter->cgrp)
fprintf(output, "%s%s", fprintf(output, "%s%s",
csv_sep, counter->cgrp->name); csv_sep, counter->cgrp->name);
@ -944,9 +951,9 @@ static void print_aggr_socket(char *prefix)
} }
if (nsec_counter(counter)) if (nsec_counter(counter))
nsec_printout(sock, nr, counter, val); nsec_printout(id, nr, counter, val);
else else
abs_printout(sock, nr, counter, val); abs_printout(id, nr, counter, val);
if (!csv_output) { if (!csv_output) {
print_noise(counter, 1.0); print_noise(counter, 1.0);
@ -1087,14 +1094,20 @@ static void print_stat(int argc, const char **argv)
fprintf(output, ":\n\n"); fprintf(output, ":\n\n");
} }
if (aggr_socket) switch (aggr_mode) {
print_aggr_socket(NULL); case AGGR_SOCKET:
else if (no_aggr) { print_aggr(NULL);
list_for_each_entry(counter, &evsel_list->entries, node) break;
print_counter(counter, NULL); case AGGR_GLOBAL:
} else {
list_for_each_entry(counter, &evsel_list->entries, node) list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter, NULL); print_counter_aggr(counter, NULL);
break;
case AGGR_NONE:
list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter, NULL);
break;
default:
break;
} }
if (!csv_output) { if (!csv_output) {
@ -1140,6 +1153,25 @@ static int stat__set_big_num(const struct option *opt __maybe_unused,
return 0; return 0;
} }
/*
 * Prepare the aggregation state that matches the selected aggr_mode.
 *
 * For AGGR_SOCKET this builds the socket id map from evsel_list->cpus into
 * aggr_map and installs cpu_map__get_socket as the aggr_get_id lookup.
 * Every other mode needs no setup here.
 *
 * Returns 0 on success, or -1 (after reporting through perror()) when the
 * socket map cannot be built.
 */
static int perf_stat_init_aggr_mode(void)
{
	/* Only socket aggregation needs an id map and an id lookup. */
	if (aggr_mode != AGGR_SOCKET)
		return 0;

	if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map) != 0) {
		perror("cannot build socket map");
		return -1;
	}

	aggr_get_id = cpu_map__get_socket;
	return 0;
}
/* /*
* Add default attributes, if there were no attributes specified or * Add default attributes, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used: * if -d/--detailed, -d -d or -d -d -d is used:
@ -1322,7 +1354,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
stat__set_big_num), stat__set_big_num),
OPT_STRING('C', "cpu", &target.cpu_list, "cpu", OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
"list of cpus to monitor in system-wide"), "list of cpus to monitor in system-wide"),
OPT_BOOLEAN('A', "no-aggr", &no_aggr, "disable CPU count aggregation"), OPT_SET_UINT('A', "no-aggr", &aggr_mode,
"disable CPU count aggregation", AGGR_NONE),
OPT_STRING('x', "field-separator", &csv_sep, "separator", OPT_STRING('x', "field-separator", &csv_sep, "separator",
"print counts with custom separator"), "print counts with custom separator"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name", OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
@ -1337,7 +1370,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"command to run after to the measured command"), "command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &interval, OPT_UINTEGER('I', "interval-print", &interval,
"print counts at regular interval in ms (>= 100)"), "print counts at regular interval in ms (>= 100)"),
OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), OPT_SET_UINT(0, "aggr-socket", &aggr_mode,
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_END() OPT_END()
}; };
const char * const stat_usage[] = { const char * const stat_usage[] = {
@ -1420,19 +1454,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
} }
/* no_aggr, cgroup are for system-wide only */ /* no_aggr, cgroup are for system-wide only */
if ((no_aggr || nr_cgroups) && !perf_target__has_cpu(&target)) { if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
&& !perf_target__has_cpu(&target)) {
fprintf(stderr, "both cgroup and no-aggregation " fprintf(stderr, "both cgroup and no-aggregation "
"modes only available in system-wide mode\n"); "modes only available in system-wide mode\n");
usage_with_options(stat_usage, options); usage_with_options(stat_usage, options);
} return -1;
if (aggr_socket) {
if (!perf_target__has_cpu(&target)) {
fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
usage_with_options(stat_usage, options);
}
no_aggr = true;
} }
if (add_default_attributes()) if (add_default_attributes())
@ -1458,6 +1486,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
if (perf_evlist__alloc_stats(evsel_list, interval)) if (perf_evlist__alloc_stats(evsel_list, interval))
goto out_free_maps; goto out_free_maps;
if (perf_stat_init_aggr_mode())
goto out;
/* /*
* We dont want to block the signals - that would cause * We dont want to block the signals - that would cause
* child tasks to inherit that and Ctrl-C would not work. * child tasks to inherit that and Ctrl-C would not work.

View File

@ -4,6 +4,7 @@
#include "cpumap.h" #include "cpumap.h"
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
static struct cpu_map *cpu_map__default_new(void) static struct cpu_map *cpu_map__default_new(void)
{ {
@ -219,7 +220,7 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
if (!mnt) if (!mnt)
return -1; return -1;
sprintf(path, snprintf(path, PATH_MAX,
"%s/devices/system/cpu/cpu%d/topology/physical_package_id", "%s/devices/system/cpu/cpu%d/topology/physical_package_id",
mnt, cpu); mnt, cpu);
@ -231,27 +232,42 @@ int cpu_map__get_socket(struct cpu_map *map, int idx)
return ret == 1 ? cpu : -1; return ret == 1 ? cpu : -1;
} }
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) static int cmp_ids(const void *a, const void *b)
{ {
struct cpu_map *sock; return *(int *)a - *(int *)b;
}
static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
int (*f)(struct cpu_map *map, int cpu))
{
struct cpu_map *c;
int nr = cpus->nr; int nr = cpus->nr;
int cpu, s1, s2; int cpu, s1, s2;
sock = calloc(1, sizeof(*sock) + nr * sizeof(int)); /* allocate as much as possible */
if (!sock) c = calloc(1, sizeof(*c) + nr * sizeof(int));
if (!c)
return -1; return -1;
for (cpu = 0; cpu < nr; cpu++) { for (cpu = 0; cpu < nr; cpu++) {
s1 = cpu_map__get_socket(cpus, cpu); s1 = f(cpus, cpu);
for (s2 = 0; s2 < sock->nr; s2++) { for (s2 = 0; s2 < c->nr; s2++) {
if (s1 == sock->map[s2]) if (s1 == c->map[s2])
break; break;
} }
if (s2 == sock->nr) { if (s2 == c->nr) {
sock->map[sock->nr] = s1; c->map[c->nr] = s1;
sock->nr++; c->nr++;
} }
} }
*sockp = sock; /* ensure we process id in increasing order */
qsort(c->map, c->nr, sizeof(int), cmp_ids);
*res = c;
return 0; return 0;
} }
/*
 * Build the map of socket ids covered by @cpus and store it in *@sockp.
 *
 * Delegates to cpu_map__build_map() with cpu_map__get_socket() as the
 * per-cpu id callback; that helper records each distinct id once and sorts
 * the ids in increasing order.  The map is allocated with calloc().
 *
 * Returns 0 on success, -1 if the allocation fails (in which case *@sockp
 * is not written).
 */
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
{
return cpu_map__build_map(cpus, sockp, cpu_map__get_socket);
}