/* * perfstat: /usr/bin/time -alike performance counter statistics utility * * It summarizes the counter events of all tasks (and child tasks), * covering all CPUs that the command (or workload) executes on. * It only counts the per-task events of the workload started, * independent of how many other tasks run on those CPUs. * * Build with: cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c * * Sample output: * $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null Performance counter stats for 'ls': 163516953 instructions 2295 cache-misses 2855182 branch-misses * * Copyright (C) 2008, Red Hat Inc, Ingo Molnar * * Released under the GPLv2 (not later). * * Percpu counter support by: Yanmin Zhang * Symbolic event options by: Wu Fengguang */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "perfcounters.h" static int nr_cpus = 0; static int system_wide = 0; static void display_help(void) { unsigned int i; int e; printf( "Usage: perfstat [] \n\n" "PerfStat Options (up to %d event types can be specified):\n\n", MAX_COUNTERS); printf( " -e EVENT --event=EVENT # symbolic-name abbreviations"); for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { if (e != event_symbols[i].event) { e = event_symbols[i].event; printf( "\n %2d: %-20s", e, event_symbols[i].symbol); } else printf(" %s", event_symbols[i].symbol); } printf("\n" " rNNN: raw event type\n\n" " -s # system-wide collection\n\n" " -c --command= # command+arguments to be timed.\n" "\n"); exit(0); } static void process_options(int argc, char *argv[]) { for (;;) { int option_index = 0; /** Options for getopt */ static struct option long_options[] = { {"event", required_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, {"command", no_argument, NULL, 'c'}, {NULL, 0, NULL, 0 } }; int c = getopt_long(argc, argv, "+:e:c:s", long_options, &option_index); if (c == -1) break; switch (c) { case 'c': break; case 's': system_wide = 1; break; case 'e': parse_events(optarg); break; default: break; } } if (optind == argc) goto err; if (!nr_counters) nr_counters = 8; return; err: display_help(); } char fault_here[1000000]; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static void create_counter(int counter) { struct perf_counter_hw_event hw_event; memset(&hw_event, 0, sizeof(hw_event)); hw_event.type = event_id[counter]; hw_event.raw = event_raw[counter]; hw_event.record_type = PERF_RECORD_SIMPLE; hw_event.nmi = 0; if (system_wide) { int cpu; for (cpu = 0; cpu < nr_cpus; cpu ++) { fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); if (fd[cpu][counter] < 0) { printf("perfstat error: syscall returned with %d (%s)\n", fd[cpu][counter], strerror(errno)); exit(-1); } } } else { hw_event.inherit = 1; hw_event.disabled = 1; fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); if (fd[0][counter] < 0) { printf("perfstat error: syscall returned with %d (%s)\n", fd[0][counter], strerror(errno)); exit(-1); } } } int main(int argc, char *argv[]) { unsigned long long t0, t1; int counter; ssize_t res; int status; int pid; process_options(argc, argv); if (system_wide) { nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus >= 0); } else nr_cpus = 1; for (counter = 0; counter < nr_counters; counter++) create_counter(counter); argc -= optind; argv += optind; /* * Enable counters and exec the command: */ t0 = rdclock(); prctl(PR_TASK_PERF_COUNTERS_ENABLE); if ((pid = fork()) < 0) perror("failed to fork"); if (!pid) { if (execvp(argv[0], argv)) { perror(argv[0]); exit(-1); } } while (wait(&status) >= 0) ; prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); fflush(stdout); fprintf(stderr, "\n"); fprintf(stderr, " Performance counter stats for \'%s\':\n", argv[0]); fprintf(stderr, "\n"); for (counter = 0; counter < nr_counters; counter++) { int cpu; __u64 count, single_count; count = 0; for (cpu = 0; cpu < nr_cpus; cpu ++) { res = read(fd[cpu][counter], (char *) &single_count, sizeof(single_count)); assert(res == sizeof(single_count)); count += single_count; } if (!event_raw[counter] && (event_id[counter] == PERF_COUNT_CPU_CLOCK || event_id[counter] == PERF_COUNT_TASK_CLOCK)) { double msecs = (double)count / 1000000; fprintf(stderr, " %14.6f %-20s (msecs)\n", msecs, event_name(counter)); } else { fprintf(stderr, " %14Ld %-20s (events)\n", count, event_name(counter)); } if (!counter) fprintf(stderr, "\n"); } fprintf(stderr, "\n"); fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", (double)(t1-t0)/1e6); fprintf(stderr, "\n"); return 0; }