2009-05-26 09:10:09 +00:00
|
|
|
|
|
|
|
#include "../perf.h"
|
|
|
|
#include "util.h"
|
|
|
|
#include "parse-options.h"
|
|
|
|
#include "parse-events.h"
|
|
|
|
#include "exec_cmd.h"
|
2009-06-01 20:50:19 +00:00
|
|
|
#include "string.h"
|
2009-05-26 09:10:09 +00:00
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
extern char *strcasestr(const char *haystack, const char *needle);
|
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
int nr_counters;
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
struct perf_counter_attr attrs[MAX_COUNTERS];
|
2009-05-26 09:10:09 +00:00
|
|
|
|
|
|
|
struct event_symbol {
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 12:21:42 +00:00
|
|
|
u8 type;
|
|
|
|
u64 config;
|
2009-06-06 07:58:57 +00:00
|
|
|
char *symbol;
|
2009-05-26 09:10:09 +00:00
|
|
|
};
|
|
|
|
|
2009-06-22 11:13:14 +00:00
|
|
|
#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
|
|
|
|
#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
|
2009-06-06 07:58:57 +00:00
|
|
|
|
2009-05-26 09:10:09 +00:00
|
|
|
static struct event_symbol event_symbols[] = {
|
2009-06-22 11:13:14 +00:00
|
|
|
{ CHW(CPU_CYCLES), "cpu-cycles", },
|
|
|
|
{ CHW(CPU_CYCLES), "cycles", },
|
|
|
|
{ CHW(INSTRUCTIONS), "instructions", },
|
|
|
|
{ CHW(CACHE_REFERENCES), "cache-references", },
|
|
|
|
{ CHW(CACHE_MISSES), "cache-misses", },
|
|
|
|
{ CHW(BRANCH_INSTRUCTIONS), "branch-instructions", },
|
|
|
|
{ CHW(BRANCH_INSTRUCTIONS), "branches", },
|
|
|
|
{ CHW(BRANCH_MISSES), "branch-misses", },
|
|
|
|
{ CHW(BUS_CYCLES), "bus-cycles", },
|
2009-06-11 12:06:28 +00:00
|
|
|
|
2009-06-22 11:13:14 +00:00
|
|
|
{ CSW(CPU_CLOCK), "cpu-clock", },
|
|
|
|
{ CSW(TASK_CLOCK), "task-clock", },
|
|
|
|
{ CSW(PAGE_FAULTS), "page-faults", },
|
|
|
|
{ CSW(PAGE_FAULTS), "faults", },
|
|
|
|
{ CSW(PAGE_FAULTS_MIN), "minor-faults", },
|
|
|
|
{ CSW(PAGE_FAULTS_MAJ), "major-faults", },
|
|
|
|
{ CSW(CONTEXT_SWITCHES), "context-switches", },
|
|
|
|
{ CSW(CONTEXT_SWITCHES), "cs", },
|
|
|
|
{ CSW(CPU_MIGRATIONS), "cpu-migrations", },
|
|
|
|
{ CSW(CPU_MIGRATIONS), "migrations", },
|
2009-05-26 09:10:09 +00:00
|
|
|
};
|
|
|
|
|
2009-05-26 07:17:18 +00:00
|
|
|
/*
 * Extract the bit-field <name> from a packed counter config word using
 * the PERF_COUNTER_<name>_MASK / PERF_COUNTER_<name>_SHIFT constants
 * (defined outside this file section).
 *
 * The argument is parenthesized so that an expression such as "a | b"
 * is masked as a whole instead of only its last operand.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

/* Accessors for the individual fields of a packed config word. */
#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
|
|
|
|
|
|
|
|
/*
 * Display names for PERF_TYPE_HARDWARE events.  event_name() indexes
 * this table directly with the counter's config value, so the order of
 * the entries must not change.
 */
static char *hw_event_names[] = {
	/* 0 */ "cycles",
	/* 1 */ "instructions",
	/* 2 */ "cache-references",
	/* 3 */ "cache-misses",
	/* 4 */ "branches",
	/* 5 */ "branch-misses",
	/* 6 */ "bus-cycles",
};
|
|
|
|
|
|
|
|
/*
 * Display names for PERF_TYPE_SOFTWARE events.  event_name() indexes
 * this table directly with the counter's config value, so the order of
 * the entries must not change.
 */
static char *sw_event_names[] = {
	/* 0 */ "cpu-clock-msecs",
	/* 1 */ "task-clock-msecs",
	/* 2 */ "page-faults",
	/* 3 */ "context-switches",
	/* 4 */ "CPU-migrations",
	/* 5 */ "minor-faults",
	/* 6 */ "major-faults",
};
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
/* Upper bound on the number of names (first name + aliases) per row. */
#define MAX_ALIASES 8

/*
 * Names of the cache units of a PERF_TYPE_HW_CACHE event, one row per
 * unit.  Column 0 is the name event_name() prints; every column
 * (including column 0) is accepted on input by parse_aliases().
 * Unused columns are left NULL and terminate the row.
 */
static char *hw_cache[][MAX_ALIASES] = {
	{ "L1-data",		"l1-d",	"l1d"		},
	{ "L1-instruction",	"l1-i",	"l1i"		},
	{ "L2",			"l2"			},
	{ "Data-TLB",		"dtlb",	"d-tlb"		},
	{ "Instruction-TLB",	"itlb",	"i-tlb"		},
	{ "Branch",		"bpu",	"btb",	"bpc"	},
};
|
|
|
|
|
|
|
|
/*
 * Names of the access operations of a PERF_TYPE_HW_CACHE event, one row
 * per operation.  Column 0 is the name event_name() prints; every
 * column is accepted on input by parse_aliases().  Unused columns are
 * left NULL and terminate the row.
 */
static char *hw_cache_op[][MAX_ALIASES] = {
	{ "Load",	"read"						},
	{ "Store",	"write"						},
	{ "Prefetch",	"speculative-read",	"speculative-load"	},
};
|
|
|
|
|
|
|
|
/*
 * Names of the result kinds of a PERF_TYPE_HW_CACHE event, one row per
 * result.  Column 0 is the name event_name() prints (it appends "es");
 * every column is accepted on input by parse_aliases().  Unused columns
 * are left NULL and terminate the row.
 */
static char *hw_cache_result[][MAX_ALIASES] = {
	{ "Reference",	"ops",	"access"	},
	{ "Miss"				},
};
|
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
char *event_name(int counter)
|
2009-05-26 07:17:18 +00:00
|
|
|
{
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 12:21:42 +00:00
|
|
|
u64 config = attrs[counter].config;
|
2009-06-06 07:58:57 +00:00
|
|
|
int type = attrs[counter].type;
|
2009-05-26 07:17:18 +00:00
|
|
|
static char buf[32];
|
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
if (attrs[counter].type == PERF_TYPE_RAW) {
|
|
|
|
sprintf(buf, "raw 0x%llx", config);
|
2009-05-26 07:17:18 +00:00
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case PERF_TYPE_HARDWARE:
|
2009-06-11 12:06:28 +00:00
|
|
|
if (config < PERF_COUNT_HW_MAX)
|
2009-06-06 07:58:57 +00:00
|
|
|
return hw_event_names[config];
|
2009-05-26 07:17:18 +00:00
|
|
|
return "unknown-hardware";
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
case PERF_TYPE_HW_CACHE: {
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 12:21:42 +00:00
|
|
|
u8 cache_type, cache_op, cache_result;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
static char name[100];
|
|
|
|
|
|
|
|
cache_type = (config >> 0) & 0xff;
|
|
|
|
if (cache_type > PERF_COUNT_HW_CACHE_MAX)
|
|
|
|
return "unknown-ext-hardware-cache-type";
|
|
|
|
|
|
|
|
cache_op = (config >> 8) & 0xff;
|
2009-06-06 11:58:12 +00:00
|
|
|
if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX)
|
|
|
|
return "unknown-ext-hardware-cache-op";
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
|
|
|
|
cache_result = (config >> 16) & 0xff;
|
2009-06-06 11:58:12 +00:00
|
|
|
if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
|
|
|
|
return "unknown-ext-hardware-cache-result";
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
|
2009-06-06 11:58:12 +00:00
|
|
|
sprintf(name, "%s-Cache-%s-%ses",
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
hw_cache[cache_type][0],
|
|
|
|
hw_cache_op[cache_op][0],
|
|
|
|
hw_cache_result[cache_result][0]);
|
|
|
|
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
2009-05-26 07:17:18 +00:00
|
|
|
case PERF_TYPE_SOFTWARE:
|
2009-06-11 12:06:28 +00:00
|
|
|
if (config < PERF_COUNT_SW_MAX)
|
2009-06-06 07:58:57 +00:00
|
|
|
return sw_event_names[config];
|
2009-05-26 07:17:18 +00:00
|
|
|
return "unknown-software";
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return "unknown";
|
|
|
|
}
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < size; i++) {
|
|
|
|
for (j = 0; j < MAX_ALIASES; j++) {
|
|
|
|
if (!names[i][j])
|
|
|
|
break;
|
|
|
|
if (strcasestr(str, names[i][j]))
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-06-06 19:04:17 +00:00
|
|
|
return -1;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
|
|
|
|
{
|
2009-06-06 19:04:17 +00:00
|
|
|
int cache_type = -1, cache_op = 0, cache_result = 0;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
|
|
|
|
cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX);
|
|
|
|
/*
|
|
|
|
* No fallback - if we cannot get a clear cache type
|
|
|
|
* then bail out:
|
|
|
|
*/
|
|
|
|
if (cache_type == -1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
|
|
|
|
/*
|
|
|
|
* Fall back to reads:
|
|
|
|
*/
|
2009-06-06 19:04:17 +00:00
|
|
|
if (cache_op == -1)
|
|
|
|
cache_op = PERF_COUNT_HW_CACHE_OP_READ;
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
|
|
|
|
cache_result = parse_aliases(str, hw_cache_result,
|
|
|
|
PERF_COUNT_HW_CACHE_RESULT_MAX);
|
|
|
|
/*
|
|
|
|
* Fall back to accesses:
|
|
|
|
*/
|
|
|
|
if (cache_result == -1)
|
|
|
|
cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
|
|
|
|
|
|
|
|
attr->config = cache_type | (cache_op << 8) | (cache_result << 16);
|
|
|
|
attr->type = PERF_TYPE_HW_CACHE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-26 09:10:09 +00:00
|
|
|
/*
|
|
|
|
* Each event can have multiple symbolic names.
|
|
|
|
* Symbolic names are (almost) exactly matched.
|
|
|
|
*/
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
|
2009-05-26 09:10:09 +00:00
|
|
|
{
|
perf_counter tools: Define and use our own u64, s64 etc. definitions
On 64-bit powerpc, __u64 is defined to be unsigned long rather than
unsigned long long. This causes compiler warnings every time we
print a __u64 value with %Lx.
Rather than changing __u64, we define our own u64 to be unsigned long
long on all architectures, and similarly s64 as signed long long.
For consistency we also define u32, s32, u16, s16, u8 and s8. These
definitions are put in a new header, types.h, because these definitions
are needed in util/string.h and util/symbol.h.
The main change here is the mechanical change of __[us]{64,32,16,8}
to remove the "__". The other changes are:
* Create types.h
* Include types.h in perf.h, util/string.h and util/symbol.h
* Add types.h to the LIB_H definition in Makefile
* Added (u64) casts in process_overflow_event() and print_sym_table()
to kill two remaining warnings.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: benh@kernel.crashing.org
LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-19 12:21:42 +00:00
|
|
|
u64 config, id;
|
2009-05-26 09:10:09 +00:00
|
|
|
int type;
|
|
|
|
unsigned int i;
|
2009-06-01 20:50:19 +00:00
|
|
|
const char *sep, *pstr;
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) {
|
|
|
|
attr->type = PERF_TYPE_RAW;
|
|
|
|
attr->config = config;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-01 20:50:19 +00:00
|
|
|
pstr = str;
|
|
|
|
sep = strchr(pstr, ':');
|
|
|
|
if (sep) {
|
|
|
|
type = atoi(pstr);
|
|
|
|
pstr = sep + 1;
|
|
|
|
id = atoi(pstr);
|
|
|
|
sep = strchr(pstr, ':');
|
|
|
|
if (sep) {
|
|
|
|
pstr = sep + 1;
|
|
|
|
if (strchr(pstr, 'k'))
|
2009-06-06 07:58:57 +00:00
|
|
|
attr->exclude_user = 1;
|
2009-06-01 20:50:19 +00:00
|
|
|
if (strchr(pstr, 'u'))
|
2009-06-06 07:58:57 +00:00
|
|
|
attr->exclude_kernel = 1;
|
2009-06-01 20:50:19 +00:00
|
|
|
}
|
2009-06-06 07:58:57 +00:00
|
|
|
attr->type = type;
|
|
|
|
attr->config = id;
|
|
|
|
|
|
|
|
return 0;
|
2009-05-26 07:17:18 +00:00
|
|
|
}
|
2009-05-26 09:10:09 +00:00
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
|
|
|
|
if (!strncmp(str, event_symbols[i].symbol,
|
2009-06-06 07:58:57 +00:00
|
|
|
strlen(event_symbols[i].symbol))) {
|
|
|
|
|
|
|
|
attr->type = event_symbols[i].type;
|
|
|
|
attr->config = event_symbols[i].config;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2009-05-26 09:10:09 +00:00
|
|
|
}
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
return parse_generic_hw_symbols(str, attr);
|
2009-05-26 09:10:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int parse_events(const struct option *opt, const char *str, int unset)
|
|
|
|
{
|
2009-06-06 07:58:57 +00:00
|
|
|
struct perf_counter_attr attr;
|
|
|
|
int ret;
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
memset(&attr, 0, sizeof(attr));
|
2009-05-26 09:10:09 +00:00
|
|
|
again:
|
|
|
|
if (nr_counters == MAX_COUNTERS)
|
|
|
|
return -1;
|
|
|
|
|
perf_counter: Implement generalized cache event types
Extend generic event enumeration with the PERF_TYPE_HW_CACHE
method.
This is a 3-dimensional space:
{ L1-D, L1-I, L2, ITLB, DTLB, BPU } x
{ load, store, prefetch } x
{ accesses, misses }
User-space passes in the 3 coordinates and the kernel provides
a counter. (if the hardware supports that type and if the
combination makes sense.)
Combinations that make no sense produce a -EINVAL.
Combinations that are not supported by the hardware produce -ENOTSUP.
Extend the tools to deal with this, and rewrite the event symbol
parsing code with various popular aliases for the units and
access methods above. So 'l1-cache-miss' and 'l1d-read-ops' are
both valid aliases.
( x86 is supported for now, with the Nehalem event table filled in,
and with Core2 and Atom having placeholder tables. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-06-05 18:22:46 +00:00
|
|
|
ret = parse_event_symbols(str, &attr);
|
2009-06-06 07:58:57 +00:00
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 07:58:57 +00:00
|
|
|
attrs[nr_counters] = attr;
|
2009-05-26 09:10:09 +00:00
|
|
|
nr_counters++;
|
|
|
|
|
|
|
|
str = strstr(str, ",");
|
|
|
|
if (str) {
|
|
|
|
str++;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
/*
 * Human-readable label for each class of event, as shown in square
 * brackets by print_events().  print_events() indexes this table with
 * (event type + 1) and falls back to slot 0 when no label applies.
 */
static const char * const event_type_descriptors[] = {
	[0] = "",			/* fallback: no label */
	[1] = "Hardware event",
	[2] = "Software event",
	[3] = "Tracepoint event",
	[4] = "Hardware cache event",
};
|
|
|
|
|
2009-05-26 09:10:09 +00:00
|
|
|
/*
|
2009-06-06 10:24:17 +00:00
|
|
|
* Print the help text for the event symbols:
|
2009-05-26 09:10:09 +00:00
|
|
|
*/
|
2009-06-06 10:24:17 +00:00
|
|
|
void print_events(void)
|
2009-05-26 09:10:09 +00:00
|
|
|
{
|
2009-06-06 10:24:17 +00:00
|
|
|
struct event_symbol *syms = event_symbols;
|
|
|
|
unsigned int i, type, prev_type = -1;
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
|
|
|
|
type = syms->type + 1;
|
|
|
|
if (type > ARRAY_SIZE(event_type_descriptors))
|
|
|
|
type = 0;
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
if (type != prev_type)
|
|
|
|
fprintf(stderr, "\n");
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
fprintf(stderr, " %-30s [%s]\n", syms->symbol,
|
|
|
|
event_type_descriptors[type]);
|
2009-05-26 09:10:09 +00:00
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
prev_type = type;
|
2009-05-26 09:10:09 +00:00
|
|
|
}
|
|
|
|
|
2009-06-06 10:24:17 +00:00
|
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, " %-30s [raw hardware event descriptor]\n",
|
|
|
|
"rNNN");
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
|
|
|
|
exit(129);
|
2009-05-26 09:10:09 +00:00
|
|
|
}
|