perf, x86: Add cache events for the Pentium-4 PMU

Move the HT bit setting code from p4_pmu_event_map() to
p4_hw_config(), so that cache events also get the HT bit set correctly.

Tested on my P4 desktop; the following 6 cache events work:

 L1-dcache-load-misses
 LLC-load-misses
 dTLB-load-misses
 dTLB-store-misses
 iTLB-loads
 iTLB-load-misses

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1268908392.13901.128.camel@minggr.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Lin Ming 2010-03-18 18:33:12 +08:00 committed by Ingo Molnar
parent f34edbc1cd
commit cb7d6b5053
3 changed files with 159 additions and 6 deletions

View File

@ -357,6 +357,8 @@
#define MSR_P4_U2L_ESCR0 0x000003b0
#define MSR_P4_U2L_ESCR1 0x000003b1
#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a

View File

@ -708,4 +708,14 @@ enum P4_EVENTS_ATTR {
P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1),
};
/*
 * Template keys for the cache events and for the uop-type event.
 *
 * Each key is used as a designated index into p4_templates[] and is also
 * OR-ed into the packed config of the matching cache event. p4_templates[]
 * already carries entries at the low indices, so numbering has to start
 * at PERF_COUNT_HW_MAX; starting at 0 would make the cache templates
 * overwrite those existing entries.
 */
enum {
	KEY_P4_L1D_OP_READ_RESULT_MISS = PERF_COUNT_HW_MAX,
	KEY_P4_LL_OP_READ_RESULT_MISS,
	KEY_P4_DTLB_OP_READ_RESULT_MISS,
	KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
	KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
	KEY_P4_ITLB_OP_READ_RESULT_MISS,
	KEY_P4_UOP_TYPE,
};
#endif /* PERF_EVENT_P4_H */

View File

@ -19,6 +19,11 @@ struct p4_event_template {
u64 config; /* packed predefined bits */
int dep; /* upstream dependency event index */
int key; /* index into p4_templates */
u64 msr; /*
* the high 32 bits are written to MSR_IA32_PEBS_ENABLE and
* the low 32 bits are written to MSR_P4_PEBS_MATRIX_VERT
* for cache events
*/
unsigned int emask; /* ESCR EventMask */
unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int cntr[2]; /* counter index (offset) */
@ -31,6 +36,67 @@ struct p4_pmu_res {
static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
/*
 * Build the packed config of a cache event from an event opcode and the
 * name of one of its mask bits:
 *   - the unpacked event code and the event##_##bit mask value are packed
 *     with p4_config_pack_escr(),
 *   - the unpacked ESCR selector is packed with p4_config_pack_cccr().
 *
 * The whole expansion is parenthesized so the macro acts as a single
 * operand regardless of the operators surrounding its use.
 */
#define P4_CACHE_EVENT_CONFIG(event, bit)						\
	(p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) |	\
	 p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) |		\
	 p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT))
/*
 * Pentium-4 translation table for the generic cache events, indexed by
 * [cache][operation][result]. p4_pmu_init() copies it into
 * hw_cache_event_ids.
 *
 * Every populated slot is a packed config built by P4_CACHE_EVENT_CONFIG()
 * with the matching KEY_P4_* template key OR-ed in. Slots holding 0 or -1
 * have no P4 event assigned here.
 * NOTE(review): the consumer of hw_cache_event_ids presumably treats 0 and
 * -1 differently -- confirm against the generic x86 code.
 */
static __initconst u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0,
			/* 1stL_cache_load_miss_retired */
			[C(RESULT_MISS)] =
				P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
				| KEY_P4_L1D_OP_READ_RESULT_MISS,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0,
			/* 2ndL_cache_load_miss_retired */
			[C(RESULT_MISS)] =
				P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
				| KEY_P4_LL_OP_READ_RESULT_MISS,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0,
			/* DTLB_load_miss_retired */
			[C(RESULT_MISS)] =
				P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
				| KEY_P4_DTLB_OP_READ_RESULT_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0,
			/* DTLB_store_miss_retired */
			[C(RESULT_MISS)] =
				P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
				| KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			/* ITLB_reference.HIT */
			[C(RESULT_ACCESS)] =
				P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT)
				| KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
			/* ITLB_reference.MISS */
			[C(RESULT_MISS)] =
				P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS)
				| KEY_P4_ITLB_OP_READ_RESULT_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = -1,
			[C(RESULT_MISS)] = -1,
		},
	},
};
/*
* WARN: CCCR1 doesn't have a working enable bit so try to not
* use it if possible
@ -121,11 +187,77 @@ struct p4_event_template p4_templates[] = {
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
.cntr = { 0, 2 },
},
[7] = {
/*
 * L1D read miss (1stL_cache_load_miss_retired), counted through
 * P4_REPLAY_EVENT.
 * NOTE(review): both thread slots of escr_msr name MSR_P4_CRU_ESCR2, while
 * the ITLB templates use an ESCR0/ESCR1 pair -- confirm this is intended.
 */
[KEY_P4_L1D_OP_READ_RESULT_MISS] = {
	.key		= KEY_P4_L1D_OP_READ_RESULT_MISS,
	.opcode		= P4_REPLAY_EVENT,
	.config		= 0,
	.dep		= -1,
	/* high word (bits 0 and 24) -> MSR_IA32_PEBS_ENABLE, low word (bit 0) -> MSR_P4_PEBS_MATRIX_VERT */
	.msr		= ((u64)((1 << 0) | (1 << 24)) << 32) | (1 << 0),
	.emask		= P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
	.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	.cntr		= { 16, 17 },
},
/*
 * Last-level cache read miss (2ndL_cache_load_miss_retired), counted
 * through P4_REPLAY_EVENT.
 * NOTE(review): both thread slots of escr_msr name MSR_P4_CRU_ESCR2, while
 * the ITLB templates use an ESCR0/ESCR1 pair -- confirm this is intended.
 */
[KEY_P4_LL_OP_READ_RESULT_MISS] = {
	.key		= KEY_P4_LL_OP_READ_RESULT_MISS,
	.opcode		= P4_REPLAY_EVENT,
	.config		= 0,
	.dep		= -1,
	/* high word (bits 1 and 24) -> MSR_IA32_PEBS_ENABLE, low word (bit 0) -> MSR_P4_PEBS_MATRIX_VERT */
	.msr		= ((u64)((1 << 1) | (1 << 24)) << 32) | (1 << 0),
	.emask		= P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
	.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	.cntr		= { 16, 17 },
},
/*
 * DTLB load miss (DTLB_load_miss_retired), counted through
 * P4_REPLAY_EVENT.
 * NOTE(review): both thread slots of escr_msr name MSR_P4_CRU_ESCR2, while
 * the ITLB templates use an ESCR0/ESCR1 pair -- confirm this is intended.
 */
[KEY_P4_DTLB_OP_READ_RESULT_MISS] = {
	.key		= KEY_P4_DTLB_OP_READ_RESULT_MISS,
	.opcode		= P4_REPLAY_EVENT,
	.config		= 0,
	.dep		= -1,
	/* high word (bits 2 and 24) -> MSR_IA32_PEBS_ENABLE, low word (bit 0) -> MSR_P4_PEBS_MATRIX_VERT */
	.msr		= ((u64)((1 << 2) | (1 << 24)) << 32) | (1 << 0),
	.emask		= P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
	.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	.cntr		= { 16, 17 },
},
/*
 * DTLB store miss (DTLB_store_miss_retired), counted through
 * P4_REPLAY_EVENT. It differs from the DTLB load-miss template only in the
 * low word of .msr (bit 1 instead of bit 0).
 * NOTE(review): both thread slots of escr_msr name MSR_P4_CRU_ESCR2, while
 * the ITLB templates use an ESCR0/ESCR1 pair -- confirm this is intended.
 */
[KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = {
	.key		= KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
	.opcode		= P4_REPLAY_EVENT,
	.config		= 0,
	.dep		= -1,
	/* high word (bits 2 and 24) -> MSR_IA32_PEBS_ENABLE, low word (bit 1) -> MSR_P4_PEBS_MATRIX_VERT */
	.msr		= ((u64)((1 << 2) | (1 << 24)) << 32) | (1 << 1),
	.emask		= P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
	.escr_msr	= { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
	.cntr		= { 16, 17 },
},
/*
 * ITLB read access (ITLB_reference.HIT). .msr is 0, so enabling this
 * event does not touch MSR_IA32_PEBS_ENABLE / MSR_P4_PEBS_MATRIX_VERT.
 */
[KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = {
	.key		= KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
	.opcode		= P4_ITLB_REFERENCE,
	.config		= 0,
	.dep		= -1,
	.msr		= 0,
	.emask		= P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT),
	.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
	.cntr		= { 0, 2 },
},
/*
 * ITLB read miss (ITLB_reference.MISS). .msr is 0, so enabling this
 * event does not touch MSR_IA32_PEBS_ENABLE / MSR_P4_PEBS_MATRIX_VERT.
 */
[KEY_P4_ITLB_OP_READ_RESULT_MISS] = {
	.key		= KEY_P4_ITLB_OP_READ_RESULT_MISS,
	.opcode		= P4_ITLB_REFERENCE,
	.config		= 0,
	.dep		= -1,
	.msr		= 0,
	.emask		= P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS),
	.escr_msr	= { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
	.cntr		= { 0, 2 },
},
[KEY_P4_UOP_TYPE] = {
.opcode = P4_UOP_TYPE,
.config = 0,
.dep = -1,
.key = 7,
.key = KEY_P4_UOP_TYPE,
.emask =
P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
@ -155,10 +287,6 @@ static u64 p4_pmu_event_map(int hw_event)
config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
/* on HT machine we need a special bit */
if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
config = p4_set_ht_bit(config);
return config;
}
@ -211,6 +339,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
/* Count user and OS events unless not requested to */
hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
attr->exclude_user));
/* on HT machine we need a special bit */
if (p4_ht_active() && p4_ht_thread(cpu))
hwc->config = p4_set_ht_bit(hwc->config);
return 0;
}
@ -271,6 +403,12 @@ static void p4_pmu_enable_event(struct perf_event *event)
pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
return;
}
if (tpl->msr) {
(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32);
(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff);
}
escr_base = (u64)tpl->escr_msr[thread];
/*
@ -577,6 +715,9 @@ static __init int p4_pmu_init(void)
return -ENODEV;
}
memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
pr_cont("Netburst events, ");
x86_pmu = p4_pmu;