mirror of
https://github.com/torvalds/linux.git
synced 2024-12-03 17:41:22 +00:00
x86/intel_rdt: More precise L2 hit/miss measurements
Intel Goldmont processors supports non-architectural precise events that can be used to give us more insight into the success of L2 cache pseudo-locking on these platforms. Introduce a new measurement trigger that will enable two precise events, MEM_LOAD_UOPS_RETIRED.L2_HIT and MEM_LOAD_UOPS_RETIRED.L2_MISS, while accessing pseudo-locked data. A new tracepoint, pseudo_lock_l2, is created to make these results visible to the user. Signed-off-by: Reinette Chatre <reinette.chatre@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: fenghua.yu@intel.com Cc: tony.luck@intel.com Cc: vikas.shivappa@linux.intel.com Cc: gavin.hindman@intel.com Cc: jithu.joseph@intel.com Cc: dave.hansen@intel.com Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/06b1456da65b543479dac8d9493e41f92f175d6c.1529706536.git.reinette.chatre@intel.com
This commit is contained in:
parent
746e08590b
commit
8a2fc0e1bc
@ -23,6 +23,7 @@
|
|||||||
#include <asm/cacheflush.h>
|
#include <asm/cacheflush.h>
|
||||||
#include <asm/intel-family.h>
|
#include <asm/intel-family.h>
|
||||||
#include <asm/intel_rdt_sched.h>
|
#include <asm/intel_rdt_sched.h>
|
||||||
|
#include <asm/perf_event.h>
|
||||||
|
|
||||||
#include "intel_rdt.h"
|
#include "intel_rdt.h"
|
||||||
|
|
||||||
@ -63,6 +64,9 @@ static struct class *pseudo_lock_class;
|
|||||||
* hardware prefetch disable bits are included here as they are documented
|
* hardware prefetch disable bits are included here as they are documented
|
||||||
* in the SDM.
|
* in the SDM.
|
||||||
*
|
*
|
||||||
|
* When adding a platform here also add support for its cache events to
|
||||||
|
* measure_cycles_perf_fn()
|
||||||
|
*
|
||||||
* Return:
|
* Return:
|
||||||
* If platform is supported, the bits to disable hardware prefetchers, 0
|
* If platform is supported, the bits to disable hardware prefetchers, 0
|
||||||
* if platform is not supported.
|
* if platform is not supported.
|
||||||
@ -101,6 +105,16 @@ static u64 get_prefetch_disable_bits(void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper to write 64bit value to MSR without tracing. Used when
|
||||||
|
* use of the cache should be restricted and use of registers used
|
||||||
|
* for local variables avoided.
|
||||||
|
*/
|
||||||
|
static inline void pseudo_wrmsrl_notrace(unsigned int msr, u64 val)
|
||||||
|
{
|
||||||
|
__wrmsr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* pseudo_lock_minor_get - Obtain available minor number
|
* pseudo_lock_minor_get - Obtain available minor number
|
||||||
* @minor: Pointer to where new minor number will be stored
|
* @minor: Pointer to where new minor number will be stored
|
||||||
@ -834,6 +848,107 @@ static int measure_cycles_lat_fn(void *_plr)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int measure_cycles_perf_fn(void *_plr)
|
||||||
|
{
|
||||||
|
struct pseudo_lock_region *plr = _plr;
|
||||||
|
unsigned long long l2_hits, l2_miss;
|
||||||
|
u64 l2_hit_bits, l2_miss_bits;
|
||||||
|
unsigned long i;
|
||||||
|
#ifdef CONFIG_KASAN
|
||||||
|
/*
|
||||||
|
* The registers used for local register variables are also used
|
||||||
|
* when KASAN is active. When KASAN is active we use regular variables
|
||||||
|
* at the cost of including cache access latency to these variables
|
||||||
|
* in the measurements.
|
||||||
|
*/
|
||||||
|
unsigned int line_size;
|
||||||
|
unsigned int size;
|
||||||
|
void *mem_r;
|
||||||
|
#else
|
||||||
|
register unsigned int line_size asm("esi");
|
||||||
|
register unsigned int size asm("edi");
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
register void *mem_r asm("rbx");
|
||||||
|
#else
|
||||||
|
register void *mem_r asm("ebx");
|
||||||
|
#endif /* CONFIG_X86_64 */
|
||||||
|
#endif /* CONFIG_KASAN */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Non-architectural event for the Goldmont Microarchitecture
|
||||||
|
* from Intel x86 Architecture Software Developer Manual (SDM):
|
||||||
|
* MEM_LOAD_UOPS_RETIRED D1H (event number)
|
||||||
|
* Umask values:
|
||||||
|
* L1_HIT 01H
|
||||||
|
* L2_HIT 02H
|
||||||
|
* L1_MISS 08H
|
||||||
|
* L2_MISS 10H
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start by setting flags for IA32_PERFEVTSELx:
|
||||||
|
* OS (Operating system mode) 0x2
|
||||||
|
* INT (APIC interrupt enable) 0x10
|
||||||
|
* EN (Enable counter) 0x40
|
||||||
|
*
|
||||||
|
* Then add the Umask value and event number to select performance
|
||||||
|
* event.
|
||||||
|
*/
|
||||||
|
|
||||||
|
switch (boot_cpu_data.x86_model) {
|
||||||
|
case INTEL_FAM6_ATOM_GOLDMONT:
|
||||||
|
case INTEL_FAM6_ATOM_GEMINI_LAKE:
|
||||||
|
l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
|
||||||
|
l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
local_irq_disable();
|
||||||
|
/*
|
||||||
|
* Call wrmsr direcly to avoid the local register variables from
|
||||||
|
* being overwritten due to reordering of their assignment with
|
||||||
|
* the wrmsr calls.
|
||||||
|
*/
|
||||||
|
__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
|
||||||
|
/* Disable events and reset counters */
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, 0x0);
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x0);
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0, 0x0);
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_PERFCTR0 + 1, 0x0);
|
||||||
|
/* Set and enable the L2 counters */
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0, l2_hit_bits);
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1, l2_miss_bits);
|
||||||
|
mem_r = plr->kmem;
|
||||||
|
size = plr->size;
|
||||||
|
line_size = plr->line_size;
|
||||||
|
for (i = 0; i < size; i += line_size) {
|
||||||
|
asm volatile("mov (%0,%1,1), %%eax\n\t"
|
||||||
|
:
|
||||||
|
: "r" (mem_r), "r" (i)
|
||||||
|
: "%eax", "memory");
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Call wrmsr directly (no tracing) to not influence
|
||||||
|
* the cache access counters as they are disabled.
|
||||||
|
*/
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0,
|
||||||
|
l2_hit_bits & ~(0x40ULL << 16));
|
||||||
|
pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 1,
|
||||||
|
l2_miss_bits & ~(0x40ULL << 16));
|
||||||
|
l2_hits = native_read_pmc(0);
|
||||||
|
l2_miss = native_read_pmc(1);
|
||||||
|
wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
|
||||||
|
local_irq_enable();
|
||||||
|
trace_pseudo_lock_l2(l2_hits, l2_miss);
|
||||||
|
|
||||||
|
out:
|
||||||
|
plr->thread_done = 1;
|
||||||
|
wake_up_interruptible(&plr->lock_thread_wq);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
|
* pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
|
||||||
*
|
*
|
||||||
@ -844,12 +959,12 @@ static int measure_cycles_lat_fn(void *_plr)
|
|||||||
*
|
*
|
||||||
* Return: 0 on success, <0 on failure
|
* Return: 0 on success, <0 on failure
|
||||||
*/
|
*/
|
||||||
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp)
|
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
|
||||||
{
|
{
|
||||||
struct pseudo_lock_region *plr = rdtgrp->plr;
|
struct pseudo_lock_region *plr = rdtgrp->plr;
|
||||||
struct task_struct *thread;
|
struct task_struct *thread;
|
||||||
unsigned int cpu;
|
unsigned int cpu;
|
||||||
int ret;
|
int ret = -1;
|
||||||
|
|
||||||
cpus_read_lock();
|
cpus_read_lock();
|
||||||
mutex_lock(&rdtgroup_mutex);
|
mutex_lock(&rdtgroup_mutex);
|
||||||
@ -866,9 +981,19 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
|
if (sel == 1)
|
||||||
cpu_to_node(cpu),
|
thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
|
||||||
"pseudo_lock_measure/%u", cpu);
|
cpu_to_node(cpu),
|
||||||
|
"pseudo_lock_measure/%u",
|
||||||
|
cpu);
|
||||||
|
else if (sel == 2)
|
||||||
|
thread = kthread_create_on_node(measure_cycles_perf_fn, plr,
|
||||||
|
cpu_to_node(cpu),
|
||||||
|
"pseudo_lock_measure/%u",
|
||||||
|
cpu);
|
||||||
|
else
|
||||||
|
goto out;
|
||||||
|
|
||||||
if (IS_ERR(thread)) {
|
if (IS_ERR(thread)) {
|
||||||
ret = PTR_ERR(thread);
|
ret = PTR_ERR(thread);
|
||||||
goto out;
|
goto out;
|
||||||
@ -897,19 +1022,21 @@ static ssize_t pseudo_lock_measure_trigger(struct file *file,
|
|||||||
size_t buf_size;
|
size_t buf_size;
|
||||||
char buf[32];
|
char buf[32];
|
||||||
int ret;
|
int ret;
|
||||||
bool bv;
|
int sel;
|
||||||
|
|
||||||
buf_size = min(count, (sizeof(buf) - 1));
|
buf_size = min(count, (sizeof(buf) - 1));
|
||||||
if (copy_from_user(buf, user_buf, buf_size))
|
if (copy_from_user(buf, user_buf, buf_size))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
buf[buf_size] = '\0';
|
buf[buf_size] = '\0';
|
||||||
ret = strtobool(buf, &bv);
|
ret = kstrtoint(buf, 10, &sel);
|
||||||
if (ret == 0 && bv) {
|
if (ret == 0) {
|
||||||
|
if (sel != 1 && sel != 2)
|
||||||
|
return -EINVAL;
|
||||||
ret = debugfs_file_get(file->f_path.dentry);
|
ret = debugfs_file_get(file->f_path.dentry);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
ret = pseudo_lock_measure_cycles(rdtgrp);
|
ret = pseudo_lock_measure_cycles(rdtgrp, sel);
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
ret = count;
|
ret = count;
|
||||||
debugfs_file_put(file->f_path.dentry);
|
debugfs_file_put(file->f_path.dentry);
|
||||||
|
@ -15,6 +15,16 @@ TRACE_EVENT(pseudo_lock_mem_latency,
|
|||||||
TP_printk("latency=%u", __entry->latency)
|
TP_printk("latency=%u", __entry->latency)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(pseudo_lock_l2,
|
||||||
|
TP_PROTO(u64 l2_hits, u64 l2_miss),
|
||||||
|
TP_ARGS(l2_hits, l2_miss),
|
||||||
|
TP_STRUCT__entry(__field(u64, l2_hits)
|
||||||
|
__field(u64, l2_miss)),
|
||||||
|
TP_fast_assign(__entry->l2_hits = l2_hits;
|
||||||
|
__entry->l2_miss = l2_miss;),
|
||||||
|
TP_printk("hits=%llu miss=%llu",
|
||||||
|
__entry->l2_hits, __entry->l2_miss));
|
||||||
|
|
||||||
#endif /* _TRACE_PSEUDO_LOCK_H */
|
#endif /* _TRACE_PSEUDO_LOCK_H */
|
||||||
|
|
||||||
#undef TRACE_INCLUDE_PATH
|
#undef TRACE_INCLUDE_PATH
|
||||||
|
Loading…
Reference in New Issue
Block a user