x86/mm: Micro-optimise clflush_cache_range()
Whilst inspecting the asm for clflush_cache_range() and some perf profiles that required extensive flushing of single cachelines (from part of the intel-gpu-tools GPU benchmarks), we noticed that gcc was reloading boot_cpu_data.x86_clflush_size on every iteration of the loop. We can manually hoist that read, which perf regarded as taking ~25% of the function time for a single cacheline flush.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Link: http://lkml.kernel.org/r/1452246933-10890-1-git-send-email-chris@chris-wilson.co.uk
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
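A minimal userspace sketch of the effect the commit message describes, with hypothetical names (fake_clflushopt and the two flush_* functions are illustrative, not kernel code). The "memory" clobber in the stand-in asm is an assumption standing in for whatever prevents gcc from caching the load across the real clflushopt(); the void-pointer arithmetic is a GNU C extension, as in the kernel source:

	#include <stdint.h>

	/* Stand-in for the kernel's boot_cpu_data: a global the compiler
	 * must treat conservatively across the inline asm below. */
	struct cpuinfo { unsigned int x86_clflush_size; };
	struct cpuinfo boot_cpu_data = { .x86_clflush_size = 64 };

	/* Stand-in for clflushopt(): the "memory" clobber tells gcc any
	 * memory may have changed, so a global read inside the loop must
	 * be reloaded on every iteration. */
	static inline void fake_clflushopt(volatile void *p)
	{
		asm volatile("" : : "r" (p) : "memory");
	}

	/* Before the patch: x86_clflush_size is re-read from memory on
	 * each trip around the loop. */
	void flush_reloading(void *vaddr, unsigned int size)
	{
		unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
		void *vend = vaddr + size;
		void *p;

		for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
		     p < vend; p += boot_cpu_data.x86_clflush_size)
			fake_clflushopt(p);
	}

	/* After the patch: the field is hoisted into a local, which can
	 * stay in a register across the asm. */
	void flush_hoisted(void *vaddr, unsigned int size)
	{
		const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
		void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
		void *vend = vaddr + size;

		for (; p < vend; p += clflush_size)
			fake_clflushopt(p);
	}

Compiled with gcc -O2, the first variant should show a load of boot_cpu_data.x86_clflush_size inside the loop body (the reload the commit message observed), while the second keeps clflush_size in a register.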
commit 1f1a89ac05
parent 2039e6acaf
@@ -129,14 +129,16 @@ within(unsigned long addr, unsigned long start, unsigned long end)
  */
 void clflush_cache_range(void *vaddr, unsigned int size)
 {
-	unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
+	const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
+	void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
 	void *vend = vaddr + size;
-	void *p;
+
+	if (p >= vend)
+		return;
 
 	mb();
 
-	for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
-	     p < vend; p += boot_cpu_data.x86_clflush_size)
+	for (; p < vend; p += clflush_size)
 		clflushopt(p);
 
 	mb();
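For reference, clflush_cache_range() as it reads after this patch, assembled from the context and added lines of the hunk above:

	void clflush_cache_range(void *vaddr, unsigned int size)
	{
		const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
		void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
		void *vend = vaddr + size;

		if (p >= vend)
			return;

		mb();

		for (; p < vend; p += clflush_size)
			clflushopt(p);

		mb();
	}

Note the new early return: since p is vaddr rounded down to a cacheline boundary, p >= vend can only occur for size == 0, and bailing out there skips both memory barriers for an empty range, which the old code would still have executed.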