2005-04-16 22:20:36 +00:00
|
|
|
/*
|
2008-08-02 09:55:55 +00:00
|
|
|
* arch/arm/include/asm/tlbflush.h
|
2005-04-16 22:20:36 +00:00
|
|
|
*
|
|
|
|
* Copyright (C) 1999-2003 Russell King
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
#ifndef _ASMARM_TLBFLUSH_H
|
|
|
|
#define _ASMARM_TLBFLUSH_H
|
|
|
|
|
2011-02-20 12:27:49 +00:00
|
|
|
#ifdef CONFIG_MMU
|
2006-02-24 21:41:25 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
#include <asm/glue.h>
|
|
|
|
|
|
|
|
/*
 * TLB operation flags.
 *
 * Each bit selects one maintenance operation or barrier that the flush
 * helpers in this file issue through tlb_flag()/tlb_op().  The per-model
 * *_tlb_flags masks are built by OR-ing these bits together.
 *
 * NOTE(review): the values are plain "1 << n" int shifts, including bit 31.
 * These definitions sit outside the __ASSEMBLY__ guard, so presumably they
 * are also consumed by assembly sources - confirm before adding a C-only
 * integer suffix.
 */

/* Per-page invalidation */
#define TLB_V4_U_PAGE	(1 << 1)
#define TLB_V4_D_PAGE	(1 << 2)
#define TLB_V4_I_PAGE	(1 << 3)
#define TLB_V6_U_PAGE	(1 << 4)
#define TLB_V6_D_PAGE	(1 << 5)
#define TLB_V6_I_PAGE	(1 << 6)

/* Whole-TLB invalidation */
#define TLB_V4_U_FULL	(1 << 9)
#define TLB_V4_D_FULL	(1 << 10)
#define TLB_V4_I_FULL	(1 << 11)
#define TLB_V6_U_FULL	(1 << 12)
#define TLB_V6_D_FULL	(1 << 13)
#define TLB_V6_I_FULL	(1 << 14)

/* Per-ASID invalidation */
#define TLB_V6_U_ASID	(1 << 16)
#define TLB_V6_D_ASID	(1 << 17)
#define TLB_V6_I_ASID	(1 << 18)

/* Branch predictor invalidation */
#define TLB_V6_BP	(1 << 19)

/* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
#define TLB_V7_UIS_PAGE	(1 << 20)
#define TLB_V7_UIS_FULL	(1 << 21)
#define TLB_V7_UIS_ASID	(1 << 22)
#define TLB_V7_UIS_BP	(1 << 23)

/* Barrier and cache-clean requirements */
#define TLB_BARRIER	(1 << 28)	/* dsb()/isb() after the TLB operations */
#define TLB_L2CLEAN_FR	(1 << 29)	/* Feroceon */
#define TLB_DCLEAN	(1 << 30)	/* clean the D-cache line holding a PMD entry */
#define TLB_WB		(1 << 31)	/* dsb() to drain the write buffer */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* MMU TLB Model
|
|
|
|
* =============
|
|
|
|
*
|
|
|
|
* We have the following to choose from:
|
|
|
|
* v4 - ARMv4 without write buffer
|
|
|
|
* v4wb - ARMv4 with write buffer without I TLB flush entry instruction
|
|
|
|
* v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
|
2008-06-22 20:45:04 +00:00
|
|
|
* fr - Feroceon (v4wbi with non-outer-cacheable page table walks)
|
2011-07-05 08:01:13 +00:00
|
|
|
* fa - Faraday (v4 with write buffer with UTLB)
|
2005-04-16 22:20:36 +00:00
|
|
|
* v6wbi - ARMv6 with write buffer with I TLB flush entry instruction
|
2008-08-11 23:04:15 +00:00
|
|
|
* v7wbi - identical to v6wbi
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
|
|
|
/*
 * Start model selection from a clean slate.  _TLB ends up naming the first
 * configured TLB model; MULTI_TLB is defined as soon as a second model is
 * configured, or unconditionally for CONFIG_SMP_ON_UP kernels.
 */
#undef _TLB
#undef MULTI_TLB

#ifdef CONFIG_SMP_ON_UP
#define MULTI_TLB 1
#endif
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
 * v4 - ARMv4 without write buffer: unified whole-TLB and per-page
 * invalidation only.
 */
#define v4_tlb_flags	(TLB_V4_U_FULL | TLB_V4_U_PAGE)

#ifdef CONFIG_CPU_TLB_V4WT
# define v4_possible_flags	v4_tlb_flags
# define v4_always_flags	v4_tlb_flags
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB v4
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define v4_possible_flags	0
# define v4_always_flags	(-1UL)
#endif
|
|
|
|
|
2011-07-05 08:01:13 +00:00
|
|
|
/*
 * fa - Faraday (v4 with write buffer with UTLB): unified v4 operations plus
 * write-buffer drain, D-cache clean and trailing barrier.
 */
#define fa_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
			 TLB_V4_U_FULL | TLB_V4_U_PAGE)

#ifdef CONFIG_CPU_TLB_FA
# define fa_possible_flags	fa_tlb_flags
# define fa_always_flags	fa_tlb_flags
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB fa
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define fa_possible_flags	0
# define fa_always_flags	(-1UL)
#endif
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
 * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction:
 * separate I and D operations, both whole-TLB and per-page.
 */
#define v4wbi_tlb_flags	(TLB_WB | TLB_DCLEAN | \
			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)

#ifdef CONFIG_CPU_TLB_V4WBI
# define v4wbi_possible_flags	v4wbi_tlb_flags
# define v4wbi_always_flags	v4wbi_tlb_flags
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB v4wbi
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define v4wbi_possible_flags	0
# define v4wbi_always_flags	(-1UL)
#endif
|
|
|
|
|
2008-06-22 20:45:04 +00:00
|
|
|
/*
 * fr - Feroceon (v4wbi with non-outer-cacheable page table walks): the
 * v4wbi flag set plus TLB_L2CLEAN_FR.
 *
 * Note that _TLB is set to v4wbi, not fr: in a single-model build the range
 * flush calls therefore resolve to the v4wbi_* function names.
 */
#define fr_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_L2CLEAN_FR | \
			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
			 TLB_V4_I_PAGE | TLB_V4_D_PAGE)

#ifdef CONFIG_CPU_TLB_FEROCEON
# define fr_possible_flags	fr_tlb_flags
# define fr_always_flags	fr_tlb_flags
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB v4wbi
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define fr_possible_flags	0
# define fr_always_flags	(-1UL)
#endif
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
 * v4wb - ARMv4 with write buffer without I TLB flush entry instruction:
 * there is no TLB_V4_I_PAGE here, so the per-page helpers fall back to
 * TLB_V4_I_FULL for the instruction side.
 */
#define v4wb_tlb_flags	(TLB_WB | TLB_DCLEAN | \
			 TLB_V4_I_FULL | TLB_V4_D_FULL | \
			 TLB_V4_D_PAGE)

#ifdef CONFIG_CPU_TLB_V4WB
# define v4wb_possible_flags	v4wb_tlb_flags
# define v4wb_always_flags	v4wb_tlb_flags
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB v4wb
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define v4wb_possible_flags	0
# define v4wb_always_flags	(-1UL)
#endif
|
|
|
|
|
2011-07-05 08:01:13 +00:00
|
|
|
/*
 * v6wbi - ARMv6 with write buffer with I TLB flush entry instruction:
 * separate I and D operations (whole-TLB, per-page and per-ASID), branch
 * predictor invalidation, and a trailing barrier.
 */
#define v6wbi_tlb_flags	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
			 TLB_V6_I_ASID | TLB_V6_D_ASID | \
			 TLB_V6_BP)

#ifdef CONFIG_CPU_TLB_V6
# define v6wbi_possible_flags	v6wbi_tlb_flags
# define v6wbi_always_flags	v6wbi_tlb_flags
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB v6wbi
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define v6wbi_possible_flags	0
# define v6wbi_always_flags	(-1UL)
#endif
|
|
|
|
|
2013-04-03 16:16:57 +00:00
|
|
|
/*
 * v7wbi has two flag sets:
 *  - _smp uses the Unified Inner Shareable (TLB_V7_UIS_*) operations and
 *    does not include TLB_DCLEAN;
 *  - _up uses the unified TLB_V6_U_* operations and includes TLB_DCLEAN.
 */
#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_BARRIER | \
				 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \
				 TLB_V7_UIS_ASID | TLB_V7_UIS_BP)
#define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
				 TLB_V6_U_FULL | TLB_V6_U_PAGE | \
				 TLB_V6_U_ASID | TLB_V6_BP)

#ifdef CONFIG_CPU_TLB_V7

# ifdef CONFIG_SMP_ON_UP
/* Either set may apply: "possible" is their union, "always" their intersection */
#  define v7wbi_possible_flags	(v7wbi_tlb_flags_smp | v7wbi_tlb_flags_up)
#  define v7wbi_always_flags	(v7wbi_tlb_flags_smp & v7wbi_tlb_flags_up)
# elif defined(CONFIG_SMP)
#  define v7wbi_possible_flags	v7wbi_tlb_flags_smp
#  define v7wbi_always_flags	v7wbi_tlb_flags_smp
# else
#  define v7wbi_possible_flags	v7wbi_tlb_flags_up
#  define v7wbi_always_flags	v7wbi_tlb_flags_up
# endif
# ifdef _TLB
#  define MULTI_TLB 1
# else
#  define _TLB v7wbi
# endif
#else
/* Not configured: adds nothing to the OR of possible flags, neutral in the AND */
# define v7wbi_possible_flags	0
# define v7wbi_always_flags	(-1UL)
#endif
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
 * _TLB is defined by the first configured CONFIG_CPU_TLB_* model section.
 * If it is still undefined here, no TLB model was selected and the build
 * cannot continue.
 */
#ifndef _TLB
#error Unknown TLB model
#endif
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
Detach sched.h from mm.h
First thing mm.h does is including sched.h solely for can_do_mlock() inline
function which has "current" dereference inside. By dealing with can_do_mlock()
mm.h can be detached from sched.h which is good. See below, why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-20 21:22:52 +00:00
|
|
|
#include <linux/sched.h>
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
 * struct cpu_tlb_fns - method table for one TLB model
 * @flush_user_range: range flush taking two addresses and a vma
 *	(presumably start, end, vma - confirm against the tlb-*.S
 *	implementations)
 * @flush_kern_range: range flush taking two addresses
 *	(presumably start, end)
 * @tlb_flags: TLB_* flag mask; read through __cpu_tlb_flags by the inline
 *	flush helpers in this file
 */
struct cpu_tlb_fns {
	void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
	void (*flush_kern_range)(unsigned long, unsigned long);
	unsigned long tlb_flags;
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Select the calling method
|
|
|
|
*/
|
|
|
|
#ifdef MULTI_TLB
|
|
|
|
|
|
|
|
#define __cpu_flush_user_tlb_range cpu_tlb.flush_user_range
|
|
|
|
#define __cpu_flush_kern_tlb_range cpu_tlb.flush_kern_range
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
#define __cpu_flush_user_tlb_range __glue(_TLB,_flush_user_tlb_range)
|
|
|
|
#define __cpu_flush_kern_tlb_range __glue(_TLB,_flush_kern_tlb_range)
|
|
|
|
|
|
|
|
extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
|
|
|
|
extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
extern struct cpu_tlb_fns cpu_tlb;
|
|
|
|
|
|
|
|
#define __cpu_tlb_flags cpu_tlb.tlb_flags
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TLB Management
|
|
|
|
* ==============
|
|
|
|
*
|
|
|
|
* The arch/arm/mm/tlb-*.S files implement these methods.
|
|
|
|
*
|
|
|
|
* The TLB specific code is expected to perform whatever tests it
|
|
|
|
* needs to determine if it should invalidate the TLB for each
|
|
|
|
* call. Start addresses are inclusive and end addresses are
|
|
|
|
* exclusive; it is safe to round these addresses down.
|
|
|
|
*
|
|
|
|
* flush_tlb_all()
|
|
|
|
*
|
|
|
|
* Invalidate the entire TLB.
|
|
|
|
*
|
|
|
|
* flush_tlb_mm(mm)
|
|
|
|
*
|
|
|
|
* Invalidate all TLB entries in a particular address
|
|
|
|
* space.
|
|
|
|
* - mm - mm_struct describing address space
|
|
|
|
*
|
|
|
|
* flush_tlb_range(mm,start,end)
|
|
|
|
*
|
|
|
|
* Invalidate a range of TLB entries in the specified
|
|
|
|
* address space.
|
|
|
|
* - mm - mm_struct describing address space
|
|
|
|
* - start - start address (may not be aligned)
|
|
|
|
* - end - end address (exclusive, may not be aligned)
|
|
|
|
*
|
|
|
|
* flush_tlb_page(vaddr,vma)
|
|
|
|
*
|
|
|
|
* Invalidate the specified page in the specified address range.
|
|
|
|
* - vaddr - virtual address (may not be aligned)
|
|
|
|
* - vma - vma_struct describing address range
|
|
|
|
*
|
|
|
|
* flush_kern_tlb_page(kaddr)
|
|
|
|
*
|
|
|
|
* Invalidate the TLB entry for the specified page. The address
|
|
|
|
* will be in the kernel's virtual memory space. Current uses
|
|
|
|
* only require the D-TLB to be invalidated.
|
|
|
|
* - kaddr - Kernel virtual memory address
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We optimise the code below by:
|
|
|
|
* - building a set of TLB flags that might be set in __cpu_tlb_flags
|
|
|
|
* - building a set of TLB flags that will always be set in __cpu_tlb_flags
|
|
|
|
* - if we're going to need __cpu_tlb_flags, access it once and only once
|
|
|
|
*
|
|
|
|
* This allows us to build optimal assembly for the single-CPU type case,
|
|
|
|
* and as close to optimal given the compiler constraints for multi-CPU
|
|
|
|
* case. We could do better for the multi-CPU case if the compiler
|
|
|
|
* implemented the "%?" method, but this has been discontinued due to too
|
|
|
|
* many people getting it wrong.
|
|
|
|
*/
|
2012-05-04 11:04:26 +00:00
|
|
|
/*
 * Union of the flags of every configured TLB model: a flag outside this
 * set can never be set in __cpu_tlb_flags.  Unconfigured models contribute
 * 0.
 */
#define possible_tlb_flags	(v4_possible_flags | \
				 v4wbi_possible_flags | \
				 fr_possible_flags | \
				 v4wb_possible_flags | \
				 fa_possible_flags | \
				 v6wbi_possible_flags | \
				 v7wbi_possible_flags)
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2012-05-04 11:04:26 +00:00
|
|
|
/*
 * Intersection of the flags of every configured TLB model: a flag in this
 * set is set in __cpu_tlb_flags whichever model is in use.  Unconfigured
 * models contribute (-1UL), which leaves the AND unchanged.
 */
#define always_tlb_flags	(v4_always_flags & \
				 v4wbi_always_flags & \
				 fr_always_flags & \
				 v4wb_always_flags & \
				 fa_always_flags & \
				 v6wbi_always_flags & \
				 v7wbi_always_flags)
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
 * tlb_flag(f) - true if any bit of @f is in effect.
 *
 * Bits in always_tlb_flags are answered from build-time constants alone.
 * Otherwise the caller's local __tlb_flag (a copy of __cpu_tlb_flags) is
 * consulted, masked by possible_tlb_flags so impossible bits cost nothing.
 * A variable named __tlb_flag must be in scope at the point of use.
 */
#define tlb_flag(f)	((always_tlb_flags & (f)) || (__tlb_flag & possible_tlb_flags & (f)))
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
/*
 * __tlb_op(f, insnarg, arg) - issue one coprocessor write if flag @f applies.
 * @f:       TLB_* flag (or OR of flags) that enables this operation
 * @insnarg: operand string of the mcr instruction; %0 refers to @arg
 * @arg:     value passed to the instruction in a register
 *
 * - @f always set:   emit an unconditional "mcr".
 * - @f possibly set: emit "tst" of the caller's __tlb_flag against @f
 *   followed by a conditional "mcrne", so the run-time check needs no
 *   branch.
 * - otherwise:       emit nothing.
 *
 * A variable named __tlb_flag must be in scope at the point of use.
 */
#define __tlb_op(f, insnarg, arg)					\
	do {								\
		if (always_tlb_flags & (f))				\
			asm("mcr " insnarg				\
			    : : "r" (arg) : "cc");			\
		else if (possible_tlb_flags & (f))			\
			asm("tst %1, %2\n\t"				\
			    "mcrne " insnarg				\
			    : : "r" (arg), "r" (__tlb_flag), "Ir" (f)	\
			    : "cc");					\
	} while (0)
|
|
|
|
|
|
|
|
/*
 * Wrappers around __tlb_op() that supply the leading mcr operands.
 * tlb_op() uses "p15, 0" and tlb_l2_op() uses "p15, 1"; @regs supplies the
 * remaining operands and may carry a trailing assembler comment.
 */
#define tlb_op(f, regs, arg)	__tlb_op(f, "p15, 0, %0, " regs, arg)
#define tlb_l2_op(f, regs, arg)	__tlb_op(f, "p15, 1, %0, " regs, arg)
|
|
|
|
|
2005-06-28 12:40:39 +00:00
|
|
|
static inline void local_flush_tlb_all(void)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2007-02-05 13:47:51 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero);
|
|
|
|
tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
|
|
|
|
tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
|
|
|
|
tlb_op(TLB_V7_UIS_FULL, "c8, c3, 0", zero);
|
2007-02-05 13:47:51 +00:00
|
|
|
|
2011-07-05 08:01:13 +00:00
|
|
|
if (tlb_flag(TLB_BARRIER)) {
|
2010-05-07 17:03:05 +00:00
|
|
|
dsb();
|
|
|
|
isb();
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2005-06-28 12:40:39 +00:00
|
|
|
static inline void local_flush_tlb_mm(struct mm_struct *mm)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const int asid = ASID(mm);
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2007-02-05 13:47:51 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2013-01-14 20:48:55 +00:00
|
|
|
if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) {
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) {
|
|
|
|
tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero);
|
|
|
|
tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero);
|
|
|
|
tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero);
|
|
|
|
}
|
|
|
|
put_cpu();
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
|
|
|
|
tlb_op(TLB_V6_U_ASID, "c8, c7, 2", asid);
|
|
|
|
tlb_op(TLB_V6_D_ASID, "c8, c6, 2", asid);
|
|
|
|
tlb_op(TLB_V6_I_ASID, "c8, c5, 2", asid);
|
2010-08-05 10:20:51 +00:00
|
|
|
#ifdef CONFIG_ARM_ERRATA_720789
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_ASID, "c8, c3, 0", zero);
|
2010-08-05 10:20:51 +00:00
|
|
|
#else
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_ASID, "c8, c3, 2", asid);
|
2010-08-05 10:20:51 +00:00
|
|
|
#endif
|
2007-02-05 13:47:51 +00:00
|
|
|
|
2011-07-05 08:01:13 +00:00
|
|
|
if (tlb_flag(TLB_BARRIER))
|
2010-05-07 17:03:05 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
2005-06-28 12:40:39 +00:00
|
|
|
local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2007-02-05 13:47:51 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
2013-01-14 20:48:55 +00:00
|
|
|
if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) &&
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
|
|
|
|
tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr);
|
|
|
|
tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr);
|
|
|
|
tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL))
|
2006-08-30 14:02:08 +00:00
|
|
|
asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", uaddr);
|
|
|
|
tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", uaddr);
|
|
|
|
tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", uaddr);
|
2010-08-05 10:20:51 +00:00
|
|
|
#ifdef CONFIG_ARM_ERRATA_720789
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 3", uaddr & PAGE_MASK);
|
2010-08-05 10:20:51 +00:00
|
|
|
#else
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", uaddr);
|
2010-08-05 10:20:51 +00:00
|
|
|
#endif
|
2007-02-05 13:47:51 +00:00
|
|
|
|
2011-07-05 08:01:13 +00:00
|
|
|
if (tlb_flag(TLB_BARRIER))
|
2010-05-07 17:03:05 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2005-06-28 12:40:39 +00:00
|
|
|
static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
kaddr &= PAGE_MASK;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_WB))
|
2007-02-05 13:47:51 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr);
|
|
|
|
tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr);
|
|
|
|
tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr);
|
2005-04-16 22:20:36 +00:00
|
|
|
if (!tlb_flag(TLB_V4_I_PAGE) && tlb_flag(TLB_V4_I_FULL))
|
2006-08-30 14:02:08 +00:00
|
|
|
asm("mcr p15, 0, %0, c8, c5, 0" : : "r" (zero) : "cc");
|
2005-04-16 22:20:36 +00:00
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_V6_U_PAGE, "c8, c7, 1", kaddr);
|
|
|
|
tlb_op(TLB_V6_D_PAGE, "c8, c6, 1", kaddr);
|
|
|
|
tlb_op(TLB_V6_I_PAGE, "c8, c5, 1", kaddr);
|
|
|
|
tlb_op(TLB_V7_UIS_PAGE, "c8, c3, 1", kaddr);
|
2006-03-07 14:42:27 +00:00
|
|
|
|
2011-07-05 08:01:13 +00:00
|
|
|
if (tlb_flag(TLB_BARRIER)) {
|
2010-05-07 17:03:05 +00:00
|
|
|
dsb();
|
|
|
|
isb();
|
|
|
|
}
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2013-02-28 16:48:11 +00:00
|
|
|
static inline void local_flush_bp_all(void)
|
|
|
|
{
|
|
|
|
const int zero = 0;
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_V7_UIS_BP))
|
|
|
|
asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero));
|
|
|
|
else if (tlb_flag(TLB_V6_BP))
|
|
|
|
asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero));
|
|
|
|
|
|
|
|
if (tlb_flag(TLB_BARRIER))
|
|
|
|
isb();
|
|
|
|
}
|
|
|
|
|
2013-03-26 22:35:04 +00:00
|
|
|
/*
 * dummy_flush_tlb_a15_erratum - workaround hook for erratum 798181.
 *
 * With CONFIG_ARM_ERRATA_798181 this issues one dummy invalidate followed
 * by a dsb(); without it the function is an empty stub so callers need no
 * #ifdef of their own.
 */
#ifdef CONFIG_ARM_ERRATA_798181
static inline void dummy_flush_tlb_a15_erratum(void)
{
	/*
	 * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0.
	 */
	asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0));
	dsb();
}
#else
static inline void dummy_flush_tlb_a15_erratum(void)
{
}
#endif
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
/*
|
|
|
|
* flush_pmd_entry
|
|
|
|
*
|
|
|
|
* Flush a PMD entry (word aligned, or double-word aligned) to
|
|
|
|
* RAM if the TLB for the CPU we are running on requires this.
|
|
|
|
* This is typically used when we are creating PMD entries.
|
|
|
|
*
|
|
|
|
* clean_pmd_entry
|
|
|
|
*
|
|
|
|
* Clean (but don't drain the write buffer) if the CPU requires
|
|
|
|
* these operations. This is typically used when we are removing
|
|
|
|
* PMD entries.
|
|
|
|
*/
|
2011-09-05 16:51:56 +00:00
|
|
|
static inline void flush_pmd_entry(void *pmd)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd);
|
|
|
|
tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd);
|
2008-06-22 20:45:04 +00:00
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
if (tlb_flag(TLB_WB))
|
2007-02-05 13:47:51 +00:00
|
|
|
dsb();
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
2011-09-05 16:51:56 +00:00
|
|
|
static inline void clean_pmd_entry(void *pmd)
|
2005-04-16 22:20:36 +00:00
|
|
|
{
|
|
|
|
const unsigned int __tlb_flag = __cpu_tlb_flags;
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
tlb_op(TLB_DCLEAN, "c7, c10, 1 @ flush_pmd", pmd);
|
|
|
|
tlb_l2_op(TLB_L2CLEAN_FR, "c15, c9, 1 @ L2 flush_pmd", pmd);
|
2005-04-16 22:20:36 +00:00
|
|
|
}
|
|
|
|
|
ARM: Optimize multi-CPU tlb flushing a little more
The compiler does not conditionalize the assembly instructions for
the tlb operations, which leads to sub-optimal code being generated
when building a kernel for multiple CPUs.
We can tweak things fairly simply as the code fragment below shows:
17f8: e3120001 tst r2, #1 ; 0x1
...
1800: 0a000000 beq 1808 <handle_pte_fault+0x194>
1804: ee061f10 mcr 15, 0, r1, cr6, cr0, {0}
1808: e3120004 tst r2, #4 ; 0x4
180c: 0a000000 beq 1814 <handle_pte_fault+0x1a0>
1810: ee081f36 mcr 15, 0, r1, cr8, cr6, {1}
becomes:
17f0: e3120001 tst r2, #1 ; 0x1
17f4: 1e063f10 mcrne 15, 0, r3, cr6, cr0, {0}
17f8: e3120004 tst r2, #4 ; 0x4
17fc: 1e083f36 mcrne 15, 0, r3, cr8, cr6, {1}
Overall, for Realview with V6 and V7 CPUs configured:
text data bss dec hex filename
4153998 207340 5371036 9732374 948116 ../build/realview/vmlinux.before
4153366 207332 5371036 9731734 947e96 ../build/realview/vmlinux.after
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2012-02-04 10:55:38 +00:00
|
|
|
/*
 * The helper macros used by the inline functions above are not meant to
 * be visible to files including this header; remove them again.
 *
 * NOTE(review): tlb_l2_op is used above but is not undefined here --
 * confirm whether that is intentional.
 */
#undef possible_tlb_flags
#undef always_tlb_flags
#undef tlb_flag
#undef tlb_op
|
|
|
|
|
|
|
|
/*
 * Adapt the generic local_flush_tlb_*range() calls to the argument order
 * expected by the per-CPU __cpu_flush_*_tlb_range() implementations
 * (the vma moves from first to last position for the user variant).
 */
#define local_flush_tlb_range(vma,from,to)	\
	__cpu_flush_user_tlb_range(from,to,vma)
#define local_flush_tlb_kernel_range(from,to)	\
	__cpu_flush_kern_tlb_range(from,to)
|
|
|
|
|
|
|
|
/*
 * Select the implementation behind the flush_*() entry points:
 *  - with CONFIG_SMP they are out-of-line functions defined elsewhere;
 *  - without it they are plain aliases for the local_flush_*() variants.
 */
#ifdef CONFIG_SMP
extern void flush_tlb_all(void);
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
extern void flush_tlb_kernel_page(unsigned long kaddr);
extern void flush_tlb_range(struct vm_area_struct *vma,
			    unsigned long start, unsigned long end);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void flush_bp_all(void);
#else
#define flush_tlb_all		local_flush_tlb_all
#define flush_tlb_mm		local_flush_tlb_mm
#define flush_tlb_page		local_flush_tlb_page
#define flush_tlb_kernel_page	local_flush_tlb_kernel_page
#define flush_tlb_range		local_flush_tlb_range
#define flush_tlb_kernel_range	local_flush_tlb_kernel_range
#define flush_bp_all		local_flush_bp_all
#endif
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
/*
|
2010-09-13 14:57:36 +00:00
|
|
|
* If PG_dcache_clean is not set for the page, we need to ensure that any
|
2005-04-16 22:20:36 +00:00
|
|
|
* cache entries for the kernel's virtual memory range are written
|
2010-09-13 14:58:06 +00:00
|
|
|
* back to the page. On ARMv6 and later, the cache coherency is handled via
|
|
|
|
* the set_pte_at() function.
|
2005-04-16 22:20:36 +00:00
|
|
|
*/
|
2010-09-13 14:58:06 +00:00
|
|
|
#if __LINUX_ARM_ARCH__ < 6
|
MM: Pass a PTE pointer to update_mmu_cache() rather than the PTE itself
On VIVT ARM, when we have multiple shared mappings of the same file
in the same MM, we need to ensure that we have coherency across all
copies. We do this via make_coherent() by making the pages
uncacheable.
This used to work fine, until we allowed highmem with highpte - we
now have a page table which is mapped as required, and is not available
for modification via update_mmu_cache().
Ralf Baechle suggested getting rid of the PTE value passed to
update_mmu_cache():
On MIPS update_mmu_cache() calls __update_tlb() which walks pagetables
to construct a pointer to the pte again. Passing a pte_t * is much
more elegant. Maybe we might even replace the pte argument with the
pte_t?
Ben Herrenschmidt would also like the pte pointer for PowerPC:
Passing the ptep in there is exactly what I want. I want that
-instead- of the PTE value, because I have issue on some ppc cases,
for I$/D$ coherency, where set_pte_at() may decide to mask out the
_PAGE_EXEC.
So, pass in the mapped page table pointer into update_mmu_cache(), and
remove the PTE value, updating all implementations and call sites to
suit.
Includes a fix from Stephen Rothwell:
sparc: fix fallout from update_mmu_cache API change
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2009-12-18 16:40:18 +00:00
|
|
|
extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
|
|
|
|
pte_t *ptep);
|
2010-09-13 14:58:06 +00:00
|
|
|
#else
|
|
|
|
/*
 * Variant used when __LINUX_ARM_ARCH__ >= 6: deliberately a no-op with
 * the same signature as the out-of-line version, so callers need no
 * conditional code.
 */
static inline void
update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	/* Nothing to do. */
}
|
|
|
|
#endif
|
2005-04-16 22:20:36 +00:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2006-02-24 21:41:25 +00:00
|
|
|
#endif /* CONFIG_MMU */
|
|
|
|
|
2005-04-16 22:20:36 +00:00
|
|
|
#endif /* _ASMARM_TLBFLUSH_H */
|