mirror of
https://github.com/torvalds/linux.git
synced 2024-12-27 21:33:00 +00:00
c46a7c817e
_PAGE_NUMA is currently an alias of _PROT_PROTNONE to trap NUMA hinting faults on x86. Care is taken such that _PAGE_NUMA is used only in situations where the VMA flags distinguish between NUMA hinting faults and prot_none faults. This decision was x86-specific and conceptually it is difficult requiring special casing to distinguish between PROTNONE and NUMA ptes based on context. Fundamentally, we only need the _PAGE_NUMA bit to tell the difference between an entry that is really unmapped and a page that is protected for NUMA hinting faults as if the PTE is not present then a fault will be trapped. Swap PTEs on x86-64 use the bits after _PAGE_GLOBAL for the offset. This patch shrinks the maximum possible swap size and uses the bit to uniquely distinguish between NUMA hinting ptes and swap ptes. Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Peter Anvin <hpa@zytor.com> Cc: Fengguang Wu <fengguang.wu@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Steven Noonan <steven@uplinklabs.net> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
343 lines
10 KiB
C
343 lines
10 KiB
C
#ifndef _ASM_POWERPC_PGTABLE_H
|
|
#define _ASM_POWERPC_PGTABLE_H
|
|
#ifdef __KERNEL__
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/mmdebug.h>
|
|
#include <asm/processor.h> /* For TASK_SIZE */
|
|
#include <asm/mmu.h>
|
|
#include <asm/page.h>
|
|
|
|
struct mm_struct;
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#if defined(CONFIG_PPC64)
|
|
# include <asm/pgtable-ppc64.h>
|
|
#else
|
|
# include <asm/pgtable-ppc32.h>
|
|
#endif
|
|
|
|
/*
|
|
* We save the slot number & secondary bit in the second half of the
|
|
* PTE page. We use the 8 bytes per each pte entry.
|
|
*/
|
|
#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
/* Generic accessors to PTE bits */
|
|
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
|
|
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
|
|
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
|
|
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
|
|
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
|
|
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
|
|
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
|
|
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
|
|
static inline int pte_present(pte_t pte)
|
|
{
|
|
return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA);
|
|
}
|
|
|
|
#define pte_present_nonuma pte_present_nonuma
|
|
static inline int pte_present_nonuma(pte_t pte)
|
|
{
|
|
return pte_val(pte) & (_PAGE_PRESENT);
|
|
}
|
|
|
|
#define pte_numa pte_numa
|
|
static inline int pte_numa(pte_t pte)
|
|
{
|
|
return (pte_val(pte) &
|
|
(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
|
|
}
|
|
|
|
#define pte_mknonnuma pte_mknonnuma
|
|
static inline pte_t pte_mknonnuma(pte_t pte)
|
|
{
|
|
pte_val(pte) &= ~_PAGE_NUMA;
|
|
pte_val(pte) |= _PAGE_PRESENT | _PAGE_ACCESSED;
|
|
return pte;
|
|
}
|
|
|
|
#define pte_mknuma pte_mknuma
|
|
static inline pte_t pte_mknuma(pte_t pte)
|
|
{
|
|
/*
|
|
* We should not set _PAGE_NUMA on non present ptes. Also clear the
|
|
* present bit so that hash_page will return 1 and we collect this
|
|
* as numa fault.
|
|
*/
|
|
if (pte_present(pte)) {
|
|
pte_val(pte) |= _PAGE_NUMA;
|
|
pte_val(pte) &= ~_PAGE_PRESENT;
|
|
} else
|
|
VM_BUG_ON(1);
|
|
return pte;
|
|
}
|
|
|
|
#define ptep_set_numa ptep_set_numa
|
|
static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
|
|
pte_t *ptep)
|
|
{
|
|
if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
|
|
VM_BUG_ON(1);
|
|
|
|
pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
|
|
return;
|
|
}
|
|
|
|
#define pmd_numa pmd_numa
|
|
static inline int pmd_numa(pmd_t pmd)
|
|
{
|
|
return pte_numa(pmd_pte(pmd));
|
|
}
|
|
|
|
#define pmdp_set_numa pmdp_set_numa
|
|
static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
|
|
pmd_t *pmdp)
|
|
{
|
|
if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
|
|
VM_BUG_ON(1);
|
|
|
|
pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
|
|
return;
|
|
}
|
|
|
|
#define pmd_mknonnuma pmd_mknonnuma
|
|
static inline pmd_t pmd_mknonnuma(pmd_t pmd)
|
|
{
|
|
return pte_pmd(pte_mknonnuma(pmd_pte(pmd)));
|
|
}
|
|
|
|
#define pmd_mknuma pmd_mknuma
|
|
static inline pmd_t pmd_mknuma(pmd_t pmd)
|
|
{
|
|
return pte_pmd(pte_mknuma(pmd_pte(pmd)));
|
|
}
|
|
|
|
# else
|
|
|
|
static inline int pte_present(pte_t pte)
|
|
{
|
|
return pte_val(pte) & _PAGE_PRESENT;
|
|
}
|
|
#endif /* CONFIG_NUMA_BALANCING */
|
|
|
|
/* Conversion functions: convert a page and protection to a page entry,
|
|
* and a page entry and page directory to the page they refer to.
|
|
*
|
|
* Even if PTEs can be unsigned long long, a PFN is always an unsigned
|
|
* long for now.
|
|
*/
|
|
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
|
|
return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
|
|
pgprot_val(pgprot)); }
|
|
static inline unsigned long pte_pfn(pte_t pte) {
|
|
return pte_val(pte) >> PTE_RPN_SHIFT; }
|
|
|
|
/* Keep these as a macros to avoid include dependency mess */
|
|
#define pte_page(x) pfn_to_page(pte_pfn(x))
|
|
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
|
|
/* Generic modifiers for PTE bits */
|
|
static inline pte_t pte_wrprotect(pte_t pte) {
|
|
pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
|
|
static inline pte_t pte_mkclean(pte_t pte) {
|
|
pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
|
|
static inline pte_t pte_mkold(pte_t pte) {
|
|
pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
|
|
static inline pte_t pte_mkwrite(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_RW; return pte; }
|
|
static inline pte_t pte_mkdirty(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
|
static inline pte_t pte_mkyoung(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_ACCESSED; return pte; }
|
|
static inline pte_t pte_mkspecial(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_SPECIAL; return pte; }
|
|
static inline pte_t pte_mkhuge(pte_t pte) {
|
|
return pte; }
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
{
|
|
pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
|
|
return pte;
|
|
}
|
|
|
|
|
|
/* Insert a PTE, top-level function is out of line. It uses an inline
|
|
* low level function in the respective pgtable-* files
|
|
*/
|
|
extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
|
pte_t pte);
|
|
|
|
/* This low level function performs the actual PTE insertion
|
|
* Setting the PTE depends on the MMU type and other factors. It's
|
|
* an horrible mess that I'm not going to try to clean up now but
|
|
* I'm keeping it in one place rather than spread around
|
|
*/
|
|
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|
pte_t *ptep, pte_t pte, int percpu)
|
|
{
|
|
#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
|
|
/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
|
|
* helper pte_update() which does an atomic update. We need to do that
|
|
* because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
|
|
* per-CPU PTE such as a kmap_atomic, we do a simple update preserving
|
|
* the hash bits instead (ie, same as the non-SMP case)
|
|
*/
|
|
if (percpu)
|
|
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
|
|
| (pte_val(pte) & ~_PAGE_HASHPTE));
|
|
else
|
|
pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
|
|
|
|
#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
|
|
/* Second case is 32-bit with 64-bit PTE. In this case, we
|
|
* can just store as long as we do the two halves in the right order
|
|
* with a barrier in between. This is possible because we take care,
|
|
* in the hash code, to pre-invalidate if the PTE was already hashed,
|
|
* which synchronizes us with any concurrent invalidation.
|
|
* In the percpu case, we also fallback to the simple update preserving
|
|
* the hash bits
|
|
*/
|
|
if (percpu) {
|
|
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
|
|
| (pte_val(pte) & ~_PAGE_HASHPTE));
|
|
return;
|
|
}
|
|
#if _PAGE_HASHPTE != 0
|
|
if (pte_val(*ptep) & _PAGE_HASHPTE)
|
|
flush_hash_entry(mm, ptep, addr);
|
|
#endif
|
|
__asm__ __volatile__("\
|
|
stw%U0%X0 %2,%0\n\
|
|
eieio\n\
|
|
stw%U0%X0 %L2,%1"
|
|
: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
|
|
: "r" (pte) : "memory");
|
|
|
|
#elif defined(CONFIG_PPC_STD_MMU_32)
|
|
/* Third case is 32-bit hash table in UP mode, we need to preserve
|
|
* the _PAGE_HASHPTE bit since we may not have invalidated the previous
|
|
* translation in the hash yet (done in a subsequent flush_tlb_xxx())
|
|
* and see we need to keep track that this PTE needs invalidating
|
|
*/
|
|
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
|
|
| (pte_val(pte) & ~_PAGE_HASHPTE));
|
|
|
|
#else
|
|
/* Anything else just stores the PTE normally. That covers all 64-bit
|
|
* cases, and 32-bit non-hash with 32-bit PTEs.
|
|
*/
|
|
*ptep = pte;
|
|
#endif
|
|
}
|
|
|
|
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
|
pte_t *ptep, pte_t entry, int dirty);
|
|
|
|
/*
|
|
* Macro to mark a page protection value as "uncacheable".
|
|
*/
|
|
|
|
#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
|
|
_PAGE_WRITETHRU)
|
|
|
|
#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_NO_CACHE | _PAGE_GUARDED))
|
|
|
|
#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_NO_CACHE))
|
|
|
|
#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_COHERENT))
|
|
|
|
#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_COHERENT | _PAGE_WRITETHRU))
|
|
|
|
#define pgprot_cached_noncoherent(prot) \
|
|
(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
|
|
|
|
#define pgprot_writecombine pgprot_noncached_wc
|
|
|
|
struct file;
|
|
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
unsigned long size, pgprot_t vma_prot);
|
|
#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
|
|
/*
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
* for zero-mapped memory areas etc..
|
|
*/
|
|
extern unsigned long empty_zero_page[];
|
|
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
|
|
extern pgd_t swapper_pg_dir[];
|
|
|
|
extern void paging_init(void);
|
|
|
|
/*
|
|
* kern_addr_valid is intended to indicate whether an address is a valid
|
|
* kernel address. Most 32-bit archs define it as always true (like this)
|
|
* but most 64-bit archs actually perform a test. What should we do here?
|
|
*/
|
|
#define kern_addr_valid(addr) (1)
|
|
|
|
#include <asm-generic/pgtable.h>
|
|
|
|
|
|
/*
|
|
* This gets called at the end of handling a page fault, when
|
|
* the kernel has put a new PTE into the page table for the process.
|
|
* We use it to ensure coherency between the i-cache and d-cache
|
|
* for the page which has just been mapped in.
|
|
* On machines which use an MMU hash table, we use this to put a
|
|
* corresponding HPTE into the hash table ahead of time, instead of
|
|
* waiting for the inevitable extra hash-table miss exception.
|
|
*/
|
|
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
|
|
|
|
extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
|
|
unsigned long end, int write, struct page **pages, int *nr);
|
|
|
|
extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
|
unsigned long end, int write, struct page **pages, int *nr);
|
|
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
|
|
#define pmd_large(pmd) 0
|
|
#define has_transparent_hugepage() 0
|
|
#endif
|
|
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
|
|
unsigned *shift);
|
|
|
|
static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
|
|
unsigned long *pte_sizep)
|
|
{
|
|
pte_t *ptep;
|
|
unsigned long ps = *pte_sizep;
|
|
unsigned int shift;
|
|
|
|
ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
|
|
if (!ptep)
|
|
return NULL;
|
|
if (shift)
|
|
*pte_sizep = 1ul << shift;
|
|
else
|
|
*pte_sizep = PAGE_SIZE;
|
|
|
|
if (ps > *pte_sizep)
|
|
return NULL;
|
|
|
|
return ptep;
|
|
}
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_PGTABLE_H */
|