e2b3d202d1
We will be switching PMD_SHIFT to 24 bits to facilitate THP impmenetation. With PMD_SHIFT set to 24, we now have 16MB huge pages allocated at PGD level. That means with 32 bit process we cannot allocate normal pages at all, because we cover the entire address space with one pgd entry. Fix this by switching to a new page table format for hugepages. With the new page table format for 16GB and 16MB hugepages we won't allocate hugepage directory. Instead we encode the PTE information directly at the directory level. This forces 16MB hugepage at PMD level. This will also make the page take walk much simpler later when we add the THP support. With the new table format we have 4 cases for pgds and pmds: (1) invalid (all zeroes) (2) pointer to next table, as normal; bottom 6 bits == 0 (3) leaf pte for huge page, bottom two bits != 00 (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Acked-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
227 lines
8.0 KiB
C
227 lines
8.0 KiB
C
#ifndef _ASM_POWERPC_PGTABLE_H
|
|
#define _ASM_POWERPC_PGTABLE_H
|
|
#ifdef __KERNEL__
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <asm/processor.h> /* For TASK_SIZE */
|
|
#include <asm/mmu.h>
|
|
#include <asm/page.h>
|
|
|
|
struct mm_struct;
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#if defined(CONFIG_PPC64)
|
|
# include <asm/pgtable-ppc64.h>
|
|
#else
|
|
# include <asm/pgtable-ppc32.h>
|
|
#endif
|
|
|
|
/*
|
|
* We save the slot number & secondary bit in the second half of the
|
|
* PTE page. We use the 8 bytes per each pte entry.
|
|
*/
|
|
#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
/* Generic accessors to PTE bits */
|
|
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
|
|
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
|
|
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
|
|
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
|
|
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
|
|
static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; }
|
|
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
|
|
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
|
|
|
|
/* Conversion functions: convert a page and protection to a page entry,
|
|
* and a page entry and page directory to the page they refer to.
|
|
*
|
|
* Even if PTEs can be unsigned long long, a PFN is always an unsigned
|
|
* long for now.
|
|
*/
|
|
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
|
|
return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
|
|
pgprot_val(pgprot)); }
|
|
static inline unsigned long pte_pfn(pte_t pte) {
|
|
return pte_val(pte) >> PTE_RPN_SHIFT; }
|
|
|
|
/* Keep these as a macros to avoid include dependency mess */
|
|
#define pte_page(x) pfn_to_page(pte_pfn(x))
|
|
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
|
|
/* Generic modifiers for PTE bits */
|
|
static inline pte_t pte_wrprotect(pte_t pte) {
|
|
pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
|
|
static inline pte_t pte_mkclean(pte_t pte) {
|
|
pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
|
|
static inline pte_t pte_mkold(pte_t pte) {
|
|
pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
|
|
static inline pte_t pte_mkwrite(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_RW; return pte; }
|
|
static inline pte_t pte_mkdirty(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
|
static inline pte_t pte_mkyoung(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_ACCESSED; return pte; }
|
|
static inline pte_t pte_mkspecial(pte_t pte) {
|
|
pte_val(pte) |= _PAGE_SPECIAL; return pte; }
|
|
static inline pte_t pte_mkhuge(pte_t pte) {
|
|
return pte; }
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
{
|
|
pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
|
|
return pte;
|
|
}
|
|
|
|
|
|
/* Insert a PTE, top-level function is out of line. It uses an inline
|
|
* low level function in the respective pgtable-* files
|
|
*/
|
|
extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
|
pte_t pte);
|
|
|
|
/* This low level function performs the actual PTE insertion
|
|
* Setting the PTE depends on the MMU type and other factors. It's
|
|
* an horrible mess that I'm not going to try to clean up now but
|
|
* I'm keeping it in one place rather than spread around
|
|
*/
|
|
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|
pte_t *ptep, pte_t pte, int percpu)
|
|
{
|
|
#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
|
|
/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
|
|
* helper pte_update() which does an atomic update. We need to do that
|
|
* because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
|
|
* per-CPU PTE such as a kmap_atomic, we do a simple update preserving
|
|
* the hash bits instead (ie, same as the non-SMP case)
|
|
*/
|
|
if (percpu)
|
|
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
|
|
| (pte_val(pte) & ~_PAGE_HASHPTE));
|
|
else
|
|
pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
|
|
|
|
#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
|
|
/* Second case is 32-bit with 64-bit PTE. In this case, we
|
|
* can just store as long as we do the two halves in the right order
|
|
* with a barrier in between. This is possible because we take care,
|
|
* in the hash code, to pre-invalidate if the PTE was already hashed,
|
|
* which synchronizes us with any concurrent invalidation.
|
|
* In the percpu case, we also fallback to the simple update preserving
|
|
* the hash bits
|
|
*/
|
|
if (percpu) {
|
|
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
|
|
| (pte_val(pte) & ~_PAGE_HASHPTE));
|
|
return;
|
|
}
|
|
#if _PAGE_HASHPTE != 0
|
|
if (pte_val(*ptep) & _PAGE_HASHPTE)
|
|
flush_hash_entry(mm, ptep, addr);
|
|
#endif
|
|
__asm__ __volatile__("\
|
|
stw%U0%X0 %2,%0\n\
|
|
eieio\n\
|
|
stw%U0%X0 %L2,%1"
|
|
: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
|
|
: "r" (pte) : "memory");
|
|
|
|
#elif defined(CONFIG_PPC_STD_MMU_32)
|
|
/* Third case is 32-bit hash table in UP mode, we need to preserve
|
|
* the _PAGE_HASHPTE bit since we may not have invalidated the previous
|
|
* translation in the hash yet (done in a subsequent flush_tlb_xxx())
|
|
* and see we need to keep track that this PTE needs invalidating
|
|
*/
|
|
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
|
|
| (pte_val(pte) & ~_PAGE_HASHPTE));
|
|
|
|
#else
|
|
/* Anything else just stores the PTE normally. That covers all 64-bit
|
|
* cases, and 32-bit non-hash with 32-bit PTEs.
|
|
*/
|
|
*ptep = pte;
|
|
#endif
|
|
}
|
|
|
|
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
|
pte_t *ptep, pte_t entry, int dirty);
|
|
|
|
/*
|
|
* Macro to mark a page protection value as "uncacheable".
|
|
*/
|
|
|
|
#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
|
|
_PAGE_WRITETHRU)
|
|
|
|
#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_NO_CACHE | _PAGE_GUARDED))
|
|
|
|
#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_NO_CACHE))
|
|
|
|
#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_COHERENT))
|
|
|
|
#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
|
|
_PAGE_COHERENT | _PAGE_WRITETHRU))
|
|
|
|
#define pgprot_cached_noncoherent(prot) \
|
|
(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
|
|
|
|
#define pgprot_writecombine pgprot_noncached_wc
|
|
|
|
struct file;
|
|
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
unsigned long size, pgprot_t vma_prot);
|
|
#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
|
|
/*
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
* for zero-mapped memory areas etc..
|
|
*/
|
|
extern unsigned long empty_zero_page[];
|
|
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
|
|
extern pgd_t swapper_pg_dir[];
|
|
|
|
extern void paging_init(void);
|
|
|
|
/*
|
|
* kern_addr_valid is intended to indicate whether an address is a valid
|
|
* kernel address. Most 32-bit archs define it as always true (like this)
|
|
* but most 64-bit archs actually perform a test. What should we do here?
|
|
*/
|
|
#define kern_addr_valid(addr) (1)
|
|
|
|
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
|
|
remap_pfn_range(vma, vaddr, pfn, size, prot)
|
|
|
|
#include <asm-generic/pgtable.h>
|
|
|
|
|
|
/*
|
|
* This gets called at the end of handling a page fault, when
|
|
* the kernel has put a new PTE into the page table for the process.
|
|
* We use it to ensure coherency between the i-cache and d-cache
|
|
* for the page which has just been mapped in.
|
|
* On machines which use an MMU hash table, we use this to put a
|
|
* corresponding HPTE into the hash table ahead of time, instead of
|
|
* waiting for the inevitable extra hash-table miss exception.
|
|
*/
|
|
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
|
|
|
|
extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
|
|
unsigned long end, int write, struct page **pages, int *nr);
|
|
|
|
extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
|
|
unsigned long end, int write, struct page **pages, int *nr);
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _ASM_POWERPC_PGTABLE_H */
|