linux/arch/arm64/include/asm/pgtable.h

686 lines
20 KiB
C
Raw Normal View History

/*
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PGTABLE_H
#define __ASM_PGTABLE_H
#include <asm/bug.h>
#include <asm/proc-fns.h>
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
/*
* Software defined PTE bits definition.
*/
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
/*
* VMALLOC and SPARSEMEM_VMEMMAP ranges.
*
* VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
* (rounded up to PUD_SIZE).
* VMALLOC_START: beginning of the kernel VA space
* VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
* fixed mappings and modules
*/
#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
2015-10-12 15:52:58 +00:00
#ifndef CONFIG_KASAN
#define VMALLOC_START (VA_START)
2015-10-12 15:52:58 +00:00
#else
#include <asm/kasan.h>
#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K)
#endif
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
#define FIRST_USER_ADDRESS 0UL
#ifndef __ASSEMBLY__
#include <linux/mmdebug.h>
extern void __pte_error(const char *file, int line, unsigned long val);
extern void __pmd_error(const char *file, int line, unsigned long val);
arm64: mm: Implement 4 levels of translation tables This patch implements 4 levels of translation tables since 3 levels of page tables with 4KB pages cannot support 40-bit physical address space described in [1] due to the following issue. It is a restriction that kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create mapping for this region in map_mem function since __phys_to_virt for this region reaches to address overflow. If SoC design follows the document, [1], over 32GB RAM would be placed from 544GB. Even 64GB system is supposed to use the region from 544GB to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels of page tables to avoid hacking __virt_to_phys and __phys_to_virt. However, it is recommended 4 levels of page table should be only enabled if memory map is too sparse or there is about 512GB RAM. References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee <jays.lee@samsung.com> Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> Acked-by: Kukjin Kim <kgene.kim@samsung.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Reviewed-by: Steve Capper <steve.capper@linaro.org> [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Jungseok Lee <jungseoklee85@gmail.com>
2014-05-12 09:40:51 +00:00
extern void __pud_error(const char *file, int line, unsigned long val);
extern void __pgd_error(const char *file, int line, unsigned long val);
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_COPY
#define __P011 PAGE_COPY
#define __P100 PAGE_READONLY_EXEC
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_COPY_EXEC
#define __P111 PAGE_COPY_EXEC
#define __S000 PAGE_NONE
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
#define __S100 PAGE_READONLY_EXEC
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
extern struct page *empty_zero_page;
#define ZERO_PAGE(vaddr) (empty_zero_page)
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
#define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
#define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
#define pte_none(pte) (!pte_val(pte))
#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
/* Find an entry in the third-level page table. */
#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
/*
* The following only work if pte_present(). Undefined behaviour otherwise.
*/
#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
#else
#define pte_hw_dirty(pte) (0)
#endif
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
#define pte_valid_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
pte_val(pte) &= ~pgprot_val(prot);
return pte;
}
static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
{
pte_val(pte) |= pgprot_val(prot);
return pte;
}
static inline pte_t pte_wrprotect(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_WRITE));
}
static inline pte_t pte_mkwrite(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_WRITE));
}
static inline pte_t pte_mkclean(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
}
static inline pte_t pte_mkdirty(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_DIRTY));
}
static inline pte_t pte_mkold(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_AF));
}
static inline pte_t pte_mkyoung(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_AF));
}
static inline pte_t pte_mkspecial(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}
static inline pte_t pte_mkcont(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_CONT));
}
static inline pte_t pte_mknoncont(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
static inline void set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
* or update_mmu_cache() have the necessary barriers.
*/
if (pte_valid_not_user(pte)) {
dsb(ishst);
isb();
}
}
struct mm_struct;
struct vm_area_struct;
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
* (PTE_WRITE == PTE_DBM):
*
* Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw)
* 0 0 | 1 0 0
* 0 1 | 1 1 0
* 1 0 | 1 0 1
* 1 1 | 0 1 x
*
* When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via
* the page fault mechanism. Checking the dirty status of a pte becomes:
*
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
if (pte_valid_user(pte)) {
if (!pte_special(pte) && pte_exec(pte))
__sync_icache_dcache(pte, addr);
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
}
/*
* If the existing pte is valid, check for potential race with
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
pte_valid(*ptep)) {
BUG_ON(!pte_young(pte));
BUG_ON(pte_write(*ptep) && !pte_dirty(pte));
}
set_pte(ptep, pte);
}
/*
* Huge pte definitions.
*/
#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
/*
* Hugetlb definitions.
*/
#define HUGE_MAX_HSTATE 2
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
#define __HAVE_ARCH_PTE_SPECIAL
static inline pte_t pud_pte(pud_t pud)
{
return __pte(pud_val(pud));
}
static inline pmd_t pud_pmd(pud_t pud)
{
return __pmd(pud_val(pud));
}
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
}
static inline pmd_t pte_pmd(pte_t pte)
{
return __pmd(pte_val(pte));
}
static inline pgprot_t mk_sect_prot(pgprot_t prot)
{
return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
}
/*
* THP definitions.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd))
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
struct vm_area_struct;
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pud_write(pud) pte_write(pud_pte(pud))
#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
static inline int has_transparent_hugepage(void)
{
return 1;
}
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
/*
* Mark the prot value as uncacheable and unbufferable.
*/
#define pgprot_noncached(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
#define pgprot_writecombine(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot);
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_present(pmd) (pmd_val(pmd))
#define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
#ifdef CONFIG_ARM64_64K_PAGES
#define pud_sect(pud) (0)
#define pud_table(pud) (1)
#else
#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
PUD_TYPE_SECT)
#define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
PUD_TYPE_TABLE)
#endif
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
dsb(ishst);
isb();
}
static inline void pmd_clear(pmd_t *pmdp)
{
set_pmd(pmdp, __pmd(0));
}
static inline pte_t *pmd_page_vaddr(pmd_t pmd)
{
return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
}
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*/
#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
#if CONFIG_PGTABLE_LEVELS > 2
#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & 2))
#define pud_present(pud) (pud_val(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
dsb(ishst);
isb();
}
static inline void pud_clear(pud_t *pudp)
{
set_pud(pudp, __pud(0));
}
static inline pmd_t *pud_page_vaddr(pud_t pud)
{
return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
}
/* Find an entry in the second-level page table. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
{
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
}
#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
#if CONFIG_PGTABLE_LEVELS > 3
arm64: mm: Implement 4 levels of translation tables This patch implements 4 levels of translation tables since 3 levels of page tables with 4KB pages cannot support 40-bit physical address space described in [1] due to the following issue. It is a restriction that kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create mapping for this region in map_mem function since __phys_to_virt for this region reaches to address overflow. If SoC design follows the document, [1], over 32GB RAM would be placed from 544GB. Even 64GB system is supposed to use the region from 544GB to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels of page tables to avoid hacking __virt_to_phys and __phys_to_virt. However, it is recommended 4 levels of page table should be only enabled if memory map is too sparse or there is about 512GB RAM. References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee <jays.lee@samsung.com> Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> Acked-by: Kukjin Kim <kgene.kim@samsung.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Reviewed-by: Steve Capper <steve.capper@linaro.org> [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Jungseok Lee <jungseoklee85@gmail.com>
2014-05-12 09:40:51 +00:00
#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud))
arm64: mm: Implement 4 levels of translation tables This patch implements 4 levels of translation tables since 3 levels of page tables with 4KB pages cannot support 40-bit physical address space described in [1] due to the following issue. It is a restriction that kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create mapping for this region in map_mem function since __phys_to_virt for this region reaches to address overflow. If SoC design follows the document, [1], over 32GB RAM would be placed from 544GB. Even 64GB system is supposed to use the region from 544GB to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels of page tables to avoid hacking __virt_to_phys and __phys_to_virt. However, it is recommended 4 levels of page table should be only enabled if memory map is too sparse or there is about 512GB RAM. References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee <jays.lee@samsung.com> Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> Acked-by: Kukjin Kim <kgene.kim@samsung.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Reviewed-by: Steve Capper <steve.capper@linaro.org> [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Jungseok Lee <jungseoklee85@gmail.com>
2014-05-12 09:40:51 +00:00
#define pgd_none(pgd) (!pgd_val(pgd))
#define pgd_bad(pgd) (!(pgd_val(pgd) & 2))
#define pgd_present(pgd) (pgd_val(pgd))
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
dsb(ishst);
}
static inline void pgd_clear(pgd_t *pgdp)
{
set_pgd(pgdp, __pgd(0));
}
static inline pud_t *pgd_page_vaddr(pgd_t pgd)
{
return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
}
/* Find an entry in the frst-level page table. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
{
return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
}
#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
arm64: mm: Implement 4 levels of translation tables This patch implements 4 levels of translation tables since 3 levels of page tables with 4KB pages cannot support 40-bit physical address space described in [1] due to the following issue. It is a restriction that kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from 544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create mapping for this region in map_mem function since __phys_to_virt for this region reaches to address overflow. If SoC design follows the document, [1], over 32GB RAM would be placed from 544GB. Even 64GB system is supposed to use the region from 544GB to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels of page tables to avoid hacking __virt_to_phys and __phys_to_virt. However, it is recommended 4 levels of page table should be only enabled if memory map is too sparse or there is about 512GB RAM. References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee <jays.lee@samsung.com> Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> Acked-by: Kukjin Kim <kgene.kim@samsung.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Reviewed-by: Steve Capper <steve.capper@linaro.org> [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Jungseok Lee <jungseoklee85@gmail.com>
2014-05-12 09:40:51 +00:00
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
/* to find an entry in a page-table-directory */
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr))
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
PTE_PROT_NONE | PTE_VALID | PTE_WRITE;
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
pte = pte_mkdirty(pte);
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
return pte;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Atomic pte/pmd modifications.
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pte_t *ptep)
{
pteval_t pteval;
unsigned int tmp, res;
asm volatile("// ptep_test_and_clear_young\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
" and %0, %0, %4 // clear PTE_AF\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
return res;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
{
return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
pteval_t old_pteval;
unsigned int tmp;
asm volatile("// ptep_get_and_clear\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" stxr %w1, xzr, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
return __pte(old_pteval);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* ptep_set_wrprotect - mark read-only while trasferring potential hardware
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pteval_t pteval;
unsigned long tmp;
asm volatile("// ptep_set_wrprotect\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n"
" csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n"
" orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n"
" and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
: "cc");
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}
#endif
#endif /* CONFIG_ARM64_HW_AFDBM */
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
* bits 2-7: swap type
* bits 8-57: swap offset
*/
#define __SWP_TYPE_SHIFT 2
#define __SWP_TYPE_BITS 6
#define __SWP_OFFSET_BITS 50
#define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1)
#define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
/*
* Ensure that there are not more swap files than can be encoded in the kernel
* PTEs.
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
extern int kern_addr_valid(unsigned long addr);
#include <asm-generic/pgtable.h>
#define pgtable_cache_init() do { } while (0)
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
*/
static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/*
* We don't do anything here, so there's a very small chance of
* us retaking a user fault which we just fixed up. The alternative
* is doing a dsb(ishst), but that penalises the fastpath.
*/
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#define kc_vaddr_to_offset(v) ((v) & ~VA_START)
#define kc_offset_to_vaddr(o) ((o) | VA_START)
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */