linux/arch/arm64/mm/ioremap.c
Jungseok Lee c79b954bf6 arm64: mm: Implement 4 levels of translation tables
This patch implements 4 levels of translation tables since 3 levels
of page tables with 4KB pages cannot support 40-bit physical address
space described in [1] due to the following issue.

It is a restriction that kernel logical memory map with 4KB + 3 levels
(0xffffffc000000000-0xffffffffffffffff) cannot cover RAM region from
544GB to 1024GB in [1]. Specifically, ARM64 kernel fails to create
mapping for this region in map_mem function since __phys_to_virt for
this region reaches to address overflow.

If SoC design follows the document, [1], over 32GB RAM would be placed
from 544GB. Even 64GB system is supposed to use the region from 544GB
to 576GB for only 32GB RAM. Naturally, it would reach to enable 4 levels
of page tables to avoid hacking __virt_to_phys and __phys_to_virt.

However, it is recommended 4 levels of page table should be only enabled
if memory map is too sparse or there is about 512GB RAM.

References
----------
[1]: Principles of ARM Memory Maps, White Paper, Issue C

Signed-off-by: Jungseok Lee <jays.lee@samsung.com>
Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
[catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE]
[catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels]
[catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Jungseok Lee <jungseoklee85@gmail.com>
2014-07-23 15:27:40 +01:00

200 lines
5.0 KiB
C

/*
* Based on arch/arm/mm/ioremap.c
*
* (C) Copyright 1995 1996 Linus Torvalds
* Hacked for ARM by Phil Blundell <philb@gnu.org>
* Hacked to allow all architectures to build, and various cleanups
* by Russell King
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
pgprot_t prot, void *caller)
{
unsigned long last_addr;
unsigned long offset = phys_addr & ~PAGE_MASK;
int err;
unsigned long addr;
struct vm_struct *area;
/*
* Page align the mapping address and size, taking account of any
* offset.
*/
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(size + offset);
/*
* Don't allow wraparound, zero size or outside PHYS_MASK.
*/
last_addr = phys_addr + size - 1;
if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK))
return NULL;
/*
* Don't allow RAM to be mapped.
*/
if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr))))
return NULL;
area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (!area)
return NULL;
addr = (unsigned long)area->addr;
err = ioremap_page_range(addr, addr + size, phys_addr, prot);
if (err) {
vunmap((void *)addr);
return NULL;
}
return (void __iomem *)(offset + addr);
}
void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
{
return __ioremap_caller(phys_addr, size, prot,
__builtin_return_address(0));
}
EXPORT_SYMBOL(__ioremap);
void __iounmap(volatile void __iomem *io_addr)
{
unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
/*
* We could get an address outside vmalloc range in case
* of ioremap_cache() reusing a RAM mapping.
*/
if (VMALLOC_START <= addr && addr < VMALLOC_END)
vunmap((void *)addr);
}
EXPORT_SYMBOL(__iounmap);
void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
{
/* For normal memory we already have a cacheable mapping. */
if (pfn_valid(__phys_to_pfn(phys_addr)))
return (void __iomem *)__phys_to_virt(phys_addr);
return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
#ifndef CONFIG_ARM64_2_LEVELS
static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
#ifdef CONFIG_ARM64_4_LEVELS
static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
#endif
static inline pud_t * __init early_ioremap_pud(unsigned long addr)
{
pgd_t *pgd;
pgd = pgd_offset_k(addr);
BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
return pud_offset(pgd, addr);
}
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
pud_t *pud = early_ioremap_pud(addr);
BUG_ON(pud_none(*pud) || pud_bad(*pud));
return pmd_offset(pud, addr);
}
static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
pmd_t *pmd = early_ioremap_pmd(addr);
BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
return pte_offset_kernel(pmd, addr);
}
void __init early_ioremap_init(void)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN);
pgd = pgd_offset_k(addr);
pgd_populate(&init_mm, pgd, bm_pud);
pud = pud_offset(pgd, addr);
pud_populate(&init_mm, pud, bm_pmd);
pmd = pmd_offset(pud, addr);
pmd_populate_kernel(&init_mm, pmd, bm_pte);
/*
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
pr_warn("pmd %p != %p\n",
pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
fix_to_virt(FIX_BTMAP_END));
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
pr_warn("FIX_BTMAP_BEGIN: %d\n",
FIX_BTMAP_BEGIN);
}
early_ioremap_setup();
}
void __init __early_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
if (idx >= __end_of_fixed_addresses) {
BUG();
return;
}
pte = early_ioremap_pte(addr);
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else {
pte_clear(&init_mm, addr, pte);
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
}
}