2019-06-03 05:44:50 +00:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
|
|
*/
|
|
|
|
#ifndef __ASM_PGTABLE_H
|
|
|
|
#define __ASM_PGTABLE_H
|
|
|
|
|
2015-07-10 16:24:28 +00:00
|
|
|
#include <asm/bug.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
#include <asm/proc-fns.h>
|
|
|
|
|
|
|
|
#include <asm/memory.h>
|
|
|
|
#include <asm/pgtable-hwdef.h>
|
arm64: Remove fixmap include fragility
The asm-generic fixmap.h depends on each architecture's fixmap.h to pull
in the definition of PAGE_KERNEL_RO, if this exists. In the absence of
this, FIXMAP_PAGE_RO will not be defined. In mm/early_ioremap.c the
definition of early_memremap_ro is predicated on FIXMAP_PAGE_RO being
defined.
Currently, the arm64 fixmap.h doesn't include pgtable.h for the
definition of PAGE_KERNEL_RO, and as a knock-on effect early_memremap_ro
is not always defined, leading to link-time failures when it is used.
This has been observed with defconfig on next-20160226.
Unfortunately, as pgtable.h includes fixmap.h, adding the include
introduces a circular dependency, which is just as fragile.
Instead, this patch factors out PAGE_KERNEL_RO and other prot
definitions into a new pgtable-prot header which can be included by both
pgtable.h and fixmap.h, avoiding the circular dependency, and ensuring
that early_memremap_ro is always defined where it is used.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-02-26 14:31:32 +00:00
|
|
|
#include <asm/pgtable-prot.h>
|
2018-10-29 09:25:58 +00:00
|
|
|
#include <asm/tlbflush.h>
|
2012-03-05 11:49:27 +00:00
|
|
|
|
|
|
|
/*
|
2016-03-30 14:46:00 +00:00
|
|
|
* VMALLOC range.
|
2014-07-16 16:42:43 +00:00
|
|
|
*
|
2016-02-16 12:52:40 +00:00
|
|
|
* VMALLOC_START: beginning of the kernel vmalloc space
|
2019-10-24 12:01:43 +00:00
|
|
|
* VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
|
2016-03-30 14:46:00 +00:00
|
|
|
* and fixed mappings
|
2012-03-05 11:49:27 +00:00
|
|
|
*/
|
2016-02-16 12:52:40 +00:00
|
|
|
/* The vmalloc range starts at MODULES_END. */
#define VMALLOC_START (MODULES_END)
|
2019-08-07 15:55:14 +00:00
|
|
|
/*
 * Written as the negated sum PUD_SIZE + VMEMMAP_SIZE + SZ_64K.
 * NOTE(review): presumably this wraps to an address that far below the top
 * of the address space -- confirm the operand types.
 */
#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-02-11 23:26:41 +00:00
|
|
|
/* NOTE(review): presumably the lowest address usable for user mappings -- confirm against the generic mm users. */
#define FIRST_USER_ADDRESS 0UL
|
2012-03-05 11:49:27 +00:00
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
2015-07-10 16:24:28 +00:00
|
|
|
|
2017-06-26 13:27:36 +00:00
|
|
|
#include <asm/cmpxchg.h>
|
2016-01-25 11:45:07 +00:00
|
|
|
#include <asm/fixmap.h>
|
2015-07-10 16:24:28 +00:00
|
|
|
#include <linux/mmdebug.h>
|
2017-12-12 10:48:54 +00:00
|
|
|
#include <linux/mm_types.h>
|
|
|
|
#include <linux/sched.h>
|
2015-07-10 16:24:28 +00:00
|
|
|
|
2019-08-07 15:55:20 +00:00
|
|
|
extern struct page *vmemmap;
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
extern void __pte_error(const char *file, int line, unsigned long val);
|
|
|
|
extern void __pmd_error(const char *file, int line, unsigned long val);
|
2014-05-12 09:40:51 +00:00
|
|
|
extern void __pud_error(const char *file, int line, unsigned long val);
|
2012-03-05 11:49:27 +00:00
|
|
|
extern void __pgd_error(const char *file, int line, unsigned long val);
|
|
|
|
|
2020-06-25 08:03:14 +00:00
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
|
|
|
|
|
|
|
|
/* Set stride and tlb_level in flush_*_tlb_range */
|
|
|
|
/*
 * Flush [addr, end) of vma through __flush_tlb_range(), passing PMD_SIZE as
 * the stride and 2 as the level. The meaning of the `false` argument is
 * defined by __flush_tlb_range(), which is not visible here.
 */
#define flush_pmd_tlb_range(vma, addr, end) \
|
|
|
|
__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
|
|
|
|
/*
 * Flush [addr, end) of vma through __flush_tlb_range(), passing PUD_SIZE as
 * the stride and 1 as the level. The meaning of the `false` argument is
 * defined by __flush_tlb_range(), which is not visible here.
 */
#define flush_pud_tlb_range(vma, addr, end) \
|
|
|
|
__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
|
|
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
|
|
* for zero-mapped memory areas etc..
|
|
|
|
*/
|
arm64: mm: place empty_zero_page in bss
Currently the zero page is set up in paging_init, and thus we cannot use
the zero page earlier. We use the zero page as a reserved TTBR value
from which no TLB entries may be allocated (e.g. when uninstalling the
idmap). To enable such usage earlier (as may be required for invasive
changes to the kernel page tables), and to minimise the time that the
idmap is active, we need to be able to use the zero page before
paging_init.
This patch follows the example set by x86, by allocating the zero page
at compile time, in .bss. This means that the zero page itself is
available immediately upon entry to start_kernel (as we zero .bss before
this), and also means that the zero page takes up no space in the raw
Image binary. The associated struct page is allocated in bootmem_init,
and remains unavailable until this time.
Outside of arch code, the only users of empty_zero_page assume that the
empty_zero_page symbol refers to the zeroed memory itself, and that
ZERO_PAGE(x) must be used to acquire the associated struct page,
following the example of x86. This patch also brings arm64 in line with
these assumptions.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-01-25 11:44:57 +00:00
|
|
|
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
2017-01-10 21:35:49 +00:00
|
|
|
/* Page for the empty_zero_page symbol; the vaddr argument is not used. */
#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2014-07-21 13:52:49 +00:00
|
|
|
/* Report a bad pte via __pte_error(), passing the caller's file, line and the raw pte value. */
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
|
|
|
|
|
2017-12-13 17:07:21 +00:00
|
|
|
/*
|
|
|
|
* Macros to convert between a physical address and its placement in a
|
|
|
|
* page table entry, taking care of 52-bit addresses.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_ARM64_PA_BITS_52
|
|
|
|
/* 52-bit PA: combine the PTE_ADDR_LOW bits with the PTE_ADDR_HIGH bits shifted left by 36. */
#define __pte_to_phys(pte) \
|
|
|
|
((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36))
|
|
|
|
/* 52-bit PA: OR the address with itself shifted right by 36, then keep only the PTE_ADDR_MASK bits. */
#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK)
|
|
|
|
#else
|
|
|
|
/* Without CONFIG_ARM64_PA_BITS_52: the physical address is the entry masked by PTE_ADDR_MASK. */
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
|
|
|
|
/* Without CONFIG_ARM64_PA_BITS_52: the physical address is used unchanged. */
#define __phys_to_pte_val(phys) (phys)
|
|
|
|
#endif
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2017-12-13 17:07:21 +00:00
|
|
|
/* Page frame number: the physical address held in pte shifted right by PAGE_SHIFT. */
#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
|
|
|
|
/* Build a pte from a page frame number (widened to phys_addr_t before shifting) ORed with the bits of prot. */
#define pfn_pte(pfn,prot) \
|
|
|
|
__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
|
|
|
/* True when the raw pte value is zero. */
#define pte_none(pte) (!pte_val(pte))
|
|
|
|
/* Store an all-zero pte through set_pte(); the mm and addr arguments are not used. */
#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
|
|
|
|
/* Page for the frame number held in pte, obtained via pfn_to_page(). */
#define pte_page(pte) (pfn_to_page(pte_pfn(pte)))
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* The following only work if pte_present(). Undefined behaviour otherwise.
|
|
|
|
*/
|
2014-02-25 11:38:53 +00:00
|
|
|
/* True when PTE_VALID or PTE_PROT_NONE is set. */
#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
|
|
|
|
/* True when PTE_AF is set. */
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
|
|
|
|
/* True when PTE_SPECIAL is set. */
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
|
|
|
|
/* True when PTE_WRITE is set. */
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
|
2017-01-27 10:54:12 +00:00
|
|
|
/* True when PTE_UXN is clear. */
#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN))
|
2015-10-07 17:00:21 +00:00
|
|
|
/* True when PTE_CONT is set. */
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
|
2019-07-16 23:30:51 +00:00
|
|
|
/* True when PTE_DEVMAP is set. */
#define pte_devmap(pte) (!!(pte_val(pte) & PTE_DEVMAP))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
arm64: mm: set the contiguous bit for kernel mappings where appropriate
This is the third attempt at enabling the use of contiguous hints for
kernel mappings. The most recent attempt 0bfc445dec9d was reverted after
it turned out that updating permission attributes on live contiguous ranges
may result in TLB conflicts. So this time, the contiguous hint is not set
for .rodata or for the linear alias of .text/.rodata, both of which are
mapped read-write initially, and remapped read-only at a later stage.
(Note that the latter region could also be unmapped and remapped again
with updated permission attributes, given that the region, while live, is
only mapped for the convenience of the hibernation code, but that also
means the TLB footprint is negligible anyway, so why bother)
This enables the following contiguous range sizes for the virtual mapping
of the kernel image, and for the linear mapping:
granule size | cont PTE | cont PMD |
-------------+------------+------------+
4 KB | 64 KB | 32 MB |
16 KB | 2 MB | 1 GB* |
64 KB | 2 MB | 16 GB* |
* Only when built for 3 or more levels of translation. This is due to the
fact that a 2 level configuration only consists of PGDs and PTEs, and the
added complexity of dealing with folded PMDs is not justified considering
that 16 GB contiguous ranges are likely to be ignored by the hardware (and
16k/2 levels is a niche configuration)
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2017-03-09 20:52:09 +00:00
|
|
|
/*
 * Yield (addr + CONT_PTE_SIZE) & CONT_PTE_MASK when that boundary lies
 * below end, otherwise end. Both sides of the comparison are decremented
 * first; NOTE(review): presumably so a boundary or end that wrapped to 0
 * compares as the highest address -- confirm.
 */
#define pte_cont_addr_end(addr, end) \
|
|
|
|
({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \
|
|
|
|
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
|
|
|
|
})
|
|
|
|
|
|
|
|
/*
 * Yield (addr + CONT_PMD_SIZE) & CONT_PMD_MASK when that boundary lies
 * below end, otherwise end. Both sides of the comparison are decremented
 * first; NOTE(review): presumably so a boundary or end that wrapped to 0
 * compares as the highest address -- confirm.
 */
#define pmd_cont_addr_end(addr, end) \
|
|
|
|
({ unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK; \
|
|
|
|
(__boundary - 1 < (end) - 1) ? __boundary : (end); \
|
|
|
|
})
|
|
|
|
|
2015-09-11 17:22:00 +00:00
|
|
|
/* Hardware-dirty: PTE_WRITE is set and PTE_RDONLY is clear. */
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
|
2015-07-10 16:24:28 +00:00
|
|
|
/* Software-dirty: PTE_DIRTY is set. */
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
|
|
|
|
/* Dirty when either the software or the hardware criterion holds. */
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
|
|
|
|
|
2015-07-28 15:14:03 +00:00
|
|
|
/* True when PTE_VALID is set. */
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
|
2017-01-27 10:54:12 +00:00
|
|
|
/* True when PTE_VALID is set and PTE_USER is clear. */
#define pte_valid_not_user(pte) \
|
2020-01-06 14:35:39 +00:00
|
|
|
((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
|
2015-10-30 18:56:19 +00:00
|
|
|
/* True when both PTE_VALID and PTE_AF are set. */
#define pte_valid_young(pte) \
|
|
|
|
((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
|
2017-10-26 17:36:47 +00:00
|
|
|
/* True when both PTE_VALID and PTE_USER are set. */
#define pte_valid_user(pte) \
|
|
|
|
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
|
2015-10-30 18:56:19 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Could the pte be present in the TLB? We must check mm_tlb_flush_pending
|
|
|
|
* so that we don't erroneously return false for pages that have been
|
|
|
|
* remapped as PROT_NONE but are yet to be flushed from the TLB.
|
|
|
|
*/
|
|
|
|
/* Evaluates pte_present() while mm has a TLB flush pending, pte_valid_young() otherwise. */
#define pte_accessible(mm, pte) \
|
|
|
|
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2017-10-26 17:36:47 +00:00
|
|
|
/*
|
|
|
|
* p??_access_permitted() is true for valid user mappings (subject to the
|
2020-01-06 14:35:39 +00:00
|
|
|
* write permission check). PROT_NONE mappings do not have the PTE_VALID bit
|
|
|
|
* set.
|
2017-10-26 17:36:47 +00:00
|
|
|
*/
|
|
|
|
/* Requires a valid user pte; when write is non-zero the pte must also satisfy pte_write(). */
#define pte_access_permitted(pte, write) \
|
|
|
|
(pte_valid_user(pte) && (!(write) || pte_write(pte)))
|
|
|
|
/* pmd variant: applies pte_access_permitted() to the pmd converted with pmd_pte(). */
#define pmd_access_permitted(pmd, write) \
|
|
|
|
(pte_access_permitted(pmd_pte(pmd), (write)))
|
|
|
|
/* pud variant: applies pte_access_permitted() to the pud converted with pud_pte(). */
#define pud_access_permitted(pud, write) \
|
|
|
|
(pte_access_permitted(pud_pte(pud), (write)))
|
|
|
|
|
2014-08-19 19:41:42 +00:00
|
|
|
/* Return pte with every bit that is set in prot cleared. */
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
|
2014-01-15 14:07:12 +00:00
|
|
|
{
|
2014-08-19 19:41:42 +00:00
|
|
|
pte_val(pte) &= ~pgprot_val(prot);
|
2014-01-15 14:07:12 +00:00
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2014-08-19 19:41:42 +00:00
|
|
|
/* Return pte with every bit that is set in prot also set. */
static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
|
2014-01-15 14:07:12 +00:00
|
|
|
{
|
2014-08-19 19:41:42 +00:00
|
|
|
pte_val(pte) |= pgprot_val(prot);
|
2014-01-15 14:07:12 +00:00
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2014-08-19 19:41:42 +00:00
|
|
|
/* Return pte write-protected: PTE_WRITE cleared and PTE_RDONLY set. */
static inline pte_t pte_wrprotect(pte_t pte)
|
|
|
|
{
|
2017-07-04 18:04:18 +00:00
|
|
|
pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
|
|
|
|
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
|
|
|
|
return pte;
|
2014-08-19 19:41:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return pte made writable: PTE_WRITE set and PTE_RDONLY cleared. */
static inline pte_t pte_mkwrite(pte_t pte)
|
|
|
|
{
|
2017-07-04 18:04:18 +00:00
|
|
|
pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
|
|
|
|
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
|
|
|
|
return pte;
|
2014-08-19 19:41:42 +00:00
|
|
|
}
|
|
|
|
|
2014-01-15 14:07:12 +00:00
|
|
|
/* Return pte marked clean: PTE_DIRTY cleared and PTE_RDONLY set. */
static inline pte_t pte_mkclean(pte_t pte)
|
|
|
|
{
|
2017-12-01 17:22:14 +00:00
|
|
|
pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
|
|
|
|
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
|
|
|
|
|
|
|
|
return pte;
|
2014-01-15 14:07:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return pte marked dirty: PTE_DIRTY is always set, and PTE_RDONLY is
 * additionally cleared when the pte is writable (pte_write()).
 */
static inline pte_t pte_mkdirty(pte_t pte)
|
|
|
|
{
|
2017-12-01 17:22:14 +00:00
|
|
|
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
|
|
|
|
|
|
|
|
if (pte_write(pte))
|
|
|
|
pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
|
|
|
|
|
|
|
|
return pte;
|
2014-01-15 14:07:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return pte with PTE_AF cleared. */
static inline pte_t pte_mkold(pte_t pte)
|
|
|
|
{
|
2014-08-19 19:41:42 +00:00
|
|
|
return clear_pte_bit(pte, __pgprot(PTE_AF));
|
2014-01-15 14:07:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return pte with PTE_AF set. */
static inline pte_t pte_mkyoung(pte_t pte)
|
|
|
|
{
|
2014-08-19 19:41:42 +00:00
|
|
|
return set_pte_bit(pte, __pgprot(PTE_AF));
|
2014-01-15 14:07:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return pte with PTE_SPECIAL set. */
static inline pte_t pte_mkspecial(pte_t pte)
|
|
|
|
{
|
2014-08-19 19:41:42 +00:00
|
|
|
return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
|
2014-01-15 14:07:12 +00:00
|
|
|
}
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-10-07 17:00:21 +00:00
|
|
|
/* Return pte with PTE_CONT set and the PTE_TYPE_PAGE bits set as well. */
static inline pte_t pte_mkcont(pte_t pte)
|
|
|
|
{
|
2015-12-17 19:31:26 +00:00
|
|
|
pte = set_pte_bit(pte, __pgprot(PTE_CONT));
|
|
|
|
return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE));
|
2015-10-07 17:00:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Return pte with PTE_CONT cleared. */
static inline pte_t pte_mknoncont(pte_t pte)
|
|
|
|
{
|
|
|
|
return clear_pte_bit(pte, __pgprot(PTE_CONT));
|
|
|
|
}
|
|
|
|
|
2016-08-24 17:27:30 +00:00
|
|
|
/* Return pte with PTE_VALID set. */
static inline pte_t pte_mkpresent(pte_t pte)
|
|
|
|
{
|
|
|
|
return set_pte_bit(pte, __pgprot(PTE_VALID));
|
|
|
|
}
|
|
|
|
|
2015-12-17 19:31:26 +00:00
|
|
|
/* Return pmd with PMD_SECT_CONT set. */
static inline pmd_t pmd_mkcont(pmd_t pmd)
|
|
|
|
{
|
|
|
|
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
|
|
|
|
}
|
|
|
|
|
2019-07-16 23:30:51 +00:00
|
|
|
/* Return pte with both PTE_DEVMAP and PTE_SPECIAL set. */
static inline pte_t pte_mkdevmap(pte_t pte)
|
|
|
|
{
|
arm64: mm: add missing PTE_SPECIAL in pte_mkdevmap on arm64
Without this patch, the MAP_SYNC test case will cause a print_bad_pte
warning on arm64 as follows:
[ 25.542693] BUG: Bad page map in process mapdax333 pte:2e8000448800f53 pmd:41ff5f003
[ 25.546360] page:ffff7e0010220000 refcount:1 mapcount:-1 mapping:ffff8003e29c7440 index:0x0
[ 25.550281] ext4_dax_aops
[ 25.550282] name:"__aaabbbcccddd__"
[ 25.551553] flags: 0x3ffff0000001002(referenced|reserved)
[ 25.555802] raw: 03ffff0000001002 ffff8003dfffa908 0000000000000000 ffff8003e29c7440
[ 25.559446] raw: 0000000000000000 0000000000000000 00000001fffffffe 0000000000000000
[ 25.563075] page dumped because: bad pte
[ 25.564938] addr:0000ffffbe05b000 vm_flags:208000fb anon_vma:0000000000000000 mapping:ffff8003e29c7440 index:0
[ 25.574272] file:__aaabbbcccddd__ fault:ext4_dax_fault mmmmap:ext4_file_mmap readpage:0x0
[ 25.578799] CPU: 1 PID: 1180 Comm: mapdax333 Not tainted 5.2.0+ #21
[ 25.581702] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[ 25.585624] Call trace:
[ 25.587008] dump_backtrace+0x0/0x178
[ 25.588799] show_stack+0x24/0x30
[ 25.590328] dump_stack+0xa8/0xcc
[ 25.591901] print_bad_pte+0x18c/0x218
[ 25.593628] unmap_page_range+0x778/0xc00
[ 25.595506] unmap_single_vma+0x94/0xe8
[ 25.597304] unmap_vmas+0x90/0x108
[ 25.598901] unmap_region+0xc0/0x128
[ 25.600566] __do_munmap+0x284/0x3f0
[ 25.602245] __vm_munmap+0x78/0xe0
[ 25.603820] __arm64_sys_munmap+0x34/0x48
[ 25.605709] el0_svc_common.constprop.0+0x78/0x168
[ 25.607956] el0_svc_handler+0x34/0x90
[ 25.609698] el0_svc+0x8/0xc
[...]
The root cause is in _vm_normal_page, without the PTE_SPECIAL bit,
the return value will be incorrectly set to pfn_to_page(pfn) instead
of NULL. Besides, this patch also rewrite the pmd_mkdevmap to avoid
setting PTE_SPECIAL for pmd
The MAP_SYNC test case is as follows(Provided by Yibo Cai)
$#include <stdio.h>
$#include <string.h>
$#include <unistd.h>
$#include <sys/file.h>
$#include <sys/mman.h>
$#ifndef MAP_SYNC
$#define MAP_SYNC 0x80000
$#endif
/* mount -o dax /dev/pmem0 /mnt */
$#define F "/mnt/__aaabbbcccddd__"
int main(void)
{
int fd;
char buf[4096];
void *addr;
if ((fd = open(F, O_CREAT|O_TRUNC|O_RDWR, 0644)) < 0) {
perror("open1");
return 1;
}
if (write(fd, buf, 4096) != 4096) {
perror("lseek");
return 1;
}
addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_SYNC, fd, 0);
if (addr == MAP_FAILED) {
perror("mmap");
printf("did you mount with '-o dax'?\n");
return 1;
}
memset(addr, 0x55, 4096);
if (munmap(addr, 4096) == -1) {
perror("munmap");
return 1;
}
close(fd);
return 0;
}
Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support")
Reported-by: Yibo Cai <Yibo.Cai@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Robin Murphy <Robin.Murphy@arm.com>
Signed-off-by: Jia He <justin.he@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2019-08-07 04:58:51 +00:00
|
|
|
return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
|
2019-07-16 23:30:51 +00:00
|
|
|
}
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
 * Store pte into *ptep with WRITE_ONCE(). When the new entry is valid and
 * not a user entry (pte_valid_not_user()), follow the store with
 * dsb(ishst) and then isb().
 */
static inline void set_pte(pte_t *ptep, pte_t pte)
|
|
|
|
{
|
2018-02-15 11:14:56 +00:00
|
|
|
WRITE_ONCE(*ptep, pte);
|
2014-06-09 10:55:03 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Only if the new pte is valid and kernel, otherwise TLB maintenance
|
|
|
|
* or update_mmu_cache() have the necessary barriers.
|
|
|
|
*/
|
Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}"
This reverts commit 24fe1b0efad4fcdd32ce46cffeab297f22581707.
Commit 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from
set_{pte,pmd,pud}") removed ISB instructions immediately following updates
to the page table, on the grounds that they are not required by the
architecture and a DSB alone is sufficient to ensure that subsequent data
accesses use the new translation:
DDI0487E_a, B2-128:
| ... no instruction that appears in program order after the DSB
| instruction can alter any state of the system or perform any part of
| its functionality until the DSB completes other than:
|
| * Being fetched from memory and decoded
| * Reading the general-purpose, SIMD and floating-point,
| Special-purpose, or System registers that are directly or indirectly
| read without causing side-effects.
However, the same document also states the following:
DDI0487E_a, B2-125:
| DMB and DSB instructions affect reads and writes to the memory system
| generated by Load/Store instructions and data or unified cache
| maintenance instructions being executed by the PE. Instruction fetches
| or accesses caused by a hardware translation table access are not
| explicit accesses.
which appears to claim that the DSB alone is insufficient. Unfortunately,
some CPU designers have followed the second clause above, whereas in Linux
we've been relying on the first. This means that our mapping sequence:
MOV X0, <valid pte>
STR X0, [Xptep] // Store new PTE to page table
DSB ISHST
LDR X1, [X2] // Translates using the new PTE
can actually raise a translation fault on the load instruction because the
translation can be performed speculatively before the page table update and
then marked as "faulting" by the CPU. For user PTEs, this is ok because we
can handle the spurious fault, but for kernel PTEs and intermediate table
entries this results in a panic().
Revert the offending commit to reintroduce the missing barriers.
Cc: <stable@vger.kernel.org>
Fixes: 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}")
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2019-08-22 13:58:37 +00:00
|
|
|
if (pte_valid_not_user(pte)) {
|
2014-06-09 10:55:03 +00:00
|
|
|
dsb(ishst);
|
Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}"
This reverts commit 24fe1b0efad4fcdd32ce46cffeab297f22581707.
Commit 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from
set_{pte,pmd,pud}") removed ISB instructions immediately following updates
to the page table, on the grounds that they are not required by the
architecture and a DSB alone is sufficient to ensure that subsequent data
accesses use the new translation:
DDI0487E_a, B2-128:
| ... no instruction that appears in program order after the DSB
| instruction can alter any state of the system or perform any part of
| its functionality until the DSB completes other than:
|
| * Being fetched from memory and decoded
| * Reading the general-purpose, SIMD and floating-point,
| Special-purpose, or System registers that are directly or indirectly
| read without causing side-effects.
However, the same document also states the following:
DDI0487E_a, B2-125:
| DMB and DSB instructions affect reads and writes to the memory system
| generated by Load/Store instructions and data or unified cache
| maintenance instructions being executed by the PE. Instruction fetches
| or accesses caused by a hardware translation table access are not
| explicit accesses.
which appears to claim that the DSB alone is insufficient. Unfortunately,
some CPU designers have followed the second clause above, whereas in Linux
we've been relying on the first. This means that our mapping sequence:
MOV X0, <valid pte>
STR X0, [Xptep] // Store new PTE to page table
DSB ISHST
LDR X1, [X2] // Translates using the new PTE
can actually raise a translation fault on the load instruction because the
translation can be performed speculatively before the page table update and
then marked as "faulting" by the CPU. For user PTEs, this is ok because we
can handle the spurious fault, but for kernel PTEs and intermediate table
entries this results in a panic().
Revert the offending commit to reintroduce the missing barriers.
Cc: <stable@vger.kernel.org>
Fixes: 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}")
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2019-08-22 13:58:37 +00:00
|
|
|
isb();
|
|
|
|
}
|
2012-03-05 11:49:27 +00:00
|
|
|
}
|
|
|
|
|
2018-04-17 12:03:09 +00:00
|
|
|
extern void __sync_icache_dcache(pte_t pteval);
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-07-10 16:24:28 +00:00
|
|
|
/*
|
|
|
|
* PTE bits configuration in the presence of hardware Dirty Bit Management
|
|
|
|
* (PTE_WRITE == PTE_DBM):
|
|
|
|
*
|
|
|
|
* Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw)
|
|
|
|
* 0 0 | 1 0 0
|
|
|
|
* 0 1 | 1 1 0
|
|
|
|
* 1 0 | 1 0 1
|
|
|
|
* 1 1 | 0 1 x
|
|
|
|
*
|
|
|
|
* When hardware DBM is not present, the software PTE_DIRTY bit is updated via
|
|
|
|
* the page fault mechanism. Checking the dirty status of a pte becomes:
|
|
|
|
*
|
2015-09-11 17:22:00 +00:00
|
|
|
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
|
2015-07-10 16:24:28 +00:00
|
|
|
*/
|
2019-06-10 12:41:07 +00:00
|
|
|
|
|
|
|
/*
 * Debug-only sanity check run before a pte is overwritten; it never
 * modifies *ptep. It does nothing unless CONFIG_DEBUG_VM is enabled, and
 * also returns early when either the current entry or the new pte is not
 * valid, or when mm is not current->active_mm and has at most one user.
 * Otherwise it warns once if the new pte is not young, and warns once if
 * the old entry was writable while the new pte is not dirty.
 */
static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
|
|
|
|
pte_t pte)
|
2012-03-05 11:49:27 +00:00
|
|
|
{
|
2018-02-15 11:14:56 +00:00
|
|
|
pte_t old_pte;
|
|
|
|
|
2019-06-10 12:41:07 +00:00
|
|
|
if (!IS_ENABLED(CONFIG_DEBUG_VM))
|
|
|
|
return;
|
|
|
|
|
|
|
|
old_pte = READ_ONCE(*ptep);
|
|
|
|
|
|
|
|
if (!pte_valid(old_pte) || !pte_valid(pte))
|
|
|
|
return;
|
|
|
|
if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
|
|
|
|
return;
|
2013-01-09 11:08:10 +00:00
|
|
|
|
2015-07-10 16:24:28 +00:00
|
|
|
/*
|
2019-06-10 12:41:07 +00:00
|
|
|
* Check for potential race with hardware updates of the pte
|
|
|
|
* (ptep_set_access_flags safely changes valid ptes without going
|
|
|
|
* through an invalid entry).
|
2015-07-10 16:24:28 +00:00
|
|
|
*/
|
2019-06-10 12:41:07 +00:00
|
|
|
VM_WARN_ONCE(!pte_young(pte),
|
|
|
|
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
|
|
|
|
__func__, pte_val(old_pte), pte_val(pte));
|
|
|
|
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
|
|
|
|
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
|
|
|
|
__func__, pte_val(old_pte), pte_val(pte));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Install pte at ptep. A pte that is present, user-executable and not
 * special is first passed to __sync_icache_dcache(); the racy-update
 * debug check then runs before the entry is written with set_pte().
 * The addr argument is not used in this body.
 */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|
|
|
pte_t *ptep, pte_t pte)
|
|
|
|
{
|
|
|
|
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
|
|
|
|
__sync_icache_dcache(pte);
|
|
|
|
|
|
|
|
__check_racy_pte_update(mm, ptep, pte);
|
2015-07-10 16:24:28 +00:00
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
set_pte(ptep, pte);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Huge pte definitions.
|
|
|
|
*/
|
2013-04-10 12:48:00 +00:00
|
|
|
/* Return the pte with PTE_TABLE_BIT cleared. */
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hugetlb definitions.
|
|
|
|
*/
|
2015-12-17 19:31:26 +00:00
|
|
|
/* NOTE(review): presumably the number of huge page sizes the hugetlb core may register -- confirm. */
#define HUGE_MAX_HSTATE 4
|
2013-04-10 12:48:00 +00:00
|
|
|
/* The huge page shift equals PMD_SHIFT. */
#define HPAGE_SHIFT PMD_SHIFT
|
|
|
|
/* Huge page size: 1 shifted left by HPAGE_SHIFT. */
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
|
|
|
|
/* Mask that clears the offset bits below HPAGE_SIZE. */
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
|
|
|
|
/* Huge page order relative to the base page: HPAGE_SHIFT minus PAGE_SHIFT. */
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2017-12-13 17:07:21 +00:00
|
|
|
/* Rewrap the raw value of a pgd entry as a pte. */
static inline pte_t pgd_pte(pgd_t pgd)
|
|
|
|
{
|
|
|
|
return __pte(pgd_val(pgd));
|
|
|
|
}
|
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
/* Rewrap the raw value of a p4d entry as a pte. */
static inline pte_t p4d_pte(p4d_t p4d)
|
|
|
|
{
|
|
|
|
return __pte(p4d_val(p4d));
|
|
|
|
}
|
|
|
|
|
2014-10-09 22:29:25 +00:00
|
|
|
/* Rewrap the raw value of a pud entry as a pte. */
static inline pte_t pud_pte(pud_t pud)
|
|
|
|
{
|
|
|
|
return __pte(pud_val(pud));
|
|
|
|
}
|
|
|
|
|
2018-12-11 17:10:39 +00:00
|
|
|
/* Rewrap the raw value of a pte as a pud entry. */
static inline pud_t pte_pud(pte_t pte)
|
|
|
|
{
|
|
|
|
return __pud(pte_val(pte));
|
|
|
|
}
|
|
|
|
|
2014-10-09 22:29:25 +00:00
|
|
|
/* Rewrap the raw value of a pud entry as a pmd entry. */
static inline pmd_t pud_pmd(pud_t pud)
|
|
|
|
{
|
|
|
|
return __pmd(pud_val(pud));
|
|
|
|
}
|
|
|
|
|
2014-02-25 10:02:13 +00:00
|
|
|
/* Rewrap the raw value of a pmd entry as a pte. */
static inline pte_t pmd_pte(pmd_t pmd)
|
|
|
|
{
|
|
|
|
return __pte(pmd_val(pmd));
|
|
|
|
}
|
2013-04-19 15:23:57 +00:00
|
|
|
|
2014-02-25 10:02:13 +00:00
|
|
|
/* Rewrap the raw value of a pte as a pmd entry. */
static inline pmd_t pte_pmd(pte_t pte)
|
|
|
|
{
|
|
|
|
return __pmd(pte_val(pte));
|
|
|
|
}
|
2013-04-19 15:23:57 +00:00
|
|
|
|
2019-05-27 03:58:15 +00:00
|
|
|
/* Return prot with PUD_TABLE_BIT cleared and PUD_TYPE_SECT ORed in. */
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
|
2014-10-20 13:42:07 +00:00
|
|
|
{
|
2019-05-27 03:58:15 +00:00
|
|
|
return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return prot with PMD_TABLE_BIT cleared and PMD_TYPE_SECT ORed in. */
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
|
2014-10-20 13:42:07 +00:00
|
|
|
{
|
2019-05-27 03:58:15 +00:00
|
|
|
return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
|
2014-10-20 13:42:07 +00:00
|
|
|
}
|
|
|
|
|
2016-04-08 22:50:28 +00:00
|
|
|
#ifdef CONFIG_NUMA_BALANCING
|
|
|
|
/*
|
2020-06-09 04:32:38 +00:00
|
|
|
* See the comment in include/linux/pgtable.h
|
2016-04-08 22:50:28 +00:00
|
|
|
*/
|
|
|
|
/* Non-zero when PTE_PROT_NONE is set and PTE_VALID is clear. */
static inline int pte_protnone(pte_t pte)
|
|
|
|
{
|
|
|
|
return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pmd variant: applies pte_protnone() to the pmd converted with pmd_pte(). */
static inline int pmd_protnone(pmd_t pmd)
|
|
|
|
{
|
|
|
|
return pte_protnone(pmd_pte(pmd));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2013-04-19 15:23:57 +00:00
|
|
|
/*
|
|
|
|
* THP definitions.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
/* True for a non-zero pmd whose PMD_TABLE_BIT is clear. */
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
|
2014-10-09 22:29:25 +00:00
|
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
2013-04-19 15:23:57 +00:00
|
|
|
|
2016-05-05 09:44:02 +00:00
|
|
|
/* pmd variant of pte_present(), applied to the pmd viewed as a pte. */
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
|
2014-12-10 23:44:36 +00:00
|
|
|
/* pmd variant of pte_dirty(), applied to the pmd viewed as a pte. */
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
|
2014-02-25 10:02:13 +00:00
|
|
|
/* pmd variant of pte_young(), applied to the pmd viewed as a pte. */
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
|
2018-08-22 20:36:31 +00:00
|
|
|
/* pmd variant of pte_valid(), applied to the pmd viewed as a pte. */
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
|
2014-02-25 10:02:13 +00:00
|
|
|
/* pmd variant of pte_wrprotect(): convert to a pte, write-protect, convert back. */
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
|
|
|
|
/* pmd variant of pte_mkold(): convert to a pte, apply, convert back. */
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
|
|
|
|
/* pmd variant of pte_mkwrite(): convert to a pte, apply, convert back. */
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
|
2016-05-05 09:44:01 +00:00
|
|
|
/* pmd variant of pte_mkclean(): convert to a pte, apply, convert back. */
#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
|
2014-02-25 10:02:13 +00:00
|
|
|
/* pmd variant of pte_mkdirty(): convert to a pte, apply, convert back. */
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
|
|
|
|
/* pmd variant of pte_mkyoung(): convert to a pte, apply, convert back. */
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
|
2020-06-03 23:03:45 +00:00
|
|
|
/* Return the pmd with PMD_SECT_VALID cleared. */
#define pmd_mkinvalid(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
|
2013-04-19 15:23:57 +00:00
|
|
|
|
2016-03-15 10:46:34 +00:00
|
|
|
/* True when either pmd_huge() or pmd_trans_huge() holds for the pmd. */
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
|
|
|
|
|
2014-02-25 10:02:13 +00:00
|
|
|
/* pmd variant of pte_write(), applied to the pmd viewed as a pte. */
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
|
2013-04-19 15:23:57 +00:00
|
|
|
|
|
|
|
/* Return the pmd with PMD_TABLE_BIT cleared. */
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
|
|
|
|
|
2019-07-16 23:30:51 +00:00
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
/* pmd variant of pte_devmap(), applied to the pmd viewed as a pte. */
#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd))
|
|
|
|
#endif
|
arm64: mm: add missing PTE_SPECIAL in pte_mkdevmap on arm64
Without this patch, the MAP_SYNC test case will cause a print_bad_pte
warning on arm64 as follows:
[ 25.542693] BUG: Bad page map in process mapdax333 pte:2e8000448800f53 pmd:41ff5f003
[ 25.546360] page:ffff7e0010220000 refcount:1 mapcount:-1 mapping:ffff8003e29c7440 index:0x0
[ 25.550281] ext4_dax_aops
[ 25.550282] name:"__aaabbbcccddd__"
[ 25.551553] flags: 0x3ffff0000001002(referenced|reserved)
[ 25.555802] raw: 03ffff0000001002 ffff8003dfffa908 0000000000000000 ffff8003e29c7440
[ 25.559446] raw: 0000000000000000 0000000000000000 00000001fffffffe 0000000000000000
[ 25.563075] page dumped because: bad pte
[ 25.564938] addr:0000ffffbe05b000 vm_flags:208000fb anon_vma:0000000000000000 mapping:ffff8003e29c7440 index:0
[ 25.574272] file:__aaabbbcccddd__ fault:ext4_dax_fault mmmmap:ext4_file_mmap readpage:0x0
[ 25.578799] CPU: 1 PID: 1180 Comm: mapdax333 Not tainted 5.2.0+ #21
[ 25.581702] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[ 25.585624] Call trace:
[ 25.587008] dump_backtrace+0x0/0x178
[ 25.588799] show_stack+0x24/0x30
[ 25.590328] dump_stack+0xa8/0xcc
[ 25.591901] print_bad_pte+0x18c/0x218
[ 25.593628] unmap_page_range+0x778/0xc00
[ 25.595506] unmap_single_vma+0x94/0xe8
[ 25.597304] unmap_vmas+0x90/0x108
[ 25.598901] unmap_region+0xc0/0x128
[ 25.600566] __do_munmap+0x284/0x3f0
[ 25.602245] __vm_munmap+0x78/0xe0
[ 25.603820] __arm64_sys_munmap+0x34/0x48
[ 25.605709] el0_svc_common.constprop.0+0x78/0x168
[ 25.607956] el0_svc_handler+0x34/0x90
[ 25.609698] el0_svc+0x8/0xc
[...]
The root cause is in _vm_normal_page, without the PTE_SPECIAL bit,
the return value will be incorrectly set to pfn_to_page(pfn) instead
of NULL. Besides, this patch also rewrites pmd_mkdevmap to avoid
setting PTE_SPECIAL for pmd.
The MAP_SYNC test case is as follows(Provided by Yibo Cai)
$#include <stdio.h>
$#include <string.h>
$#include <unistd.h>
$#include <sys/file.h>
$#include <sys/mman.h>
$#ifndef MAP_SYNC
$#define MAP_SYNC 0x80000
$#endif
/* mount -o dax /dev/pmem0 /mnt */
$#define F "/mnt/__aaabbbcccddd__"
int main(void)
{
int fd;
char buf[4096];
void *addr;
if ((fd = open(F, O_CREAT|O_TRUNC|O_RDWR, 0644)) < 0) {
perror("open1");
return 1;
}
if (write(fd, buf, 4096) != 4096) {
perror("lseek");
return 1;
}
addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_SYNC, fd, 0);
if (addr == MAP_FAILED) {
perror("mmap");
printf("did you mount with '-o dax'?\n");
return 1;
}
memset(addr, 0x55, 4096);
if (munmap(addr, 4096) == -1) {
perror("munmap");
return 1;
}
close(fd);
return 0;
}
Fixes: 73b20c84d42d ("arm64: mm: implement pte_devmap support")
Reported-by: Yibo Cai <Yibo.Cai@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Robin Murphy <Robin.Murphy@arm.com>
Signed-off-by: Jia He <justin.he@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2019-08-07 04:58:51 +00:00
|
|
|
static inline pmd_t pmd_mkdevmap(pmd_t pmd)
|
|
|
|
{
|
|
|
|
return pte_pmd(set_pte_bit(pmd_pte(pmd), __pgprot(PTE_DEVMAP)));
|
|
|
|
}
|
2019-07-16 23:30:51 +00:00
|
|
|
|
2017-12-13 17:07:21 +00:00
|
|
|
#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
|
|
|
|
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
|
|
|
|
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
|
|
|
|
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
|
2013-04-19 15:23:57 +00:00
|
|
|
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
|
|
|
|
|
2018-12-11 17:10:40 +00:00
|
|
|
#define pud_young(pud) pte_young(pud_pte(pud))
|
2018-12-11 17:10:39 +00:00
|
|
|
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
|
2014-10-09 22:29:25 +00:00
|
|
|
#define pud_write(pud) pte_write(pud_pte(pud))
|
2017-12-13 17:07:21 +00:00
|
|
|
|
2018-12-11 17:10:41 +00:00
|
|
|
#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
|
|
|
|
|
2017-12-13 17:07:21 +00:00
|
|
|
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
|
|
|
|
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
|
|
|
|
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
|
|
|
|
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
|
2013-04-19 15:23:57 +00:00
|
|
|
|
arm64: mm: fix pmd_write CoW brokenness
Commit 9c7e535fcc17 ("arm64: mm: Route pmd thp functions through pte
equivalents") changed the pmd manipulator and accessor functions to
convert the target pmd to a pte, process it with the pte functions, then
convert it back. Along the way, we gained support for PTE_WRITE, however
this is completely ignored by set_pmd_at, and so we fail to set the
PMD_SECT_RDONLY for PMDs, resulting in all sorts of lovely failures (like
CoW not working).
Partially reverting the offending commit (by making use of
PMD_SECT_RDONLY explicitly for pmd_{write,wrprotect,mkwrite} functions)
leads to further issues because pmd_write can then return potentially
incorrect values for page table entries marked as RDONLY, leading to
BUG_ON(pmd_write(entry)) tripping under some THP workloads.
This patch fixes the issue by routing set_pmd_at through set_pte_at,
which correctly takes the PTE_WRITE flag into account. Given that
THP mappings are always anonymous, the additional cache-flushing code
in __sync_icache_dcache won't impose any significant overhead as the
flush will be skipped.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Steve Capper <steve.capper@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2014-05-27 18:11:58 +00:00
|
|
|
/*
 * Install a pmd entry by reusing the pte path: the pmd pointer is
 * reinterpreted as a pte pointer and the value converted with pmd_pte().
 * The pointer argument is parenthesized so that an expression such as
 * "base + i" is cast as a whole rather than only its first operand.
 */
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)(pmdp), pmd_pte(pmd))
|
2013-04-19 15:23:57 +00:00
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
|
|
|
|
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
|
|
|
|
|
2017-12-13 17:07:21 +00:00
|
|
|
#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd))
|
|
|
|
#define __phys_to_pgd_val(phys) __phys_to_pte_val(phys)
|
|
|
|
|
2014-04-03 14:57:15 +00:00
|
|
|
#define __pgprot_modify(prot,mask,bits) \
|
|
|
|
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
|
|
|
|
|
2020-06-02 04:51:32 +00:00
|
|
|
#define pgprot_nx(prot) \
|
2020-06-15 15:27:43 +00:00
|
|
|
__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)
|
2020-06-02 04:51:32 +00:00
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Mark the prot value as uncacheable and unbufferable.
|
|
|
|
*/
|
|
|
|
#define pgprot_noncached(prot) \
|
2014-03-12 16:07:06 +00:00
|
|
|
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
|
2012-03-05 11:49:27 +00:00
|
|
|
#define pgprot_writecombine(prot) \
|
2014-03-12 16:07:06 +00:00
|
|
|
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
|
2014-09-29 14:29:31 +00:00
|
|
|
#define pgprot_device(prot) \
|
|
|
|
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
|
2019-08-03 09:38:31 +00:00
|
|
|
/*
|
|
|
|
* DMA allocations for non-coherent devices use what the Arm architecture calls
|
|
|
|
* "Normal non-cacheable" memory, which permits speculation, unaligned accesses
|
|
|
|
* and merging of writes. This is different from "Device-nGnR[nE]" memory which
|
|
|
|
* is intended for MMIO and thus forbids speculation, preserves access size,
|
|
|
|
* requires strict alignment and can also force write responses to come from the
|
|
|
|
* endpoint.
|
|
|
|
*/
|
2019-08-26 07:03:44 +00:00
|
|
|
#define pgprot_dmacoherent(prot) \
|
|
|
|
__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
|
|
|
|
PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
|
|
struct file;
|
|
|
|
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
|
|
unsigned long size, pgprot_t vma_prot);
|
|
|
|
|
|
|
|
#define pmd_none(pmd) (!pmd_val(pmd))
|
|
|
|
|
2016-05-05 09:44:01 +00:00
|
|
|
#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2012-12-07 18:35:41 +00:00
|
|
|
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
|
|
|
|
PMD_TYPE_TABLE)
|
|
|
|
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
|
|
|
|
PMD_TYPE_SECT)
|
2020-02-04 01:35:14 +00:00
|
|
|
#define pmd_leaf(pmd) pmd_sect(pmd)
|
2012-12-07 18:35:41 +00:00
|
|
|
|
2016-02-25 15:53:44 +00:00
|
|
|
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
|
2019-07-31 20:05:45 +00:00
|
|
|
static inline bool pud_sect(pud_t pud) { return false; }
|
|
|
|
static inline bool pud_table(pud_t pud) { return true; }
|
2014-05-06 13:02:27 +00:00
|
|
|
#else
|
|
|
|
#define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
|
|
|
|
PUD_TYPE_SECT)
|
2014-12-09 07:26:47 +00:00
|
|
|
#define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \
|
|
|
|
PUD_TYPE_TABLE)
|
2014-05-06 13:02:27 +00:00
|
|
|
#endif
|
2012-12-07 18:35:41 +00:00
|
|
|
|
2018-09-24 16:15:02 +00:00
|
|
|
extern pgd_t init_pg_dir[PTRS_PER_PGD];
|
|
|
|
extern pgd_t init_pg_end[];
|
|
|
|
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
|
|
|
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
|
arm64/kernel: Fix range on invalidating dcache for boot page tables
Prior to commit 8eb7e28d4c642c31 ("arm64/mm: move runtime pgds to
rodata"), idmap_pgd_dir, tramp_pg_dir, reserved_ttbr0, swapper_pg_dir,
and init_pg_dir were contiguous at the end of the kernel image. The
maintenance at the end of __create_page_tables assumed these were
contiguous, and affected everything from the start of idmap_pg_dir
to the end of init_pg_dir.
That commit moved all but init_pg_dir into the .rodata section, with
other data placed between idmap_pg_dir and init_pg_dir, but did not
update the maintenance. Hence the maintenance is performed on much
more data than necessary (but as the bootloader previously made this
clean to the PoC there is no functional problem).
As we only alter idmap_pg_dir, and init_pg_dir, we only need to perform
maintenance for these. As the other dirs are in .rodata, the bootloader
will have initialised them as expected and cleaned them to the PoC. The
kernel will initialize them as necessary after enabling the MMU.
This patch reworks the maintenance to only cover the idmap_pg_dir and
init_pg_dir to avoid this unnecessary work.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200427235700.112220-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
2020-04-27 23:57:00 +00:00
|
|
|
extern pgd_t idmap_pg_end[];
|
2018-09-24 16:15:02 +00:00
|
|
|
extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
|
|
|
|
|
|
|
|
extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
|
|
|
|
|
|
|
|
static inline bool in_swapper_pgdir(void *addr)
|
|
|
|
{
|
|
|
|
return ((unsigned long)addr & PAGE_MASK) ==
|
|
|
|
((unsigned long)swapper_pg_dir & PAGE_MASK);
|
|
|
|
}
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
|
|
|
|
{
|
2018-10-05 13:49:16 +00:00
|
|
|
#ifdef __PAGETABLE_PMD_FOLDED
|
|
|
|
if (in_swapper_pgdir(pmdp)) {
|
2018-09-24 16:15:02 +00:00
|
|
|
set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
|
|
|
|
return;
|
|
|
|
}
|
2018-10-05 13:49:16 +00:00
|
|
|
#endif /* __PAGETABLE_PMD_FOLDED */
|
2018-09-24 16:15:02 +00:00
|
|
|
|
2018-02-15 11:14:56 +00:00
|
|
|
WRITE_ONCE(*pmdp, pmd);
|
2018-08-22 20:36:31 +00:00
|
|
|
|
Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}"
This reverts commit 24fe1b0efad4fcdd32ce46cffeab297f22581707.
Commit 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from
set_{pte,pmd,pud}") removed ISB instructions immediately following updates
to the page table, on the grounds that they are not required by the
architecture and a DSB alone is sufficient to ensure that subsequent data
accesses use the new translation:
DDI0487E_a, B2-128:
| ... no instruction that appears in program order after the DSB
| instruction can alter any state of the system or perform any part of
| its functionality until the DSB completes other than:
|
| * Being fetched from memory and decoded
| * Reading the general-purpose, SIMD and floating-point,
| Special-purpose, or System registers that are directly or indirectly
| read without causing side-effects.
However, the same document also states the following:
DDI0487E_a, B2-125:
| DMB and DSB instructions affect reads and writes to the memory system
| generated by Load/Store instructions and data or unified cache
| maintenance instructions being executed by the PE. Instruction fetches
| or accesses caused by a hardware translation table access are not
| explicit accesses.
which appears to claim that the DSB alone is insufficient. Unfortunately,
some CPU designers have followed the second clause above, whereas in Linux
we've been relying on the first. This means that our mapping sequence:
MOV X0, <valid pte>
STR X0, [Xptep] // Store new PTE to page table
DSB ISHST
LDR X1, [X2] // Translates using the new PTE
can actually raise a translation fault on the load instruction because the
translation can be performed speculatively before the page table update and
then marked as "faulting" by the CPU. For user PTEs, this is ok because we
can handle the spurious fault, but for kernel PTEs and intermediate table
entries this results in a panic().
Revert the offending commit to reintroduce the missing barriers.
Cc: <stable@vger.kernel.org>
Fixes: 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}")
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2019-08-22 13:58:37 +00:00
|
|
|
if (pmd_valid(pmd)) {
|
2018-08-22 20:36:31 +00:00
|
|
|
dsb(ishst);
|
Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}"
This reverts commit 24fe1b0efad4fcdd32ce46cffeab297f22581707.
Commit 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from
set_{pte,pmd,pud}") removed ISB instructions immediately following updates
to the page table, on the grounds that they are not required by the
architecture and a DSB alone is sufficient to ensure that subsequent data
accesses use the new translation:
DDI0487E_a, B2-128:
| ... no instruction that appears in program order after the DSB
| instruction can alter any state of the system or perform any part of
| its functionality until the DSB completes other than:
|
| * Being fetched from memory and decoded
| * Reading the general-purpose, SIMD and floating-point,
| Special-purpose, or System registers that are directly or indirectly
| read without causing side-effects.
However, the same document also states the following:
DDI0487E_a, B2-125:
| DMB and DSB instructions affect reads and writes to the memory system
| generated by Load/Store instructions and data or unified cache
| maintenance instructions being executed by the PE. Instruction fetches
| or accesses caused by a hardware translation table access are not
| explicit accesses.
which appears to claim that the DSB alone is insufficient. Unfortunately,
some CPU designers have followed the second clause above, whereas in Linux
we've been relying on the first. This means that our mapping sequence:
MOV X0, <valid pte>
STR X0, [Xptep] // Store new PTE to page table
DSB ISHST
LDR X1, [X2] // Translates using the new PTE
can actually raise a translation fault on the load instruction because the
translation can be performed speculatively before the page table update and
then marked as "faulting" by the CPU. For user PTEs, this is ok because we
can handle the spurious fault, but for kernel PTEs and intermediate table
entries this results in a panic().
Revert the offending commit to reintroduce the missing barriers.
Cc: <stable@vger.kernel.org>
Fixes: 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}")
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2019-08-22 13:58:37 +00:00
|
|
|
isb();
|
|
|
|
}
|
2012-03-05 11:49:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void pmd_clear(pmd_t *pmdp)
|
|
|
|
{
|
|
|
|
set_pmd(pmdp, __pmd(0));
|
|
|
|
}
|
|
|
|
|
2016-01-25 11:45:04 +00:00
|
|
|
static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
|
2012-03-05 11:49:27 +00:00
|
|
|
{
|
2017-12-13 17:07:21 +00:00
|
|
|
return __pmd_to_phys(pmd);
|
2012-03-05 11:49:27 +00:00
|
|
|
}
|
|
|
|
|
2020-06-09 04:33:10 +00:00
|
|
|
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
|
|
|
{
|
|
|
|
return (unsigned long)__va(pmd_page_paddr(pmd));
|
|
|
|
}
|
2019-04-29 17:37:01 +00:00
|
|
|
|
2016-01-25 11:45:03 +00:00
|
|
|
/* Find an entry in the third-level page table. */
|
2017-09-29 10:29:55 +00:00
|
|
|
#define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
|
2016-01-25 11:45:03 +00:00
|
|
|
|
2016-01-25 11:45:07 +00:00
|
|
|
#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
|
|
|
|
#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
|
|
|
|
#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
|
|
|
|
|
2020-04-27 23:46:55 +00:00
|
|
|
#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2016-02-16 12:52:37 +00:00
|
|
|
/* use ONLY for statically allocated translation tables */
|
|
|
|
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Conversion functions: convert a page and protection to a page entry,
|
|
|
|
* and a page entry and page directory to the page they refer to.
|
|
|
|
*/
|
|
|
|
#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
|
|
|
|
|
2015-04-14 22:45:39 +00:00
|
|
|
#if CONFIG_PGTABLE_LEVELS > 2
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2014-07-21 13:52:49 +00:00
|
|
|
#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
#define pud_none(pud) (!pud_val(pud))
|
2016-05-05 09:44:01 +00:00
|
|
|
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
|
2017-06-08 17:25:26 +00:00
|
|
|
#define pud_present(pud) pte_present(pud_pte(pud))
|
2020-02-04 01:35:14 +00:00
|
|
|
#define pud_leaf(pud) pud_sect(pud)
|
2018-08-22 20:36:31 +00:00
|
|
|
#define pud_valid(pud) pte_valid(pud_pte(pud))
|
2012-03-05 11:49:27 +00:00
|
|
|
|
|
|
|
static inline void set_pud(pud_t *pudp, pud_t pud)
|
|
|
|
{
|
2018-10-05 13:49:16 +00:00
|
|
|
#ifdef __PAGETABLE_PUD_FOLDED
|
|
|
|
if (in_swapper_pgdir(pudp)) {
|
2018-09-24 16:15:02 +00:00
|
|
|
set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
|
|
|
|
return;
|
|
|
|
}
|
2018-10-05 13:49:16 +00:00
|
|
|
#endif /* __PAGETABLE_PUD_FOLDED */
|
2018-09-24 16:15:02 +00:00
|
|
|
|
2018-02-15 11:14:56 +00:00
|
|
|
WRITE_ONCE(*pudp, pud);
|
2018-08-22 20:36:31 +00:00
|
|
|
|
Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}"
This reverts commit 24fe1b0efad4fcdd32ce46cffeab297f22581707.
Commit 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from
set_{pte,pmd,pud}") removed ISB instructions immediately following updates
to the page table, on the grounds that they are not required by the
architecture and a DSB alone is sufficient to ensure that subsequent data
accesses use the new translation:
DDI0487E_a, B2-128:
| ... no instruction that appears in program order after the DSB
| instruction can alter any state of the system or perform any part of
| its functionality until the DSB completes other than:
|
| * Being fetched from memory and decoded
| * Reading the general-purpose, SIMD and floating-point,
| Special-purpose, or System registers that are directly or indirectly
| read without causing side-effects.
However, the same document also states the following:
DDI0487E_a, B2-125:
| DMB and DSB instructions affect reads and writes to the memory system
| generated by Load/Store instructions and data or unified cache
| maintenance instructions being executed by the PE. Instruction fetches
| or accesses caused by a hardware translation table access are not
| explicit accesses.
which appears to claim that the DSB alone is insufficient. Unfortunately,
some CPU designers have followed the second clause above, whereas in Linux
we've been relying on the first. This means that our mapping sequence:
MOV X0, <valid pte>
STR X0, [Xptep] // Store new PTE to page table
DSB ISHST
LDR X1, [X2] // Translates using the new PTE
can actually raise a translation fault on the load instruction because the
translation can be performed speculatively before the page table update and
then marked as "faulting" by the CPU. For user PTEs, this is ok because we
can handle the spurious fault, but for kernel PTEs and intermediate table
entries this results in a panic().
Revert the offending commit to reintroduce the missing barriers.
Cc: <stable@vger.kernel.org>
Fixes: 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}")
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2019-08-22 13:58:37 +00:00
|
|
|
if (pud_valid(pud)) {
|
2018-08-22 20:36:31 +00:00
|
|
|
dsb(ishst);
|
Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}"
This reverts commit 24fe1b0efad4fcdd32ce46cffeab297f22581707.
Commit 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from
set_{pte,pmd,pud}") removed ISB instructions immediately following updates
to the page table, on the grounds that they are not required by the
architecture and a DSB alone is sufficient to ensure that subsequent data
accesses use the new translation:
DDI0487E_a, B2-128:
| ... no instruction that appears in program order after the DSB
| instruction can alter any state of the system or perform any part of
| its functionality until the DSB completes other than:
|
| * Being fetched from memory and decoded
| * Reading the general-purpose, SIMD and floating-point,
| Special-purpose, or System registers that are directly or indirectly
| read without causing side-effects.
However, the same document also states the following:
DDI0487E_a, B2-125:
| DMB and DSB instructions affect reads and writes to the memory system
| generated by Load/Store instructions and data or unified cache
| maintenance instructions being executed by the PE. Instruction fetches
| or accesses caused by a hardware translation table access are not
| explicit accesses.
which appears to claim that the DSB alone is insufficient. Unfortunately,
some CPU designers have followed the second clause above, whereas in Linux
we've been relying on the first. This means that our mapping sequence:
MOV X0, <valid pte>
STR X0, [Xptep] // Store new PTE to page table
DSB ISHST
LDR X1, [X2] // Translates using the new PTE
can actually raise a translation fault on the load instruction because the
translation can be performed speculatively before the page table update and
then marked as "faulting" by the CPU. For user PTEs, this is ok because we
can handle the spurious fault, but for kernel PTEs and intermediate table
entries this results in a panic().
Revert the offending commit to reintroduce the missing barriers.
Cc: <stable@vger.kernel.org>
Fixes: 24fe1b0efad4fcdd ("arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}")
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2019-08-22 13:58:37 +00:00
|
|
|
isb();
|
|
|
|
}
|
2012-03-05 11:49:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void pud_clear(pud_t *pudp)
|
|
|
|
{
|
|
|
|
set_pud(pudp, __pud(0));
|
|
|
|
}
|
|
|
|
|
2016-01-25 11:45:04 +00:00
|
|
|
static inline phys_addr_t pud_page_paddr(pud_t pud)
|
2012-03-05 11:49:27 +00:00
|
|
|
{
|
2017-12-13 17:07:21 +00:00
|
|
|
return __pud_to_phys(pud);
|
2012-03-05 11:49:27 +00:00
|
|
|
}
|
|
|
|
|
2020-06-09 04:33:10 +00:00
|
|
|
static inline unsigned long pud_page_vaddr(pud_t pud)
|
|
|
|
{
|
|
|
|
return (unsigned long)__va(pud_page_paddr(pud));
|
|
|
|
}
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2020-06-09 04:33:10 +00:00
|
|
|
/* Find an entry in the second-level page table. */
|
2018-02-15 11:14:56 +00:00
|
|
|
#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2016-01-25 11:45:07 +00:00
|
|
|
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
|
|
|
|
#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
|
|
|
|
#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2020-04-27 23:46:55 +00:00
|
|
|
#define pud_page(pud) phys_to_page(__pud_to_phys(pud))
|
2014-10-09 22:29:25 +00:00
|
|
|
|
2016-02-16 12:52:37 +00:00
|
|
|
/* use ONLY for statically allocated translation tables */
|
|
|
|
#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
|
|
|
|
|
2016-01-25 11:45:04 +00:00
|
|
|
#else
|
|
|
|
|
|
|
|
#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
|
|
|
|
|
2016-01-25 11:45:07 +00:00
|
|
|
/* Match pmd_offset folding in <asm-generic/pgtable-nopmd.h> */
|
|
|
|
#define pmd_set_fixmap(addr) NULL
|
|
|
|
/*
 * Folded-PMD variant: no fixmap slot is used, the pud pointer is returned
 * reinterpreted as a pmd pointer and addr is ignored. The argument is
 * parenthesized so a pointer expression is cast as a whole.
 */
#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)(pudp))
|
|
|
|
#define pmd_clear_fixmap()
|
|
|
|
|
2016-02-16 12:52:37 +00:00
|
|
|
/*
 * Folded-PMD variant: the table pointer passed in is simply reinterpreted
 * as a pmd pointer; addr is not used. The argument is parenthesized so a
 * pointer expression is cast as a whole.
 */
#define pmd_offset_kimg(dir,addr) ((pmd_t *)(dir))
|
|
|
|
|
2015-04-14 22:45:39 +00:00
|
|
|
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
|
2012-03-05 11:49:27 +00:00
|
|
|
|
2015-04-14 22:45:39 +00:00
|
|
|
#if CONFIG_PGTABLE_LEVELS > 3
|
2014-05-12 09:40:51 +00:00
|
|
|
|
2014-07-21 13:52:49 +00:00
|
|
|
#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud))
|
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
#define p4d_none(p4d) (!p4d_val(p4d))
|
|
|
|
#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
|
|
|
|
#define p4d_present(p4d) (p4d_val(p4d))
|
2014-05-12 09:40:51 +00:00
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
|
2014-05-12 09:40:51 +00:00
|
|
|
{
|
2020-06-04 23:46:23 +00:00
|
|
|
if (in_swapper_pgdir(p4dp)) {
|
|
|
|
set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
|
2018-09-24 16:15:02 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
WRITE_ONCE(*p4dp, p4d);
|
2014-05-12 09:40:51 +00:00
|
|
|
dsb(ishst);
|
2019-08-23 12:03:55 +00:00
|
|
|
isb();
|
2014-05-12 09:40:51 +00:00
|
|
|
}
|
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
static inline void p4d_clear(p4d_t *p4dp)
|
2014-05-12 09:40:51 +00:00
|
|
|
{
|
2020-06-04 23:46:23 +00:00
|
|
|
set_p4d(p4dp, __p4d(0));
|
2014-05-12 09:40:51 +00:00
|
|
|
}
|
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
|
2014-05-12 09:40:51 +00:00
|
|
|
{
|
2020-06-04 23:46:23 +00:00
|
|
|
return __p4d_to_phys(p4d);
|
2014-05-12 09:40:51 +00:00
|
|
|
}
|
|
|
|
|
2020-06-09 04:33:10 +00:00
|
|
|
static inline unsigned long p4d_page_vaddr(p4d_t p4d)
|
|
|
|
{
|
|
|
|
return (unsigned long)__va(p4d_page_paddr(p4d));
|
|
|
|
}
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2020-06-09 04:33:10 +00:00
|
|
|
/* Find an entry in the first-level page table. */
|
2020-06-04 23:46:23 +00:00
|
|
|
#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2016-01-25 11:45:07 +00:00
|
|
|
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
|
2020-06-04 23:46:23 +00:00
|
|
|
#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
|
2016-01-25 11:45:07 +00:00
|
|
|
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
|
2014-07-21 13:52:49 +00:00
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
/*
 * struct page for the table a p4d entry points at. Uses phys_to_page() like
 * pmd_page() and pud_page() instead of open-coding the pfn conversion.
 */
#define p4d_page(p4d) phys_to_page(__p4d_to_phys(p4d))
|
2014-12-20 00:49:40 +00:00
|
|
|
|
2016-02-16 12:52:37 +00:00
|
|
|
/* use ONLY for statically allocated translation tables */
|
|
|
|
#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
|
|
|
|
|
2016-01-25 11:45:04 +00:00
|
|
|
#else
|
|
|
|
|
2020-06-04 23:46:23 +00:00
|
|
|
#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;})
|
2016-01-25 11:45:04 +00:00
|
|
|
#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;})
|
|
|
|
|
2016-01-25 11:45:07 +00:00
|
|
|
/* Match pud_offset folding in <asm-generic/pgtable-nopud.h> */
|
|
|
|
#define pud_set_fixmap(addr) NULL
|
|
|
|
/*
 * Folded-PUD variant: no fixmap slot is used, the upper-level pointer is
 * returned reinterpreted as a pud pointer and addr is ignored. The argument
 * is parenthesized so a pointer expression is cast as a whole.
 */
#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)(pgdp))
|
|
|
|
#define pud_clear_fixmap()
|
|
|
|
|
2016-02-16 12:52:37 +00:00
|
|
|
/*
 * Folded-PUD variant: the table pointer passed in is simply reinterpreted
 * as a pud pointer; addr is not used. The argument is parenthesized so a
 * pointer expression is cast as a whole.
 */
#define pud_offset_kimg(dir,addr) ((pud_t *)(dir))
|
|
|
|
|
2015-04-14 22:45:39 +00:00
|
|
|
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
|
2014-05-12 09:40:51 +00:00
|
|
|
|
2014-07-21 13:52:49 +00:00
|
|
|
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
|
|
|
|
|
2016-01-25 11:45:07 +00:00
|
|
|
#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
|
|
|
|
#define pgd_clear_fixmap() clear_fixmap(FIX_PGD)
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
|
|
{
|
2012-12-18 14:15:15 +00:00
|
|
|
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
|
2020-03-16 16:50:45 +00:00
|
|
|
PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP;
|
2015-07-10 16:24:28 +00:00
|
|
|
/* preserve the hardware dirty information */
|
|
|
|
if (pte_hw_dirty(pte))
|
2015-09-11 17:22:01 +00:00
|
|
|
pte = pte_mkdirty(pte);
|
2012-03-05 11:49:27 +00:00
|
|
|
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2014-02-25 10:02:13 +00:00
|
|
|
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
|
|
|
|
{
|
|
|
|
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
|
|
|
|
}
|
|
|
|
|
2016-04-13 15:01:22 +00:00
|
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
|
|
extern int ptep_set_access_flags(struct vm_area_struct *vma,
|
|
|
|
unsigned long address, pte_t *ptep,
|
|
|
|
pte_t entry, int dirty);
|
|
|
|
|
2016-05-05 09:44:00 +00:00
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
|
|
|
|
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
|
|
|
|
unsigned long address, pmd_t *pmdp,
|
|
|
|
pmd_t entry, int dirty)
|
|
|
|
{
|
|
|
|
return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
|
|
|
|
}
|
2019-07-16 23:30:51 +00:00
|
|
|
|
|
|
|
static inline int pud_devmap(pud_t pud)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int pgd_devmap(pgd_t pgd)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2016-05-05 09:44:00 +00:00
|
|
|
#endif
|
|
|
|
|
2015-07-10 16:24:28 +00:00
|
|
|
/*
|
|
|
|
* Atomic pte/pmd modifications.
|
|
|
|
*/
|
|
|
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
2016-04-13 16:57:37 +00:00
|
|
|
static inline int __ptep_test_and_clear_young(pte_t *ptep)
|
2015-07-10 16:24:28 +00:00
|
|
|
{
|
2017-06-26 13:27:36 +00:00
|
|
|
pte_t old_pte, pte;
|
2015-07-10 16:24:28 +00:00
|
|
|
|
2017-06-26 13:27:36 +00:00
|
|
|
pte = READ_ONCE(*ptep);
|
|
|
|
do {
|
|
|
|
old_pte = pte;
|
|
|
|
pte = pte_mkold(pte);
|
|
|
|
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
|
|
|
|
pte_val(old_pte), pte_val(pte));
|
|
|
|
} while (pte_val(pte) != pte_val(old_pte));
|
2015-07-10 16:24:28 +00:00
|
|
|
|
2017-06-26 13:27:36 +00:00
|
|
|
return pte_young(pte);
|
2015-07-10 16:24:28 +00:00
|
|
|
}
|
|
|
|
|
2016-04-13 16:57:37 +00:00
|
|
|
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
|
|
|
unsigned long address,
|
|
|
|
pte_t *ptep)
|
|
|
|
{
|
|
|
|
return __ptep_test_and_clear_young(ptep);
|
|
|
|
}
|
|
|
|
|
2018-10-29 09:25:58 +00:00
|
|
|
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
|
|
|
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
|
|
|
|
unsigned long address, pte_t *ptep)
|
|
|
|
{
|
|
|
|
int young = ptep_test_and_clear_young(vma, address, ptep);
|
|
|
|
|
|
|
|
if (young) {
|
|
|
|
/*
|
|
|
|
* We can elide the trailing DSB here since the worst that can
|
|
|
|
* happen is that a CPU continues to use the young entry in its
|
|
|
|
* TLB and we mistakenly reclaim the associated page. The
|
|
|
|
* window for such an event is bounded by the next
|
|
|
|
* context-switch, which provides a DSB to complete the TLB
|
|
|
|
* invalidation.
|
|
|
|
*/
|
|
|
|
flush_tlb_page_nosync(vma, address);
|
|
|
|
}
|
|
|
|
|
|
|
|
return young;
|
|
|
|
}
|
|
|
|
|
2015-07-10 16:24:28 +00:00
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
|
|
|
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
|
|
|
unsigned long address,
|
|
|
|
pmd_t *pmdp)
|
|
|
|
{
|
|
|
|
return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
|
|
|
|
|
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
|
|
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
|
|
|
unsigned long address, pte_t *ptep)
|
|
|
|
{
|
2017-06-26 13:27:36 +00:00
|
|
|
return __pte(xchg_relaxed(&pte_val(*ptep), 0));
|
2015-07-10 16:24:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
2016-05-05 09:43:59 +00:00
|
|
|
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
|
|
|
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
|
|
|
|
unsigned long address, pmd_t *pmdp)
|
2015-07-10 16:24:28 +00:00
|
|
|
{
|
|
|
|
return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
|
|
|
|
|
|
/*
|
2017-12-01 17:22:14 +00:00
|
|
|
* ptep_set_wrprotect - mark read-only while transferring potential hardware
|
|
|
|
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
|
2015-07-10 16:24:28 +00:00
|
|
|
*/
|
|
|
|
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
|
|
|
|
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
|
|
|
|
{
|
2017-06-26 13:27:36 +00:00
|
|
|
pte_t old_pte, pte;
|
|
|
|
|
|
|
|
pte = READ_ONCE(*ptep);
|
|
|
|
do {
|
|
|
|
old_pte = pte;
|
2017-12-01 17:22:14 +00:00
|
|
|
/*
|
|
|
|
* If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
|
|
|
|
* clear), set the PTE_DIRTY bit.
|
|
|
|
*/
|
|
|
|
if (pte_hw_dirty(pte))
|
|
|
|
pte = pte_mkdirty(pte);
|
2017-06-26 13:27:36 +00:00
|
|
|
pte = pte_wrprotect(pte);
|
|
|
|
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
|
|
|
|
pte_val(old_pte), pte_val(pte));
|
|
|
|
} while (pte_val(pte) != pte_val(old_pte));
|
2015-07-10 16:24:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
|
|
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
|
|
|
|
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
|
|
|
|
unsigned long address, pmd_t *pmdp)
|
|
|
|
{
|
|
|
|
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
|
|
|
|
}
|
2018-02-01 00:17:55 +00:00
|
|
|
|
|
|
|
#define pmdp_establish pmdp_establish
|
|
|
|
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
|
|
|
|
unsigned long address, pmd_t *pmdp, pmd_t pmd)
|
|
|
|
{
|
|
|
|
return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
|
|
|
|
}
|
2015-07-10 16:24:28 +00:00
|
|
|
#endif
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
/*
|
|
|
|
* Encode and decode a swap entry:
|
2013-11-27 16:59:27 +00:00
|
|
|
* bits 0-1: present (must be zero)
|
2015-02-10 22:10:15 +00:00
|
|
|
* bits 2-7: swap type
|
|
|
|
* bits 8-57: swap offset
|
2016-03-09 16:31:29 +00:00
|
|
|
* bit 58: PTE_PROT_NONE (must be zero)
|
2012-03-05 11:49:27 +00:00
|
|
|
*/
|
2015-02-10 22:10:15 +00:00
|
|
|
/* Field widths of the swap entry. */
#define __SWP_TYPE_BITS		6
#define __SWP_OFFSET_BITS	50

/* Field positions: the type sits above the two low bits, the offset above the type. */
#define __SWP_TYPE_SHIFT	2
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_SHIFT + __SWP_TYPE_BITS)

/* Right-aligned field masks, applied after shifting the field down. */
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)

/* Extract the type / offset field from a swp_entry_t. */
#define __swp_type(x)		(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)		(((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)

/* Build a swp_entry_t from a type and an offset; neither argument is masked. */
#define __swp_entry(type,offset) \
	((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | \
			 ((offset) << __SWP_OFFSET_SHIFT) })

/* Conversions between pte and swap entry copy the raw value unchanged. */
#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
|
|
|
|
|
|
|
|
/*
 * Build-time guard: the number of swap files must fit in the swap type
 * field of a PTE, so MAX_SWAPFILES_SHIFT may not exceed __SWP_TYPE_BITS.
 */
#define MAX_SWAPFILES_CHECK() \
	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
|
|
|
|
|
|
|
|
extern int kern_addr_valid(unsigned long addr);
|
|
|
|
|
2015-07-16 18:26:02 +00:00
|
|
|
/*
|
|
|
|
* On AArch64, the cache coherency is handled via the set_pte_at() function.
|
|
|
|
*/
|
|
|
|
static inline void update_mmu_cache(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep)
{
	/*
	 * Deliberately empty. The price is a very small chance of retaking
	 * a user fault that has just been fixed up; the alternative, a
	 * dsb(ishst) here, would penalise the fast path.
	 */
}

/* The PMD-level hook is a no-op as well. */
#define update_mmu_cache_pmd(vma, address, pmd)	do { } while (0)
|
|
|
|
|
2017-12-13 17:07:18 +00:00
|
|
|
#ifndef CONFIG_ARM64_PA_BITS_52
/* Without 52-bit physical addresses the address is used unchanged. */
#define phys_to_ttbr(addr)	(addr)
#else
/*
 * With 52-bit physical addresses, OR the address with a copy of itself
 * shifted right by 46 (so address bits 48 and up land at bit 2 and up),
 * then keep only the bits selected by TTBR_BADDR_MASK_52.
 */
#define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
#endif
|
|
|
|
|
2019-10-11 14:09:37 +00:00
|
|
|
/*
|
|
|
|
* On arm64 without hardware Access Flag, copying from user will fail because
|
|
|
|
* the pte is old and cannot be marked young. So we always end up with zeroed
|
|
|
|
* page after fork() + CoW for pfn mappings. We don't always have a
|
|
|
|
* hardware-managed access flag on arm64.
|
|
|
|
*/
|
|
|
|
static inline bool arch_faults_on_old_pte(void)
{
	bool hw_af;

	/* Warn if this is reached from preemptible context. */
	WARN_ON(preemptible());

	/* Old ptes fault exactly when there is no hardware Access Flag. */
	hw_af = cpu_has_hw_af();

	return !hw_af;
}
#define arch_faults_on_old_pte arch_faults_on_old_pte
|
|
|
|
|
2012-03-05 11:49:27 +00:00
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
|
|
|
|
#endif /* __ASM_PGTABLE_H */
|