linux/arch/arm64/mm/pageattr.c
Mike Rapoport 6d47c23b16 set_memory: allow querying whether set_direct_map_*() is actually enabled
On arm64, set_direct_map_*() functions may return 0 without actually
changing the linear map.  This behaviour can be controlled using kernel
parameters, so we need a way to determine at runtime whether calls to
set_direct_map_invalid_noflush() and set_direct_map_default_noflush() have
any effect.

Extend set_memory API with can_set_direct_map() function that allows
checking if calling set_direct_map_*() will actually change the page
table, replace several occurrences of open coded checks in arm64 with the
new function and provide a generic stub for architectures that always
modify page tables upon calls to set_direct_map APIs.

[arnd@arndb.de: arm64: kfence: fix header inclusion ]

Link: https://lkml.kernel.org/r/20210518072034.31572-4-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christopher Lameter <cl@linux.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Elena Reshetova <elena.reshetova@intel.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Bottomley <jejb@linux.ibm.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Palmer Dabbelt <palmerdabbelt@google.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tycho Andersen <tycho@tycho.ws>
Cc: Will Deacon <will@kernel.org>
Cc: kernel test robot <lkp@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-07-08 11:48:20 -07:00

242 lines
5.9 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2014, The Linux Foundation. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/tlbflush.h>
struct page_change_data {
pgprot_t set_mask;
pgprot_t clear_mask;
};
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
bool can_set_direct_map(void)
{
return rodata_full || debug_pagealloc_enabled();
}
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
pte_t pte = READ_ONCE(*ptep);
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
set_pte(ptep, pte);
return 0;
}
/*
* This function assumes that the range is mapped with PAGE_SIZE pages.
*/
static int __change_memory_common(unsigned long start, unsigned long size,
pgprot_t set_mask, pgprot_t clear_mask)
{
struct page_change_data data;
int ret;
data.set_mask = set_mask;
data.clear_mask = clear_mask;
ret = apply_to_page_range(&init_mm, start, size, change_page_range,
&data);
flush_tlb_kernel_range(start, start + size);
return ret;
}
static int change_memory_common(unsigned long addr, int numpages,
pgprot_t set_mask, pgprot_t clear_mask)
{
unsigned long start = addr;
unsigned long size = PAGE_SIZE * numpages;
unsigned long end = start + size;
struct vm_struct *area;
int i;
if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
end = start + size;
WARN_ON_ONCE(1);
}
/*
* Kernel VA mappings are always live, and splitting live section
* mappings into page mappings may cause TLB conflicts. This means
* we have to ensure that changing the permission bits of the range
* we are operating on does not result in such splitting.
*
* Let's restrict ourselves to mappings created by vmalloc (or vmap).
* Those are guaranteed to consist entirely of page mappings, and
* splitting is never needed.
*
* So check whether the [addr, addr + size) interval is entirely
* covered by precisely one VM area that has the VM_ALLOC flag set.
*/
area = find_vm_area((void *)addr);
if (!area ||
end > (unsigned long)area->addr + area->size ||
!(area->flags & VM_ALLOC))
return -EINVAL;
if (!numpages)
return 0;
/*
* If we are manipulating read-only permissions, apply the same
* change to the linear mapping of the pages that back this VM area.
*/
if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
pgprot_val(clear_mask) == PTE_RDONLY)) {
for (i = 0; i < area->nr_pages; i++) {
__change_memory_common((u64)page_address(area->pages[i]),
PAGE_SIZE, set_mask, clear_mask);
}
}
/*
* Get rid of potentially aliasing lazily unmapped vm areas that may
* have permissions set that deviate from the ones we are setting here.
*/
vm_unmap_aliases();
return __change_memory_common(start, size, set_mask, clear_mask);
}
int set_memory_ro(unsigned long addr, int numpages)
{
return change_memory_common(addr, numpages,
__pgprot(PTE_RDONLY),
__pgprot(PTE_WRITE));
}
int set_memory_rw(unsigned long addr, int numpages)
{
return change_memory_common(addr, numpages,
__pgprot(PTE_WRITE),
__pgprot(PTE_RDONLY));
}
int set_memory_nx(unsigned long addr, int numpages)
{
return change_memory_common(addr, numpages,
__pgprot(PTE_PXN),
__pgprot(PTE_MAYBE_GP));
}
int set_memory_x(unsigned long addr, int numpages)
{
return change_memory_common(addr, numpages,
__pgprot(PTE_MAYBE_GP),
__pgprot(PTE_PXN));
}
int set_memory_valid(unsigned long addr, int numpages, int enable)
{
if (enable)
return __change_memory_common(addr, PAGE_SIZE * numpages,
__pgprot(PTE_VALID),
__pgprot(0));
else
return __change_memory_common(addr, PAGE_SIZE * numpages,
__pgprot(0),
__pgprot(PTE_VALID));
}
int set_direct_map_invalid_noflush(struct page *page)
{
struct page_change_data data = {
.set_mask = __pgprot(0),
.clear_mask = __pgprot(PTE_VALID),
};
if (!can_set_direct_map())
return 0;
return apply_to_page_range(&init_mm,
(unsigned long)page_address(page),
PAGE_SIZE, change_page_range, &data);
}
int set_direct_map_default_noflush(struct page *page)
{
struct page_change_data data = {
.set_mask = __pgprot(PTE_VALID | PTE_WRITE),
.clear_mask = __pgprot(PTE_RDONLY),
};
if (!can_set_direct_map())
return 0;
return apply_to_page_range(&init_mm,
(unsigned long)page_address(page),
PAGE_SIZE, change_page_range, &data);
}
#ifdef CONFIG_DEBUG_PAGEALLOC
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (!can_set_direct_map())
return;
set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
/*
* This function is used to determine if a linear map page has been marked as
* not-valid. Walk the page table and check the PTE_VALID bit. This is based
* on kern_addr_valid(), which almost does what we need.
*
* Because this is only called on the kernel linear map, p?d_sect() implies
* p?d_present(). When debug_pagealloc is enabled, sections mappings are
* disabled.
*/
bool kernel_page_present(struct page *page)
{
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
if (!can_set_direct_map())
return true;
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
p4dp = p4d_offset(pgdp, addr);
if (p4d_none(READ_ONCE(*p4dp)))
return false;
pudp = pud_offset(p4dp, addr);
pud = READ_ONCE(*pudp);
if (pud_none(pud))
return false;
if (pud_sect(pud))
return true;
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
return false;
if (pmd_sect(pmd))
return true;
ptep = pte_offset_kernel(pmdp, addr);
return pte_valid(READ_ONCE(*ptep));
}