forked from Minki/linux
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm changes from Ingo Molnar: "The main changes are: continued PAT work by Toshi Kani, plus a new boot time warning about insecure RWX kernel mappings, by Stephen Smalley. The new CONFIG_DEBUG_WX=y warning is marked default-y if CONFIG_DEBUG_RODATA=y is already enabled, as a special exception, as these bugs are hard to notice and this check already found several live bugs" * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mm: Warn on W^X mappings x86/mm: Fix no-change case in try_preserve_large_page() x86/mm: Fix __split_large_page() to handle large PAT bit x86/mm: Fix try_preserve_large_page() to handle large PAT bit x86/mm: Fix gup_huge_p?d() to handle large PAT bit x86/mm: Fix slow_virt_to_phys() to handle large PAT bit x86/mm: Fix page table dump to show PAT bit x86/asm: Add pud_pgprot() and pmd_pgprot() x86/asm: Fix pud/pmd interfaces to handle large PAT bit x86/asm: Add pud/pmd mask interfaces to handle large PAT bit x86/asm: Move PUD_PAGE macros to page_types.h x86/vdso32: Define PGTABLE_LEVELS to 32bit VDSO
This commit is contained in:
commit
639ab3eb38
@ -65,10 +65,14 @@ config EARLY_PRINTK_EFI
|
||||
This is useful for kernel debugging when your machine crashes very
|
||||
early before the console code is initialized.
|
||||
|
||||
config X86_PTDUMP_CORE
|
||||
def_bool n
|
||||
|
||||
config X86_PTDUMP
|
||||
bool "Export kernel pagetable layout to userspace via debugfs"
|
||||
depends on DEBUG_KERNEL
|
||||
select DEBUG_FS
|
||||
select X86_PTDUMP_CORE
|
||||
---help---
|
||||
Say Y here if you want to show the kernel pagetable layout in a
|
||||
debugfs file. This information is only useful for kernel developers
|
||||
@ -79,7 +83,8 @@ config X86_PTDUMP
|
||||
|
||||
config EFI_PGT_DUMP
|
||||
bool "Dump the EFI pagetable"
|
||||
depends on EFI && X86_PTDUMP
|
||||
depends on EFI
|
||||
select X86_PTDUMP_CORE
|
||||
---help---
|
||||
Enable this if you want to dump the EFI page table before
|
||||
enabling virtual mode. This can be used to debug miscellaneous
|
||||
@ -105,6 +110,35 @@ config DEBUG_RODATA_TEST
|
||||
feature as well as for the change_page_attr() infrastructure.
|
||||
If in doubt, say "N"
|
||||
|
||||
config DEBUG_WX
|
||||
bool "Warn on W+X mappings at boot"
|
||||
depends on DEBUG_RODATA
|
||||
default y
|
||||
select X86_PTDUMP_CORE
|
||||
---help---
|
||||
Generate a warning if any W+X mappings are found at boot.
|
||||
|
||||
This is useful for discovering cases where the kernel is leaving
|
||||
W+X mappings after applying NX, as such mappings are a security risk.
|
||||
|
||||
Look for a message in dmesg output like this:
|
||||
|
||||
x86/mm: Checked W+X mappings: passed, no W+X pages found.
|
||||
|
||||
or like this, if the check failed:
|
||||
|
||||
x86/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
|
||||
|
||||
Note that even if the check fails, your kernel is possibly
|
||||
still fine, as W+X mappings are not a security hole in
|
||||
themselves, what they do is that they make the exploitation
|
||||
of other unfixed kernel bugs easier.
|
||||
|
||||
There is no runtime or memory usage effect of this option
|
||||
once the kernel has booted up - it's a one time check.
|
||||
|
||||
If in doubt, say "Y".
|
||||
|
||||
config DEBUG_SET_MODULE_RONX
|
||||
bool "Set loadable kernel module data as NX and text as RO"
|
||||
depends on MODULES
|
||||
|
@ -14,11 +14,13 @@
|
||||
*/
|
||||
#undef CONFIG_64BIT
|
||||
#undef CONFIG_X86_64
|
||||
#undef CONFIG_PGTABLE_LEVELS
|
||||
#undef CONFIG_ILLEGAL_POINTER_VALUE
|
||||
#undef CONFIG_SPARSEMEM_VMEMMAP
|
||||
#undef CONFIG_NR_CPUS
|
||||
|
||||
#define CONFIG_X86_32 1
|
||||
#define CONFIG_PGTABLE_LEVELS 2
|
||||
#define CONFIG_PAGE_OFFSET 0
|
||||
#define CONFIG_ILLEGAL_POINTER_VALUE 0
|
||||
#define CONFIG_NR_CPUS 1
|
||||
|
@ -26,9 +26,6 @@
|
||||
#define MCE_STACK 4
|
||||
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
|
||||
|
||||
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
|
||||
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
|
||||
|
||||
/*
|
||||
* Set __PAGE_OFFSET to the most negative possible address +
|
||||
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
|
||||
|
@ -20,6 +20,9 @@
|
||||
#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
|
||||
#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
|
||||
|
||||
#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
|
||||
#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
|
||||
|
||||
#define HPAGE_SHIFT PMD_SHIFT
|
||||
#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
|
||||
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
|
||||
|
@ -19,6 +19,13 @@
|
||||
#include <asm/x86_init.h>
|
||||
|
||||
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
|
||||
void ptdump_walk_pgd_level_checkwx(void);
|
||||
|
||||
#ifdef CONFIG_DEBUG_WX
|
||||
#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
|
||||
#else
|
||||
#define debug_checkwx() do { } while (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ZERO_PAGE is a global shared page that is always zero: used
|
||||
@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte)
|
||||
|
||||
static inline unsigned long pmd_pfn(pmd_t pmd)
|
||||
{
|
||||
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
|
||||
return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned long pud_pfn(pud_t pud)
|
||||
{
|
||||
return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
|
||||
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
|
||||
@ -379,7 +386,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
|
||||
return __pgprot(preservebits | addbits);
|
||||
}
|
||||
|
||||
#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
|
||||
#define pte_pgprot(x) __pgprot(pte_flags(x))
|
||||
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
|
||||
#define pud_pgprot(x) __pgprot(pud_flags(x))
|
||||
|
||||
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
|
||||
|
||||
@ -502,14 +511,15 @@ static inline int pmd_none(pmd_t pmd)
|
||||
|
||||
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
||||
{
|
||||
return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
|
||||
return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently stuck as a macro due to indirect forward reference to
|
||||
* linux/mmzone.h's __section_mem_map_addr() definition:
|
||||
*/
|
||||
#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
|
||||
#define pmd_page(pmd) \
|
||||
pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
|
||||
@ -570,14 +580,15 @@ static inline int pud_present(pud_t pud)
|
||||
|
||||
static inline unsigned long pud_page_vaddr(pud_t pud)
|
||||
{
|
||||
return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
|
||||
return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently stuck as a macro due to indirect forward reference to
|
||||
* linux/mmzone.h's __section_mem_map_addr() definition:
|
||||
*/
|
||||
#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
|
||||
#define pud_page(pud) \
|
||||
pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
|
||||
|
||||
/* Find an entry in the second-level page table.. */
|
||||
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
|
||||
|
@ -209,10 +209,10 @@ enum page_cache_mode {
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
|
||||
/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
|
||||
#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
|
||||
|
||||
/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
|
||||
/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
|
||||
#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
|
||||
|
||||
typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
|
||||
@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline pudval_t pud_pfn_mask(pud_t pud)
|
||||
{
|
||||
if (native_pud_val(pud) & _PAGE_PSE)
|
||||
return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
|
||||
else
|
||||
return PTE_PFN_MASK;
|
||||
}
|
||||
|
||||
static inline pudval_t pud_flags_mask(pud_t pud)
|
||||
{
|
||||
if (native_pud_val(pud) & _PAGE_PSE)
|
||||
return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
|
||||
else
|
||||
return ~PTE_PFN_MASK;
|
||||
}
|
||||
|
||||
static inline pudval_t pud_flags(pud_t pud)
|
||||
{
|
||||
return native_pud_val(pud) & PTE_FLAGS_MASK;
|
||||
return native_pud_val(pud) & pud_flags_mask(pud);
|
||||
}
|
||||
|
||||
static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
|
||||
{
|
||||
if (native_pmd_val(pmd) & _PAGE_PSE)
|
||||
return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
|
||||
else
|
||||
return PTE_PFN_MASK;
|
||||
}
|
||||
|
||||
static inline pmdval_t pmd_flags_mask(pmd_t pmd)
|
||||
{
|
||||
if (native_pmd_val(pmd) & _PAGE_PSE)
|
||||
return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
|
||||
else
|
||||
return ~PTE_PFN_MASK;
|
||||
}
|
||||
|
||||
static inline pmdval_t pmd_flags(pmd_t pmd)
|
||||
{
|
||||
return native_pmd_val(pmd) & PTE_FLAGS_MASK;
|
||||
return native_pmd_val(pmd) & pmd_flags_mask(pmd);
|
||||
}
|
||||
|
||||
static inline pte_t native_make_pte(pteval_t val)
|
||||
|
@ -14,7 +14,7 @@ obj-$(CONFIG_SMP) += tlb.o
|
||||
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
|
||||
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
|
||||
obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o
|
||||
|
||||
obj-$(CONFIG_HIGHMEM) += highmem_32.o
|
||||
|
||||
|
@ -32,6 +32,8 @@ struct pg_state {
|
||||
const struct addr_marker *marker;
|
||||
unsigned long lines;
|
||||
bool to_dmesg;
|
||||
bool check_wx;
|
||||
unsigned long wx_pages;
|
||||
};
|
||||
|
||||
struct addr_marker {
|
||||
@ -155,7 +157,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
|
||||
pt_dump_cont_printf(m, dmsg, " ");
|
||||
if ((level == 4 && pr & _PAGE_PAT) ||
|
||||
((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
|
||||
pt_dump_cont_printf(m, dmsg, "pat ");
|
||||
pt_dump_cont_printf(m, dmsg, "PAT ");
|
||||
else
|
||||
pt_dump_cont_printf(m, dmsg, " ");
|
||||
if (pr & _PAGE_GLOBAL)
|
||||
@ -198,8 +200,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
|
||||
* we have now. "break" is either changing perms, levels or
|
||||
* address space marker.
|
||||
*/
|
||||
prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
|
||||
cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
|
||||
prot = pgprot_val(new_prot);
|
||||
cur = pgprot_val(st->current_prot);
|
||||
|
||||
if (!st->level) {
|
||||
/* First entry */
|
||||
@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st,
|
||||
const char *unit = units;
|
||||
unsigned long delta;
|
||||
int width = sizeof(unsigned long) * 2;
|
||||
pgprotval_t pr = pgprot_val(st->current_prot);
|
||||
|
||||
if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
|
||||
WARN_ONCE(1,
|
||||
"x86/mm: Found insecure W+X mapping at address %p/%pS\n",
|
||||
(void *)st->start_address,
|
||||
(void *)st->start_address);
|
||||
st->wx_pages += (st->current_address -
|
||||
st->start_address) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now print the actual finished series
|
||||
@ -269,13 +281,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
|
||||
{
|
||||
int i;
|
||||
pte_t *start;
|
||||
pgprotval_t prot;
|
||||
|
||||
start = (pte_t *) pmd_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PTE; i++) {
|
||||
pgprot_t prot = pte_pgprot(*start);
|
||||
|
||||
prot = pte_flags(*start);
|
||||
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
|
||||
note_page(m, st, prot, 4);
|
||||
note_page(m, st, __pgprot(prot), 4);
|
||||
start++;
|
||||
}
|
||||
}
|
||||
@ -287,18 +299,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
|
||||
{
|
||||
int i;
|
||||
pmd_t *start;
|
||||
pgprotval_t prot;
|
||||
|
||||
start = (pmd_t *) pud_page_vaddr(addr);
|
||||
for (i = 0; i < PTRS_PER_PMD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
|
||||
if (!pmd_none(*start)) {
|
||||
pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
|
||||
|
||||
if (pmd_large(*start) || !pmd_present(*start))
|
||||
if (pmd_large(*start) || !pmd_present(*start)) {
|
||||
prot = pmd_flags(*start);
|
||||
note_page(m, st, __pgprot(prot), 3);
|
||||
else
|
||||
} else {
|
||||
walk_pte_level(m, st, *start,
|
||||
P + i * PMD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 3);
|
||||
start++;
|
||||
@ -318,19 +331,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
{
|
||||
int i;
|
||||
pud_t *start;
|
||||
pgprotval_t prot;
|
||||
|
||||
start = (pud_t *) pgd_page_vaddr(addr);
|
||||
|
||||
for (i = 0; i < PTRS_PER_PUD; i++) {
|
||||
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
|
||||
if (!pud_none(*start)) {
|
||||
pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
|
||||
|
||||
if (pud_large(*start) || !pud_present(*start))
|
||||
if (pud_large(*start) || !pud_present(*start)) {
|
||||
prot = pud_flags(*start);
|
||||
note_page(m, st, __pgprot(prot), 2);
|
||||
else
|
||||
} else {
|
||||
walk_pmd_level(m, st, *start,
|
||||
P + i * PUD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, st, __pgprot(0), 2);
|
||||
|
||||
@ -344,13 +358,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
|
||||
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
|
||||
#endif
|
||||
|
||||
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
|
||||
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
|
||||
bool checkwx)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
pgd_t *start = (pgd_t *) &init_level4_pgt;
|
||||
#else
|
||||
pgd_t *start = swapper_pg_dir;
|
||||
#endif
|
||||
pgprotval_t prot;
|
||||
int i;
|
||||
struct pg_state st = {};
|
||||
|
||||
@ -359,16 +375,20 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
|
||||
st.to_dmesg = true;
|
||||
}
|
||||
|
||||
st.check_wx = checkwx;
|
||||
if (checkwx)
|
||||
st.wx_pages = 0;
|
||||
|
||||
for (i = 0; i < PTRS_PER_PGD; i++) {
|
||||
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
|
||||
if (!pgd_none(*start)) {
|
||||
pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
|
||||
|
||||
if (pgd_large(*start) || !pgd_present(*start))
|
||||
if (pgd_large(*start) || !pgd_present(*start)) {
|
||||
prot = pgd_flags(*start);
|
||||
note_page(m, &st, __pgprot(prot), 1);
|
||||
else
|
||||
} else {
|
||||
walk_pud_level(m, &st, *start,
|
||||
i * PGD_LEVEL_MULT);
|
||||
}
|
||||
} else
|
||||
note_page(m, &st, __pgprot(0), 1);
|
||||
|
||||
@ -378,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
|
||||
/* Flush out the last page */
|
||||
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
|
||||
note_page(m, &st, __pgprot(0), 0);
|
||||
if (!checkwx)
|
||||
return;
|
||||
if (st.wx_pages)
|
||||
pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
|
||||
st.wx_pages);
|
||||
else
|
||||
pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
|
||||
}
|
||||
|
||||
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
|
||||
{
|
||||
ptdump_walk_pgd_level_core(m, pgd, false);
|
||||
}
|
||||
|
||||
void ptdump_walk_pgd_level_checkwx(void)
|
||||
{
|
||||
ptdump_walk_pgd_level_core(NULL, NULL, true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_PTDUMP
|
||||
static int ptdump_show(struct seq_file *m, void *v)
|
||||
{
|
||||
ptdump_walk_pgd_level(m, NULL);
|
||||
@ -397,10 +435,13 @@ static const struct file_operations ptdump_fops = {
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
#endif
|
||||
|
||||
static int pt_dump_init(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_PTDUMP
|
||||
struct dentry *pe;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Not a compile-time constant on x86-32 */
|
||||
@ -412,10 +453,12 @@ static int pt_dump_init(void)
|
||||
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_PTDUMP
|
||||
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
|
||||
&ptdump_fops);
|
||||
if (!pe)
|
||||
return -ENOMEM;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long mask;
|
||||
pte_t pte = *(pte_t *)&pmd;
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
mask = _PAGE_PRESENT|_PAGE_USER;
|
||||
if (write)
|
||||
mask |= _PAGE_RW;
|
||||
if ((pte_flags(pte) & mask) != mask)
|
||||
if ((pmd_flags(pmd) & mask) != mask)
|
||||
return 0;
|
||||
/* hugepages are never "special" */
|
||||
VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
|
||||
VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
|
||||
|
||||
refs = 0;
|
||||
head = pte_page(pte);
|
||||
head = pmd_page(pmd);
|
||||
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
|
||||
unsigned long end, int write, struct page **pages, int *nr)
|
||||
{
|
||||
unsigned long mask;
|
||||
pte_t pte = *(pte_t *)&pud;
|
||||
struct page *head, *page;
|
||||
int refs;
|
||||
|
||||
mask = _PAGE_PRESENT|_PAGE_USER;
|
||||
if (write)
|
||||
mask |= _PAGE_RW;
|
||||
if ((pte_flags(pte) & mask) != mask)
|
||||
if ((pud_flags(pud) & mask) != mask)
|
||||
return 0;
|
||||
/* hugepages are never "special" */
|
||||
VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
|
||||
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
|
||||
VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
|
||||
|
||||
refs = 0;
|
||||
head = pte_page(pte);
|
||||
head = pud_page(pud);
|
||||
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
|
||||
do {
|
||||
VM_BUG_ON_PAGE(compound_head(page) != head, page);
|
||||
|
@ -957,6 +957,8 @@ void mark_rodata_ro(void)
|
||||
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
|
||||
#endif
|
||||
mark_nxdata_nx();
|
||||
if (__supported_pte_mask & _PAGE_NX)
|
||||
debug_checkwx();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1150,6 +1150,8 @@ void mark_rodata_ro(void)
|
||||
free_init_pages("unused kernel",
|
||||
(unsigned long) __va(__pa_symbol(rodata_end)),
|
||||
(unsigned long) __va(__pa_symbol(_sdata)));
|
||||
|
||||
debug_checkwx();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address)
|
||||
phys_addr_t slow_virt_to_phys(void *__virt_addr)
|
||||
{
|
||||
unsigned long virt_addr = (unsigned long)__virt_addr;
|
||||
phys_addr_t phys_addr;
|
||||
unsigned long offset;
|
||||
unsigned long phys_addr, offset;
|
||||
enum pg_level level;
|
||||
unsigned long pmask;
|
||||
pte_t *pte;
|
||||
|
||||
pte = lookup_address(virt_addr, &level);
|
||||
BUG_ON(!pte);
|
||||
pmask = page_level_mask(level);
|
||||
offset = virt_addr & ~pmask;
|
||||
phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
|
||||
return (phys_addr | offset);
|
||||
|
||||
switch (level) {
|
||||
case PG_LEVEL_1G:
|
||||
phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
|
||||
offset = virt_addr & ~PUD_PAGE_MASK;
|
||||
break;
|
||||
case PG_LEVEL_2M:
|
||||
phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
|
||||
offset = virt_addr & ~PMD_PAGE_MASK;
|
||||
break;
|
||||
default:
|
||||
phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
|
||||
offset = virt_addr & ~PAGE_MASK;
|
||||
}
|
||||
|
||||
return (phys_addr_t)(phys_addr | offset);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
|
||||
|
||||
@ -458,7 +468,7 @@ static int
|
||||
try_preserve_large_page(pte_t *kpte, unsigned long address,
|
||||
struct cpa_data *cpa)
|
||||
{
|
||||
unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
|
||||
unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
|
||||
pte_t new_pte, old_pte, *tmp;
|
||||
pgprot_t old_prot, new_prot, req_prot;
|
||||
int i, do_split = 1;
|
||||
@ -478,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
|
||||
|
||||
switch (level) {
|
||||
case PG_LEVEL_2M:
|
||||
#ifdef CONFIG_X86_64
|
||||
old_prot = pmd_pgprot(*(pmd_t *)kpte);
|
||||
old_pfn = pmd_pfn(*(pmd_t *)kpte);
|
||||
break;
|
||||
case PG_LEVEL_1G:
|
||||
#endif
|
||||
psize = page_level_size(level);
|
||||
pmask = page_level_mask(level);
|
||||
old_prot = pud_pgprot(*(pud_t *)kpte);
|
||||
old_pfn = pud_pfn(*(pud_t *)kpte);
|
||||
break;
|
||||
default:
|
||||
do_split = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
psize = page_level_size(level);
|
||||
pmask = page_level_mask(level);
|
||||
|
||||
/*
|
||||
* Calculate the number of pages, which fit into this large
|
||||
* page starting at address:
|
||||
@ -504,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
|
||||
* up accordingly.
|
||||
*/
|
||||
old_pte = *kpte;
|
||||
old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte));
|
||||
req_prot = pgprot_large_2_4k(old_prot);
|
||||
|
||||
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
|
||||
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
|
||||
@ -530,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
|
||||
req_prot = canon_pgprot(req_prot);
|
||||
|
||||
/*
|
||||
* old_pte points to the large page base address. So we need
|
||||
* old_pfn points to the large page base pfn. So we need
|
||||
* to add the offset of the virtual address:
|
||||
*/
|
||||
pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
|
||||
pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
|
||||
cpa->pfn = pfn;
|
||||
|
||||
new_prot = static_protections(req_prot, address, pfn);
|
||||
@ -544,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
|
||||
* the pages in the range we try to preserve:
|
||||
*/
|
||||
addr = address & pmask;
|
||||
pfn = pte_pfn(old_pte);
|
||||
pfn = old_pfn;
|
||||
for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
|
||||
pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
|
||||
|
||||
@ -574,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
|
||||
* The address is aligned and the number of pages
|
||||
* covers the full page.
|
||||
*/
|
||||
new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
|
||||
new_pte = pfn_pte(old_pfn, new_prot);
|
||||
__set_pmd_pte(kpte, address, new_pte);
|
||||
cpa->flags |= CPA_FLUSHTLB;
|
||||
do_split = 0;
|
||||
@ -591,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
||||
struct page *base)
|
||||
{
|
||||
pte_t *pbase = (pte_t *)page_address(base);
|
||||
unsigned long pfn, pfninc = 1;
|
||||
unsigned long ref_pfn, pfn, pfninc = 1;
|
||||
unsigned int i, level;
|
||||
pte_t *tmp;
|
||||
pgprot_t ref_prot;
|
||||
@ -608,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
||||
}
|
||||
|
||||
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
|
||||
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
|
||||
|
||||
/* promote PAT bit to correct position */
|
||||
if (level == PG_LEVEL_2M)
|
||||
switch (level) {
|
||||
case PG_LEVEL_2M:
|
||||
ref_prot = pmd_pgprot(*(pmd_t *)kpte);
|
||||
/* clear PSE and promote PAT bit to correct position */
|
||||
ref_prot = pgprot_large_2_4k(ref_prot);
|
||||
ref_pfn = pmd_pfn(*(pmd_t *)kpte);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (level == PG_LEVEL_1G) {
|
||||
case PG_LEVEL_1G:
|
||||
ref_prot = pud_pgprot(*(pud_t *)kpte);
|
||||
ref_pfn = pud_pfn(*(pud_t *)kpte);
|
||||
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* Set the PSE flags only if the PRESENT flag is set
|
||||
* Clear the PSE flags if the PRESENT flag is not set
|
||||
* otherwise pmd_present/pmd_huge will return true
|
||||
* even on a non present pmd.
|
||||
*/
|
||||
if (pgprot_val(ref_prot) & _PAGE_PRESENT)
|
||||
pgprot_val(ref_prot) |= _PAGE_PSE;
|
||||
else
|
||||
if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
|
||||
pgprot_val(ref_prot) &= ~_PAGE_PSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
spin_unlock(&pgd_lock);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Set the GLOBAL flags only if the PRESENT flag is set
|
||||
@ -643,7 +664,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
|
||||
/*
|
||||
* Get the target pfn from the original entry:
|
||||
*/
|
||||
pfn = pte_pfn(*kpte);
|
||||
pfn = ref_pfn;
|
||||
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
|
||||
set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user