forked from Minki/linux
Merge branch 'for-next/kexec' into for-next/core
Significant steps along the road to leaving the MMU enabled during kexec relocation. * for-next/kexec: arm64: hibernate: add __force attribute to gfp_t casting arm64: kexec: arm64_relocate_new_kernel don't use x0 as temp arm64: kexec: arm64_relocate_new_kernel clean-ups and optimizations arm64: kexec: call kexec_image_info only once arm64: kexec: move relocation function setup arm64: trans_pgd: hibernate: idmap the single page that holds the copy page routines arm64: mm: Always update TCR_EL1 from __cpu_set_tcr_t0sz() arm64: trans_pgd: pass NULL instead of init_mm to *_populate functions arm64: trans_pgd: pass allocator trans_pgd_create_copy arm64: trans_pgd: make trans_pgd_map_page generic arm64: hibernate: move page handling function to new trans_pgd.c arm64: hibernate: variable pudp is used instead of pd4dp arm64: kexec: make dtb_mem always enabled
This commit is contained in:
commit
b374d0f981
@ -1132,6 +1132,10 @@ config CRASH_DUMP
|
||||
|
||||
For more details see Documentation/admin-guide/kdump/kdump.rst
|
||||
|
||||
config TRANS_TABLE
|
||||
def_bool y
|
||||
depends on HIBERNATION
|
||||
|
||||
config XEN_DOM0
|
||||
def_bool y
|
||||
depends on XEN
|
||||
|
@ -90,18 +90,19 @@ static inline void crash_prepare_suspend(void) {}
|
||||
static inline void crash_post_resume(void) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC_FILE
|
||||
#define ARCH_HAS_KIMAGE_ARCH
|
||||
|
||||
struct kimage_arch {
|
||||
void *dtb;
|
||||
unsigned long dtb_mem;
|
||||
phys_addr_t dtb_mem;
|
||||
phys_addr_t kern_reloc;
|
||||
/* Core ELF header buffer */
|
||||
void *elf_headers;
|
||||
unsigned long elf_headers_mem;
|
||||
unsigned long elf_headers_sz;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KEXEC_FILE
|
||||
extern const struct kexec_file_ops kexec_image_ops;
|
||||
|
||||
struct kimage;
|
||||
|
@ -81,16 +81,15 @@ static inline bool __cpu_uses_extended_idmap_level(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* Set TCR.T0SZ to its default value (based on VA_BITS)
|
||||
* Ensure TCR.T0SZ is set to the provided value.
|
||||
*/
|
||||
static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
|
||||
{
|
||||
unsigned long tcr;
|
||||
unsigned long tcr = read_sysreg(tcr_el1);
|
||||
|
||||
if (!__cpu_uses_extended_idmap())
|
||||
if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz)
|
||||
return;
|
||||
|
||||
tcr = read_sysreg(tcr_el1);
|
||||
tcr &= ~TCR_T0SZ_MASK;
|
||||
tcr |= t0sz << TCR_T0SZ_OFFSET;
|
||||
write_sysreg(tcr, tcr_el1);
|
||||
|
39
arch/arm64/include/asm/trans_pgd.h
Normal file
39
arch/arm64/include/asm/trans_pgd.h
Normal file
@ -0,0 +1,39 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2020, Microsoft Corporation.
|
||||
* Pavel Tatashin <pasha.tatashin@soleen.com>
|
||||
*/
|
||||
|
||||
#ifndef _ASM_TRANS_TABLE_H
|
||||
#define _ASM_TRANS_TABLE_H
|
||||
|
||||
#include <linux/bits.h>
|
||||
#include <linux/types.h>
|
||||
#include <asm/pgtable-types.h>
|
||||
|
||||
/*
 * struct trans_pgd_info - allocator description handed to the trans_pgd_*()
 * helpers.
 *
 * trans_alloc_page
 *   - Allocator that should return exactly one zeroed page. If this
 *     allocator fails, trans_pgd_create_copy() and trans_pgd_map_page()
 *     return -ENOMEM error.
 *
 * trans_alloc_arg
 *   - Passed to trans_alloc_page as an argument
 */
struct trans_pgd_info {
	/* Page allocator callback; receives trans_alloc_arg. */
	void *(*trans_alloc_page)(void *arg);
	/* Opaque cookie forwarded unchanged to trans_alloc_page. */
	void *trans_alloc_arg;
};
|
||||
|
||||
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd,
|
||||
unsigned long start, unsigned long end);
|
||||
|
||||
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
|
||||
void *page, unsigned long dst_addr, pgprot_t pgprot);
|
||||
|
||||
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
|
||||
unsigned long *t0sz, void *page);
|
||||
|
||||
#endif /* _ASM_TRANS_TABLE_H */
|
@ -16,7 +16,6 @@
|
||||
#define pr_fmt(x) "hibernate: " x
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/suspend.h>
|
||||
@ -31,13 +30,12 @@
|
||||
#include <asm/memory.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/mte.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pgtable-hwdef.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/smp.h>
|
||||
#include <asm/smp_plat.h>
|
||||
#include <asm/suspend.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/trans_pgd.h>
|
||||
#include <asm/virt.h>
|
||||
|
||||
/*
|
||||
@ -178,52 +176,9 @@ int arch_hibernation_header_restore(void *addr)
|
||||
}
|
||||
EXPORT_SYMBOL(arch_hibernation_header_restore);
|
||||
|
||||
static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
|
||||
unsigned long dst_addr,
|
||||
pgprot_t pgprot)
|
||||
static void *hibernate_page_alloc(void *arg)
|
||||
{
|
||||
pgd_t *pgdp;
|
||||
p4d_t *p4dp;
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
|
||||
if (pgd_none(READ_ONCE(*pgdp))) {
|
||||
pudp = (void *)get_safe_page(GFP_ATOMIC);
|
||||
if (!pudp)
|
||||
return -ENOMEM;
|
||||
pgd_populate(&init_mm, pgdp, pudp);
|
||||
}
|
||||
|
||||
p4dp = p4d_offset(pgdp, dst_addr);
|
||||
if (p4d_none(READ_ONCE(*p4dp))) {
|
||||
pudp = (void *)get_safe_page(GFP_ATOMIC);
|
||||
if (!pudp)
|
||||
return -ENOMEM;
|
||||
p4d_populate(&init_mm, p4dp, pudp);
|
||||
}
|
||||
|
||||
pudp = pud_offset(p4dp, dst_addr);
|
||||
if (pud_none(READ_ONCE(*pudp))) {
|
||||
pmdp = (void *)get_safe_page(GFP_ATOMIC);
|
||||
if (!pmdp)
|
||||
return -ENOMEM;
|
||||
pud_populate(&init_mm, pudp, pmdp);
|
||||
}
|
||||
|
||||
pmdp = pmd_offset(pudp, dst_addr);
|
||||
if (pmd_none(READ_ONCE(*pmdp))) {
|
||||
ptep = (void *)get_safe_page(GFP_ATOMIC);
|
||||
if (!ptep)
|
||||
return -ENOMEM;
|
||||
pmd_populate_kernel(&init_mm, pmdp, ptep);
|
||||
}
|
||||
|
||||
ptep = pte_offset_kernel(pmdp, dst_addr);
|
||||
set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
|
||||
|
||||
return 0;
|
||||
return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -239,11 +194,16 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
|
||||
* page system.
|
||||
*/
|
||||
static int create_safe_exec_page(void *src_start, size_t length,
|
||||
unsigned long dst_addr,
|
||||
phys_addr_t *phys_dst_addr)
|
||||
{
|
||||
struct trans_pgd_info trans_info = {
|
||||
.trans_alloc_page = hibernate_page_alloc,
|
||||
.trans_alloc_arg = (__force void *)GFP_ATOMIC,
|
||||
};
|
||||
|
||||
void *page = (void *)get_safe_page(GFP_ATOMIC);
|
||||
pgd_t *trans_pgd;
|
||||
phys_addr_t trans_ttbr0;
|
||||
unsigned long t0sz;
|
||||
int rc;
|
||||
|
||||
if (!page)
|
||||
@ -251,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
|
||||
|
||||
memcpy(page, src_start, length);
|
||||
__flush_icache_range((unsigned long)page, (unsigned long)page + length);
|
||||
|
||||
trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
|
||||
if (!trans_pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
|
||||
PAGE_KERNEL_EXEC);
|
||||
rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -270,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length,
|
||||
* page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
|
||||
* runtime services), while for a userspace-driven test_resume cycle it
|
||||
* points to userspace page tables (and we must point it at a zero page
|
||||
* ourselves). Elsewhere we only (un)install the idmap with preemption
|
||||
* disabled, so T0SZ should be as required regardless.
|
||||
* ourselves).
|
||||
*
|
||||
* We change T0SZ as part of installing the idmap. This is undone by
|
||||
* cpu_uninstall_idmap() in __cpu_suspend_exit().
|
||||
*/
|
||||
cpu_set_reserved_ttbr0();
|
||||
local_flush_tlb_all();
|
||||
write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
|
||||
__cpu_set_tcr_t0sz(t0sz);
|
||||
write_sysreg(trans_ttbr0, ttbr0_el1);
|
||||
isb();
|
||||
|
||||
*phys_dst_addr = virt_to_phys(page);
|
||||
@ -462,182 +419,6 @@ int swsusp_arch_suspend(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
|
||||
{
|
||||
pte_t pte = READ_ONCE(*src_ptep);
|
||||
|
||||
if (pte_valid(pte)) {
|
||||
/*
|
||||
* Resume will overwrite areas that may be marked
|
||||
* read only (code, rodata). Clear the RDONLY bit from
|
||||
* the temporary mappings we use during restore.
|
||||
*/
|
||||
set_pte(dst_ptep, pte_mkwrite(pte));
|
||||
} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
|
||||
/*
|
||||
* debug_pagealloc will removed the PTE_VALID bit if
|
||||
* the page isn't in use by the resume kernel. It may have
|
||||
* been in use by the original kernel, in which case we need
|
||||
* to put it back in our copy to do the restore.
|
||||
*
|
||||
* Before marking this entry valid, check the pfn should
|
||||
* be mapped.
|
||||
*/
|
||||
BUG_ON(!pfn_valid(pte_pfn(pte)));
|
||||
|
||||
set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
|
||||
}
|
||||
}
|
||||
|
||||
static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
pte_t *src_ptep;
|
||||
pte_t *dst_ptep;
|
||||
unsigned long addr = start;
|
||||
|
||||
dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
|
||||
if (!dst_ptep)
|
||||
return -ENOMEM;
|
||||
pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
|
||||
dst_ptep = pte_offset_kernel(dst_pmdp, start);
|
||||
|
||||
src_ptep = pte_offset_kernel(src_pmdp, start);
|
||||
do {
|
||||
_copy_pte(dst_ptep, src_ptep, addr);
|
||||
} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
pmd_t *src_pmdp;
|
||||
pmd_t *dst_pmdp;
|
||||
unsigned long next;
|
||||
unsigned long addr = start;
|
||||
|
||||
if (pud_none(READ_ONCE(*dst_pudp))) {
|
||||
dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
|
||||
if (!dst_pmdp)
|
||||
return -ENOMEM;
|
||||
pud_populate(&init_mm, dst_pudp, dst_pmdp);
|
||||
}
|
||||
dst_pmdp = pmd_offset(dst_pudp, start);
|
||||
|
||||
src_pmdp = pmd_offset(src_pudp, start);
|
||||
do {
|
||||
pmd_t pmd = READ_ONCE(*src_pmdp);
|
||||
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (pmd_none(pmd))
|
||||
continue;
|
||||
if (pmd_table(pmd)) {
|
||||
if (copy_pte(dst_pmdp, src_pmdp, addr, next))
|
||||
return -ENOMEM;
|
||||
} else {
|
||||
set_pmd(dst_pmdp,
|
||||
__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
|
||||
}
|
||||
} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
pud_t *dst_pudp;
|
||||
pud_t *src_pudp;
|
||||
unsigned long next;
|
||||
unsigned long addr = start;
|
||||
|
||||
if (p4d_none(READ_ONCE(*dst_p4dp))) {
|
||||
dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
|
||||
if (!dst_pudp)
|
||||
return -ENOMEM;
|
||||
p4d_populate(&init_mm, dst_p4dp, dst_pudp);
|
||||
}
|
||||
dst_pudp = pud_offset(dst_p4dp, start);
|
||||
|
||||
src_pudp = pud_offset(src_p4dp, start);
|
||||
do {
|
||||
pud_t pud = READ_ONCE(*src_pudp);
|
||||
|
||||
next = pud_addr_end(addr, end);
|
||||
if (pud_none(pud))
|
||||
continue;
|
||||
if (pud_table(pud)) {
|
||||
if (copy_pmd(dst_pudp, src_pudp, addr, next))
|
||||
return -ENOMEM;
|
||||
} else {
|
||||
set_pud(dst_pudp,
|
||||
__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
|
||||
}
|
||||
} while (dst_pudp++, src_pudp++, addr = next, addr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
p4d_t *dst_p4dp;
|
||||
p4d_t *src_p4dp;
|
||||
unsigned long next;
|
||||
unsigned long addr = start;
|
||||
|
||||
dst_p4dp = p4d_offset(dst_pgdp, start);
|
||||
src_p4dp = p4d_offset(src_pgdp, start);
|
||||
do {
|
||||
next = p4d_addr_end(addr, end);
|
||||
if (p4d_none(READ_ONCE(*src_p4dp)))
|
||||
continue;
|
||||
if (copy_pud(dst_p4dp, src_p4dp, addr, next))
|
||||
return -ENOMEM;
|
||||
} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
unsigned long next;
|
||||
unsigned long addr = start;
|
||||
pgd_t *src_pgdp = pgd_offset_k(start);
|
||||
|
||||
dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
|
||||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
if (pgd_none(READ_ONCE(*src_pgdp)))
|
||||
continue;
|
||||
if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
|
||||
return -ENOMEM;
|
||||
} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
int rc;
|
||||
pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
|
||||
|
||||
if (!trans_pgd) {
|
||||
pr_err("Failed to allocate memory for temporary page tables.\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rc = copy_page_tables(trans_pgd, start, end);
|
||||
if (!rc)
|
||||
*dst_pgdp = trans_pgd;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
|
||||
*
|
||||
@ -650,16 +431,20 @@ int swsusp_arch_resume(void)
|
||||
void *zero_page;
|
||||
size_t exit_size;
|
||||
pgd_t *tmp_pg_dir;
|
||||
phys_addr_t phys_hibernate_exit;
|
||||
void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
|
||||
void *, phys_addr_t, phys_addr_t);
|
||||
struct trans_pgd_info trans_info = {
|
||||
.trans_alloc_page = hibernate_page_alloc,
|
||||
.trans_alloc_arg = (void *)GFP_ATOMIC,
|
||||
};
|
||||
|
||||
/*
|
||||
* Restoring the memory image will overwrite the ttbr1 page tables.
|
||||
* Create a second copy of just the linear map, and use this when
|
||||
* restoring.
|
||||
*/
|
||||
rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
|
||||
rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET,
|
||||
PAGE_END);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@ -673,19 +458,13 @@ int swsusp_arch_resume(void)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Locate the exit code in the bottom-but-one page, so that *NULL
|
||||
* still has disastrous affects.
|
||||
*/
|
||||
hibernate_exit = (void *)PAGE_SIZE;
|
||||
exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
|
||||
/*
|
||||
* Copy swsusp_arch_suspend_exit() to a safe page. This will generate
|
||||
* a new set of ttbr0 page tables and load them.
|
||||
*/
|
||||
rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
|
||||
(unsigned long)hibernate_exit,
|
||||
&phys_hibernate_exit);
|
||||
(phys_addr_t *)&hibernate_exit);
|
||||
if (rc) {
|
||||
pr_err("Failed to create safe executable page for hibernate_exit code.\n");
|
||||
return rc;
|
||||
@ -704,7 +483,7 @@ int swsusp_arch_resume(void)
|
||||
* We can skip this step if we booted at EL1, or are running with VHE.
|
||||
*/
|
||||
if (el2_reset_needed()) {
|
||||
phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
|
||||
phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
|
||||
el2_vectors += hibernate_el2_vectors -
|
||||
__hibernate_exit_text_start; /* offset */
|
||||
|
||||
|
@ -42,6 +42,7 @@ static void _kexec_image_info(const char *func, int line,
|
||||
pr_debug(" start: %lx\n", kimage->start);
|
||||
pr_debug(" head: %lx\n", kimage->head);
|
||||
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
|
||||
pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc);
|
||||
|
||||
for (i = 0; i < kimage->nr_segments; i++) {
|
||||
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
||||
@ -58,6 +59,23 @@ void machine_kexec_cleanup(struct kimage *kimage)
|
||||
/* Empty routine needed to avoid build errors. */
|
||||
}
|
||||
|
||||
int machine_kexec_post_load(struct kimage *kimage)
|
||||
{
|
||||
void *reloc_code = page_to_virt(kimage->control_code_page);
|
||||
|
||||
memcpy(reloc_code, arm64_relocate_new_kernel,
|
||||
arm64_relocate_new_kernel_size);
|
||||
kimage->arch.kern_reloc = __pa(reloc_code);
|
||||
kexec_image_info(kimage);
|
||||
|
||||
/* Flush the reloc_code in preparation for its execution. */
|
||||
__flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
|
||||
flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
|
||||
arm64_relocate_new_kernel_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* machine_kexec_prepare - Prepare for a kexec reboot.
|
||||
*
|
||||
@ -67,8 +85,6 @@ void machine_kexec_cleanup(struct kimage *kimage)
|
||||
*/
|
||||
int machine_kexec_prepare(struct kimage *kimage)
|
||||
{
|
||||
kexec_image_info(kimage);
|
||||
|
||||
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
|
||||
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
|
||||
return -EBUSY;
|
||||
@ -143,8 +159,6 @@ static void kexec_segment_flush(const struct kimage *kimage)
|
||||
*/
|
||||
void machine_kexec(struct kimage *kimage)
|
||||
{
|
||||
phys_addr_t reboot_code_buffer_phys;
|
||||
void *reboot_code_buffer;
|
||||
bool in_kexec_crash = (kimage == kexec_crash_image);
|
||||
bool stuck_cpus = cpus_are_stuck_in_kernel();
|
||||
|
||||
@ -155,31 +169,6 @@ void machine_kexec(struct kimage *kimage)
|
||||
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
|
||||
"Some CPUs may be stale, kdump will be unreliable.\n");
|
||||
|
||||
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
|
||||
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
|
||||
|
||||
kexec_image_info(kimage);
|
||||
|
||||
/*
|
||||
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
|
||||
* after the kernel is shut down.
|
||||
*/
|
||||
memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
|
||||
arm64_relocate_new_kernel_size);
|
||||
|
||||
/* Flush the reboot_code_buffer in preparation for its execution. */
|
||||
__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
|
||||
|
||||
/*
|
||||
* Although we've killed off the secondary CPUs, we don't update
|
||||
* the online mask if we're handling a crash kernel and consequently
|
||||
* need to avoid flush_icache_range(), which will attempt to IPI
|
||||
* the offline CPUs. Therefore, we must use the __* variant here.
|
||||
*/
|
||||
__flush_icache_range((uintptr_t)reboot_code_buffer,
|
||||
(uintptr_t)reboot_code_buffer +
|
||||
arm64_relocate_new_kernel_size);
|
||||
|
||||
/* Flush the kimage list and its buffers. */
|
||||
kexec_list_flush(kimage);
|
||||
|
||||
@ -193,7 +182,7 @@ void machine_kexec(struct kimage *kimage)
|
||||
|
||||
/*
|
||||
* cpu_soft_restart will shutdown the MMU, disable data caches, then
|
||||
* transfer control to the reboot_code_buffer which contains a copy of
|
||||
* transfer control to the kern_reloc which contains a copy of
|
||||
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
|
||||
* uses physical addressing to relocate the new image to its final
|
||||
* position and transfers control to the image entry point when the
|
||||
@ -203,12 +192,8 @@ void machine_kexec(struct kimage *kimage)
|
||||
* userspace (kexec-tools).
|
||||
* In kexec_file case, the kernel starts directly without purgatory.
|
||||
*/
|
||||
cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
|
||||
#ifdef CONFIG_KEXEC_FILE
|
||||
kimage->arch.dtb_mem);
|
||||
#else
|
||||
0);
|
||||
#endif
|
||||
cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
|
||||
kimage->arch.dtb_mem);
|
||||
|
||||
BUG(); /* Should never get here. */
|
||||
}
|
||||
|
@ -17,28 +17,24 @@
|
||||
/*
|
||||
* arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
|
||||
*
|
||||
* The memory that the old kernel occupies may be overwritten when coping the
|
||||
* The memory that the old kernel occupies may be overwritten when copying the
|
||||
* new image to its final location. To assure that the
|
||||
* arm64_relocate_new_kernel routine which does that copy is not overwritten,
|
||||
* all code and data needed by arm64_relocate_new_kernel must be between the
|
||||
* symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
|
||||
* machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
|
||||
* control_code_page, a special page which has been set up to be preserved
|
||||
* during the copy operation.
|
||||
* safe memory that has been set up to be preserved during the copy operation.
|
||||
*/
|
||||
SYM_CODE_START(arm64_relocate_new_kernel)
|
||||
|
||||
/* Setup the list loop variables. */
|
||||
mov x18, x2 /* x18 = dtb address */
|
||||
mov x17, x1 /* x17 = kimage_start */
|
||||
mov x16, x0 /* x16 = kimage_head */
|
||||
raw_dcache_line_size x15, x0 /* x15 = dcache line size */
|
||||
mov x14, xzr /* x14 = entry ptr */
|
||||
mov x13, xzr /* x13 = copy dest */
|
||||
|
||||
/* Check if the new image needs relocation. */
|
||||
tbnz x16, IND_DONE_BIT, .Ldone
|
||||
|
||||
raw_dcache_line_size x15, x1 /* x15 = dcache line size */
|
||||
.Lloop:
|
||||
and x12, x16, PAGE_MASK /* x12 = addr */
|
||||
|
||||
@ -47,44 +43,28 @@ SYM_CODE_START(arm64_relocate_new_kernel)
|
||||
tbz x16, IND_SOURCE_BIT, .Ltest_indirection
|
||||
|
||||
/* Invalidate dest page to PoC. */
|
||||
mov x0, x13
|
||||
add x20, x0, #PAGE_SIZE
|
||||
mov x2, x13
|
||||
add x20, x2, #PAGE_SIZE
|
||||
sub x1, x15, #1
|
||||
bic x0, x0, x1
|
||||
2: dc ivac, x0
|
||||
add x0, x0, x15
|
||||
cmp x0, x20
|
||||
bic x2, x2, x1
|
||||
2: dc ivac, x2
|
||||
add x2, x2, x15
|
||||
cmp x2, x20
|
||||
b.lo 2b
|
||||
dsb sy
|
||||
|
||||
mov x20, x13
|
||||
mov x21, x12
|
||||
copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
|
||||
|
||||
/* dest += PAGE_SIZE */
|
||||
add x13, x13, PAGE_SIZE
|
||||
copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
|
||||
b .Lnext
|
||||
|
||||
.Ltest_indirection:
|
||||
tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
|
||||
|
||||
/* ptr = addr */
|
||||
mov x14, x12
|
||||
mov x14, x12 /* ptr = addr */
|
||||
b .Lnext
|
||||
|
||||
.Ltest_destination:
|
||||
tbz x16, IND_DESTINATION_BIT, .Lnext
|
||||
|
||||
/* dest = addr */
|
||||
mov x13, x12
|
||||
|
||||
mov x13, x12 /* dest = addr */
|
||||
.Lnext:
|
||||
/* entry = *ptr++ */
|
||||
ldr x16, [x14], #8
|
||||
|
||||
/* while (!(entry & DONE)) */
|
||||
tbz x16, IND_DONE_BIT, .Lloop
|
||||
|
||||
ldr x16, [x14], #8 /* entry = *ptr++ */
|
||||
tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */
|
||||
.Ldone:
|
||||
/* wait for writes from copy_page to finish */
|
||||
dsb nsh
|
||||
|
@ -6,6 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
|
||||
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
|
||||
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
|
||||
obj-$(CONFIG_NUMA) += numa.o
|
||||
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
|
||||
obj-$(CONFIG_ARM64_MTE) += mteswap.o
|
||||
|
324
arch/arm64/mm/trans_pgd.c
Normal file
324
arch/arm64/mm/trans_pgd.c
Normal file
@ -0,0 +1,324 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* Transitional page tables for kexec and hibernate
|
||||
*
|
||||
* This file derived from: arch/arm64/kernel/hibernate.c
|
||||
*
|
||||
* Copyright (c) 2020, Microsoft Corporation.
|
||||
* Pavel Tatashin <pasha.tatashin@soleen.com>
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Transitional tables are used during system transferring from one world to
|
||||
* another: such as during hibernate restore, and kexec reboots. During these
|
||||
 * phases one cannot rely on the page tables not being overwritten. This is because
|
||||
* hibernate and kexec can overwrite the current page tables during transition.
|
||||
*/
|
||||
|
||||
#include <asm/trans_pgd.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mmzone.h>
|
||||
|
||||
static void *trans_alloc(struct trans_pgd_info *info)
|
||||
{
|
||||
return info->trans_alloc_page(info->trans_alloc_arg);
|
||||
}
|
||||
|
||||
/*
 * Copy a single PTE from the source table into the destination table,
 * making the copy writable. Entries that are neither valid nor (with
 * debug_pagealloc enabled) non-empty leave the destination slot untouched.
 * @addr is accepted for symmetry with the callers but is not used here.
 */
static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
	pte_t entry = READ_ONCE(*src_ptep);

	if (pte_valid(entry)) {
		/*
		 * Resume will overwrite areas that may be marked
		 * read only (code, rodata). Clear the RDONLY bit from
		 * the temporary mappings we use during restore.
		 */
		set_pte(dst_ptep, pte_mkwrite(entry));
		return;
	}

	if (!debug_pagealloc_enabled() || pte_none(entry))
		return;

	/*
	 * debug_pagealloc will remove the PTE_VALID bit if
	 * the page isn't in use by the resume kernel. It may have
	 * been in use by the original kernel, in which case we need
	 * to put it back in our copy to do the restore.
	 *
	 * Before marking this entry valid, check the pfn should
	 * be mapped.
	 */
	BUG_ON(!pfn_valid(pte_pfn(entry)));

	set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(entry)));
}
|
||||
|
||||
/*
 * Allocate a fresh PTE table, hook it into *dst_pmdp and copy every PTE
 * covering [start, end) from the table referenced by *src_pmdp.
 *
 * Returns 0 on success, and -ENOMEM if the table cannot be allocated.
 */
static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
		    pmd_t *src_pmdp, unsigned long start, unsigned long end)
{
	unsigned long va = start;
	pte_t *new_table;
	pte_t *dst;
	pte_t *src;

	new_table = trans_alloc(info);
	if (!new_table)
		return -ENOMEM;
	pmd_populate_kernel(NULL, dst_pmdp, new_table);

	/* Walk both tables in lock-step, one page at a time. */
	dst = pte_offset_kernel(dst_pmdp, start);
	src = pte_offset_kernel(src_pmdp, start);
	do {
		_copy_pte(dst, src, va);
		dst++;
		src++;
		va += PAGE_SIZE;
	} while (va != end);

	return 0;
}
|
||||
|
||||
/*
 * Copy the PMD-level entries covering [start, end) from the table behind
 * *src_pudp into the table behind *dst_pudp, allocating the destination PMD
 * table first if *dst_pudp is still empty. Table entries are copied
 * recursively via copy_pte(); any other non-empty entry is copied with
 * PMD_SECT_RDONLY cleared.
 *
 * Returns 0 on success, and -ENOMEM on allocation failure.
 */
static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp,
		    pud_t *src_pudp, unsigned long start, unsigned long end)
{
	unsigned long va = start;
	unsigned long next;
	pmd_t *dst;
	pmd_t *src;

	if (pud_none(READ_ONCE(*dst_pudp))) {
		pmd_t *new_table = trans_alloc(info);

		if (!new_table)
			return -ENOMEM;
		pud_populate(NULL, dst_pudp, new_table);
	}

	dst = pmd_offset(dst_pudp, start);
	src = pmd_offset(src_pudp, start);
	do {
		pmd_t entry = READ_ONCE(*src);

		next = pmd_addr_end(va, end);

		/* Empty source slots are skipped; the cursors still advance. */
		if (!pmd_none(entry)) {
			if (!pmd_table(entry)) {
				set_pmd(dst,
					__pmd(pmd_val(entry) & ~PMD_SECT_RDONLY));
			} else if (copy_pte(info, dst, src, va, next)) {
				return -ENOMEM;
			}
		}

		dst++;
		src++;
		va = next;
	} while (va != end);

	return 0;
}
|
||||
|
||||
/*
 * Copy the PUD-level entries covering [start, end) from the table behind
 * *src_p4dp into the table behind *dst_p4dp, allocating the destination PUD
 * table first if *dst_p4dp is still empty. Table entries are copied
 * recursively via copy_pmd(); any other non-empty entry is copied with
 * PUD_SECT_RDONLY cleared.
 *
 * Returns 0 on success, and -ENOMEM on allocation failure.
 */
static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp,
		    p4d_t *src_p4dp, unsigned long start,
		    unsigned long end)
{
	unsigned long va = start;
	unsigned long next;
	pud_t *dst;
	pud_t *src;

	if (p4d_none(READ_ONCE(*dst_p4dp))) {
		pud_t *new_table = trans_alloc(info);

		if (!new_table)
			return -ENOMEM;
		p4d_populate(NULL, dst_p4dp, new_table);
	}

	dst = pud_offset(dst_p4dp, start);
	src = pud_offset(src_p4dp, start);
	do {
		pud_t entry = READ_ONCE(*src);

		next = pud_addr_end(va, end);

		/* Empty source slots are skipped; the cursors still advance. */
		if (!pud_none(entry)) {
			if (!pud_table(entry)) {
				set_pud(dst,
					__pud(pud_val(entry) & ~PUD_SECT_RDONLY));
			} else if (copy_pmd(info, dst, src, va, next)) {
				return -ENOMEM;
			}
		}

		dst++;
		src++;
		va = next;
	} while (va != end);

	return 0;
}
|
||||
|
||||
/*
 * Walk the P4D entries covering [start, end) under *src_pgdp and, for each
 * non-empty one, copy the PUD level below it into the matching slot under
 * *dst_pgdp via copy_pud().
 *
 * Returns 0 on success, and -ENOMEM if copy_pud() fails.
 */
static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
		    pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	p4d_t *dst = p4d_offset(dst_pgdp, start);
	p4d_t *src = p4d_offset(src_pgdp, start);
	unsigned long va = start;
	unsigned long next;

	do {
		next = p4d_addr_end(va, end);

		if (!p4d_none(READ_ONCE(*src)) &&
		    copy_pud(info, dst, src, va, next))
			return -ENOMEM;

		dst++;
		src++;
		va = next;
	} while (va != end);

	return 0;
}
|
||||
|
||||
/*
 * Copy the kernel page-table entries covering [start, end) into the page
 * table rooted at @dst_pgdp. The source is located with pgd_offset_k();
 * empty top-level source entries are skipped.
 *
 * Returns 0 on success, and -ENOMEM if a lower level fails to copy.
 */
static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp,
			    unsigned long start, unsigned long end)
{
	pgd_t *src = pgd_offset_k(start);
	pgd_t *dst = pgd_offset_pgd(dst_pgdp, start);
	unsigned long va = start;
	unsigned long next;

	do {
		next = pgd_addr_end(va, end);

		if (!pgd_none(READ_ONCE(*src)) &&
		    copy_p4d(info, dst, src, va, next))
			return -ENOMEM;

		dst++;
		src++;
		va = next;
	} while (va != end);

	return 0;
}
|
||||
|
||||
/*
|
||||
* Create trans_pgd and copy linear map.
|
||||
* info: contains allocator and its argument
|
||||
* dst_pgdp: new page table that is created, and to which map is copied.
|
||||
* start: Start of the interval (inclusive).
|
||||
* end: End of the interval (exclusive).
|
||||
*
|
||||
* Returns 0 on success, and -ENOMEM on failure.
|
||||
*/
|
||||
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
int rc;
|
||||
pgd_t *trans_pgd = trans_alloc(info);
|
||||
|
||||
if (!trans_pgd) {
|
||||
pr_err("Failed to allocate memory for temporary page tables.\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rc = copy_page_tables(info, trans_pgd, start, end);
|
||||
if (!rc)
|
||||
*dst_pgdp = trans_pgd;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add map entry to trans_pgd for a base-size page at PTE level.
|
||||
* info: contains allocator and its argument
|
||||
* trans_pgd: page table in which new map is added.
|
||||
* page: page to be mapped.
|
||||
* dst_addr: new VA address for the page
|
||||
* pgprot: protection for the page.
|
||||
*
|
||||
* Returns 0 on success, and -ENOMEM on failure.
|
||||
*/
|
||||
int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,
|
||||
void *page, unsigned long dst_addr, pgprot_t pgprot)
|
||||
{
|
||||
pgd_t *pgdp;
|
||||
p4d_t *p4dp;
|
||||
pud_t *pudp;
|
||||
pmd_t *pmdp;
|
||||
pte_t *ptep;
|
||||
|
||||
pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
|
||||
if (pgd_none(READ_ONCE(*pgdp))) {
|
||||
p4dp = trans_alloc(info);
|
||||
if (!pgdp)
|
||||
return -ENOMEM;
|
||||
pgd_populate(NULL, pgdp, p4dp);
|
||||
}
|
||||
|
||||
p4dp = p4d_offset(pgdp, dst_addr);
|
||||
if (p4d_none(READ_ONCE(*p4dp))) {
|
||||
pudp = trans_alloc(info);
|
||||
if (!pudp)
|
||||
return -ENOMEM;
|
||||
p4d_populate(NULL, p4dp, pudp);
|
||||
}
|
||||
|
||||
pudp = pud_offset(p4dp, dst_addr);
|
||||
if (pud_none(READ_ONCE(*pudp))) {
|
||||
pmdp = trans_alloc(info);
|
||||
if (!pmdp)
|
||||
return -ENOMEM;
|
||||
pud_populate(NULL, pudp, pmdp);
|
||||
}
|
||||
|
||||
pmdp = pmd_offset(pudp, dst_addr);
|
||||
if (pmd_none(READ_ONCE(*pmdp))) {
|
||||
ptep = trans_alloc(info);
|
||||
if (!ptep)
|
||||
return -ENOMEM;
|
||||
pmd_populate_kernel(NULL, pmdp, ptep);
|
||||
}
|
||||
|
||||
ptep = pte_offset_kernel(pmdp, dst_addr);
|
||||
set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The page we want to idmap may be outside the range covered by VA_BITS that
|
||||
* can be built using the kernel's p?d_populate() helpers. As a one off, for a
|
||||
* single page, we build these page tables bottom up and just assume that will
|
||||
* need the maximum T0SZ.
|
||||
*
|
||||
* Returns 0 on success, and -ENOMEM on failure.
|
||||
* On success trans_ttbr0 contains page table with idmapped page, t0sz is set to
|
||||
* maximum T0SZ for this page.
|
||||
*/
|
||||
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
|
||||
unsigned long *t0sz, void *page)
|
||||
{
|
||||
phys_addr_t dst_addr = virt_to_phys(page);
|
||||
unsigned long pfn = __phys_to_pfn(dst_addr);
|
||||
int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47;
|
||||
int bits_mapped = PAGE_SHIFT - 4;
|
||||
unsigned long level_mask, prev_level_entry, *levels[4];
|
||||
int this_level, index, level_lsb, level_msb;
|
||||
|
||||
dst_addr &= PAGE_MASK;
|
||||
prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC));
|
||||
|
||||
for (this_level = 3; this_level >= 0; this_level--) {
|
||||
levels[this_level] = trans_alloc(info);
|
||||
if (!levels[this_level])
|
||||
return -ENOMEM;
|
||||
|
||||
level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level);
|
||||
level_msb = min(level_lsb + bits_mapped, max_msb);
|
||||
level_mask = GENMASK_ULL(level_msb, level_lsb);
|
||||
|
||||
index = (dst_addr & level_mask) >> level_lsb;
|
||||
*(levels[this_level] + index) = prev_level_entry;
|
||||
|
||||
pfn = virt_to_pfn(levels[this_level]);
|
||||
prev_level_entry = pte_val(pfn_pte(pfn,
|
||||
__pgprot(PMD_TYPE_TABLE)));
|
||||
|
||||
if (level_msb == max_msb)
|
||||
break;
|
||||
}
|
||||
|
||||
*trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn));
|
||||
*t0sz = TCR_T0SZ(max_msb + 1);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user