b398123bff
On arm64, during kdump kernel saves vmcore, it runs into the following bug: ... [ 15.148919] usercopy: Kernel memory exposure attempt detected from SLUB object 'kmem_cache_node' (offset 0, size 4096)! [ 15.159707] ------------[ cut here ]------------ [ 15.164311] kernel BUG at mm/usercopy.c:99! [ 15.168482] Internal error: Oops - BUG: 0 [#1] SMP [ 15.173261] Modules linked in: xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce sbsa_gwdt ast i2c_algo_bit drm_vram_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec drm_ttm_helper ttm drm nvme nvme_core xgene_hwmon i2c_designware_platform i2c_designware_core dm_mirror dm_region_hash dm_log dm_mod overlay squashfs zstd_decompress loop [ 15.206186] CPU: 0 PID: 542 Comm: cp Not tainted 5.16.0-rc4 #1 [ 15.212006] Hardware name: GIGABYTE R272-P30-JG/MP32-AR0-JG, BIOS F12 (SCP: 1.5.20210426) 05/13/2021 [ 15.221125] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 15.228073] pc : usercopy_abort+0x9c/0xa0 [ 15.232074] lr : usercopy_abort+0x9c/0xa0 [ 15.236070] sp : ffff8000121abba0 [ 15.239371] x29: ffff8000121abbb0 x28: 0000000000003000 x27: 0000000000000000 [ 15.246494] x26: 0000000080000400 x25: 0000ffff885c7000 x24: 0000000000000000 [ 15.253617] x23: 000007ff80400000 x22: ffff07ff80401000 x21: 0000000000000001 [ 15.260739] x20: 0000000000001000 x19: ffff07ff80400000 x18: ffffffffffffffff [ 15.267861] x17: 656a626f2042554c x16: 53206d6f72662064 x15: 6574636574656420 [ 15.274983] x14: 74706d6574746120 x13: 2129363930342065 x12: 7a6973202c302074 [ 15.282105] x11: ffffc8b041d1b148 x10: 00000000ffff8000 x9 : ffffc8b04012812c [ 15.289228] x8 : 00000000ffff7fff x7 : ffffc8b041d1b148 x6 : 0000000000000000 [ 15.296349] x5 : 0000000000000000 x4 : 0000000000007fff x3 : 0000000000000000 [ 15.303471] x2 : 0000000000000000 x1 : ffff07ff8c064800 x0 : 000000000000006b [ 15.310593] Call trace: [ 15.313027] usercopy_abort+0x9c/0xa0 [ 15.316677] __check_heap_object+0xd4/0xf0 [ 15.320762] 
__check_object_size.part.0+0x160/0x1e0 [ 15.325628] __check_object_size+0x2c/0x40 [ 15.329711] copy_oldmem_page+0x7c/0x140 [ 15.333623] read_from_oldmem.part.0+0xfc/0x1c0 [ 15.338142] __read_vmcore.constprop.0+0x23c/0x350 [ 15.342920] read_vmcore+0x28/0x34 [ 15.346309] proc_reg_read+0xb4/0xf0 [ 15.349871] vfs_read+0xb8/0x1f0 [ 15.353088] ksys_read+0x74/0x100 [ 15.356390] __arm64_sys_read+0x28/0x34 ... This bug was introduced by commit b261dba2fd
("arm64: kdump: Remove custom linux,usable-memory-range handling"), which moves memblock_cap_memory_range() to fdt, but in doing so violates the rule that memblock_cap_memory_range() must come after memblock_add() etc., as stated in commit e888fa7bb8
("memblock: Check memory add/cap ordering"). As a consequence, the virtual address set up by copy_oldmem_page() is not filtered out by the virt_addr_valid() test in check_heap_object(), and finally hits the BUG_ON(). The memblock allocator has no idea when memblock is fully populated, whereas efi_init() does, so tackle this issue by having efi_init() call early_init_dt_check_for_usable_mem_range(), the interface exposed by of/fdt. Fixes: b261dba2fd
("arm64: kdump: Remove custom linux,usable-memory-range handling") Signed-off-by: Pingfan Liu <kernelfans@gmail.com> Cc: Rob Herring <robh+dt@kernel.org> Cc: Zhen Lei <thunder.leizhen@huawei.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Mike Rapoport <rppt@kernel.org> Cc: Geert Uytterhoeven <geert+renesas@glider.be> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Nick Terrell <terrelln@fb.com> Cc: linux-arm-kernel@lists.infradead.org To: devicetree@vger.kernel.org To: linux-efi@vger.kernel.org Acked-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Rob Herring <robh@kernel.org> Link: https://lore.kernel.org/r/20211215021348.8766-1-kernelfans@gmail.com
283 lines
7.5 KiB
C
283 lines
7.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Extensible Firmware Interface
|
|
*
|
|
* Based on Extensible Firmware Interface Specification version 2.4
|
|
*
|
|
* Copyright (C) 2013 - 2015 Linaro Ltd.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) "efi: " fmt
|
|
|
|
#include <linux/efi.h>
|
|
#include <linux/fwnode.h>
|
|
#include <linux/init.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/mm_types.h>
|
|
#include <linux/of.h>
|
|
#include <linux/of_address.h>
|
|
#include <linux/of_fdt.h>
|
|
#include <linux/platform_device.h>
|
|
#include <linux/screen_info.h>
|
|
|
|
#include <asm/efi.h>
|
|
|
|
/*
 * Tell whether the EFI memory descriptor @md carries at least one of the
 * write-back, write-through or write-combining attributes.
 *
 * Returns 1 if any of those attribute bits is set, 0 otherwise.
 */
static int __init is_memory(efi_memory_desc_t *md)
{
	return !!(md->attribute &
		  (EFI_MEMORY_WB | EFI_MEMORY_WT | EFI_MEMORY_WC));
}
|
|
|
|
/*
|
|
* Translate a EFI virtual address into a physical address: this is necessary,
|
|
* as some data members of the EFI system table are virtually remapped after
|
|
* SetVirtualAddressMap() has been called.
|
|
*/
|
|
static phys_addr_t __init efi_to_phys(unsigned long addr)
|
|
{
|
|
efi_memory_desc_t *md;
|
|
|
|
for_each_efi_memory_desc(md) {
|
|
if (!(md->attribute & EFI_MEMORY_RUNTIME))
|
|
continue;
|
|
if (md->virt_addr == 0)
|
|
/* no virtual mapping has been installed by the stub */
|
|
break;
|
|
if (md->virt_addr <= addr &&
|
|
(addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
|
|
return md->phys_addr + addr - md->virt_addr;
|
|
}
|
|
return addr;
|
|
}
|
|
|
|
static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR;
|
|
static __initdata unsigned long cpu_state_table = EFI_INVALID_TABLE_ADDR;
|
|
|
|
static const efi_config_table_type_t arch_tables[] __initconst = {
|
|
{LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table},
|
|
{LINUX_EFI_ARM_CPU_STATE_TABLE_GUID, &cpu_state_table},
|
|
{}
|
|
};
|
|
|
|
/*
 * Populate the global screen_info and keep the EFI framebuffer unmapped.
 *
 * On CONFIG_ARM, when a screen_info config table address has been recorded,
 * the table is temporarily mapped read-only, copied into screen_info and
 * the text dimensions are forced to 80x25. If the table cannot be mapped an
 * error is logged and nothing else is done.
 *
 * Afterwards, if screen_info describes an EFI framebuffer whose base is
 * reported as mapped memory by memblock, that range is marked NOMAP.
 */
static void __init init_screen_info(void)
{
	if (IS_ENABLED(CONFIG_ARM) &&
	    screen_info_table != EFI_INVALID_TABLE_ADDR) {
		struct screen_info *table;

		table = early_memremap_ro(screen_info_table, sizeof(*table));
		if (!table) {
			pr_err("Could not map screen_info config table\n");
			return;
		}
		screen_info = *table;
		early_memunmap(table, sizeof(*table));

		/* dummycon on ARM needs non-zero values for columns/lines */
		screen_info.orig_video_cols = 80;
		screen_info.orig_video_lines = 25;
	}

	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
		return;

	if (memblock_is_map_memory(screen_info.lfb_base))
		memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size);
}
|
|
|
|
/*
 * Map the EFI system table found at physical address @efi_system_table,
 * validate its header, record the runtime services pointer and revision in
 * the global efi structure, and parse the configuration tables.
 *
 * EFI_BOOT (and EFI_64BIT on 64-bit builds) are set in efi.flags before the
 * header is validated. Both temporary mappings are torn down before
 * returning.
 *
 * Returns 0 on success, -ENOMEM if either the system table or the config
 * table array cannot be mapped, or otherwise the error returned by
 * efi_systab_check_header() / efi_config_parse_tables().
 */
static int __init uefi_init(u64 efi_system_table)
{
	const efi_config_table_type_t *arch;
	efi_config_table_t *config_tables;
	efi_system_table_t *systab;
	size_t table_size;
	int retval;

	systab = early_memremap_ro(efi_system_table, sizeof(*systab));
	if (!systab) {
		pr_warn("Unable to map EFI system table.\n");
		return -ENOMEM;
	}

	set_bit(EFI_BOOT, &efi.flags);
	if (IS_ENABLED(CONFIG_64BIT))
		set_bit(EFI_64BIT, &efi.flags);

	retval = efi_systab_check_header(&systab->hdr, 2);
	if (retval)
		goto unmap_systab;

	efi.runtime = systab->runtime;
	efi.runtime_version = systab->hdr.revision;

	efi_systab_report_header(&systab->hdr, efi_to_phys(systab->fw_vendor));

	/* The arch-specific table list is only passed on CONFIG_ARM. */
	arch = IS_ENABLED(CONFIG_ARM) ? arch_tables : NULL;

	table_size = systab->nr_tables * sizeof(*config_tables);
	config_tables = early_memremap_ro(efi_to_phys(systab->tables),
					  table_size);
	if (config_tables) {
		retval = efi_config_parse_tables(config_tables,
						 systab->nr_tables, arch);
		early_memunmap(config_tables, table_size);
	} else {
		pr_warn("Unable to map EFI config table array.\n");
		retval = -ENOMEM;
	}

unmap_systab:
	early_memunmap(systab, sizeof(*systab));
	return retval;
}
|
|
|
|
/*
|
|
* Return true for regions that can be used as System RAM.
|
|
*/
|
|
static __init int is_usable_memory(efi_memory_desc_t *md)
|
|
{
|
|
switch (md->type) {
|
|
case EFI_LOADER_CODE:
|
|
case EFI_LOADER_DATA:
|
|
case EFI_ACPI_RECLAIM_MEMORY:
|
|
case EFI_BOOT_SERVICES_CODE:
|
|
case EFI_BOOT_SERVICES_DATA:
|
|
case EFI_CONVENTIONAL_MEMORY:
|
|
case EFI_PERSISTENT_MEMORY:
|
|
/*
|
|
* Special purpose memory is 'soft reserved', which means it
|
|
* is set aside initially, but can be hotplugged back in or
|
|
* be assigned to the dax driver after boot.
|
|
*/
|
|
if (efi_soft_reserve_enabled() &&
|
|
(md->attribute & EFI_MEMORY_SP))
|
|
return false;
|
|
|
|
/*
|
|
* According to the spec, these regions are no longer reserved
|
|
* after calling ExitBootServices(). However, we can only use
|
|
* them as System RAM if they can be mapped writeback cacheable.
|
|
*/
|
|
return (md->attribute & EFI_MEMORY_WB);
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static __init void reserve_regions(void)
|
|
{
|
|
efi_memory_desc_t *md;
|
|
u64 paddr, npages, size;
|
|
|
|
if (efi_enabled(EFI_DBG))
|
|
pr_info("Processing EFI memory map:\n");
|
|
|
|
/*
|
|
* Discard memblocks discovered so far: if there are any at this
|
|
* point, they originate from memory nodes in the DT, and UEFI
|
|
* uses its own memory map instead.
|
|
*/
|
|
memblock_dump_all();
|
|
memblock_remove(0, PHYS_ADDR_MAX);
|
|
|
|
for_each_efi_memory_desc(md) {
|
|
paddr = md->phys_addr;
|
|
npages = md->num_pages;
|
|
|
|
if (efi_enabled(EFI_DBG)) {
|
|
char buf[64];
|
|
|
|
pr_info(" 0x%012llx-0x%012llx %s\n",
|
|
paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
|
|
efi_md_typeattr_format(buf, sizeof(buf), md));
|
|
}
|
|
|
|
memrange_efi_to_native(&paddr, &npages);
|
|
size = npages << PAGE_SHIFT;
|
|
|
|
if (is_memory(md)) {
|
|
early_init_dt_add_memory_arch(paddr, size);
|
|
|
|
if (!is_usable_memory(md))
|
|
memblock_mark_nomap(paddr, size);
|
|
|
|
/* keep ACPI reclaim memory intact for kexec etc. */
|
|
if (md->type == EFI_ACPI_RECLAIM_MEMORY)
|
|
memblock_reserve(paddr, size);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Early EFI initialisation.
 *
 * Reads the EFI parameters from the FDT, maps the EFI memory map, parses
 * the system and configuration tables, rebuilds memblock from the EFI
 * memory map, applies the usable-memory-range cap, initialises ESRT and
 * the MOK variable table, reserves the memory holding the EFI memory map
 * itself and finally sets up screen_info.
 *
 * Returns silently if no EFI system table address is found or if
 * uefi_init() fails; panics if the EFI memory map cannot be mapped.
 *
 * On CONFIG_ARM the early memory map mapping is dropped again and, when a
 * CPU state table was recorded, the CPU state captured around
 * ExitBootServices() is checked and optionally logged.
 */
void __init efi_init(void)
{
	struct efi_memory_map_data map_data;
	u64 systab_pa;

	/* Grab UEFI information placed in FDT by stub */
	systab_pa = efi_get_fdt_params(&map_data);
	if (systab_pa == 0)
		return;

	/*
	 * If we are booting via UEFI, the UEFI memory map is the only
	 * description of memory we have, so there is little point in
	 * proceeding if we cannot access it.
	 */
	if (efi_memmap_init_early(&map_data) < 0)
		panic("Unable to map EFI memory map.\n");

	WARN(efi.memmap.desc_version != 1,
	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
	     efi.memmap.desc_version);

	if (uefi_init(systab_pa) < 0) {
		efi_memmap_unmap();
		return;
	}

	reserve_regions();

	/*
	 * Capping must happen after the memblock_add() calls. memblock is
	 * fully populated once reserve_regions() has run, so cap it now.
	 */
	early_init_dt_check_for_usable_mem_range();

	efi_esrt_init();
	efi_mokvar_table_init();

	/* Reserve the page-aligned range that holds the EFI memory map. */
	memblock_reserve(map_data.phys_map & PAGE_MASK,
			 PAGE_ALIGN(map_data.size +
				    (map_data.phys_map & ~PAGE_MASK)));

	init_screen_info();

#ifdef CONFIG_ARM
	/* ARM does not permit early mappings to persist across paging_init() */
	efi_memmap_unmap();

	if (cpu_state_table != EFI_INVALID_TABLE_ADDR) {
		struct efi_arm_entry_state *entry;
		bool report = false;

		entry = early_memremap_ro(cpu_state_table, sizeof(*entry));
		if (!entry) {
			pr_warn("Unable to map CPU entry state table.\n");
			return;
		}

		/* Bit 0 of the saved SCTLR values is tested at each stage. */
		if (!(entry->sctlr_before_ebs & 1)) {
			pr_warn(FW_BUG "EFI stub was entered with MMU and Dcache disabled, please fix your firmware!\n");
			report = true;
		} else if (!(entry->sctlr_after_ebs & 1)) {
			pr_warn(FW_BUG "ExitBootServices() returned with MMU and Dcache disabled, please fix your firmware!\n");
			report = true;
		}

		if (report || efi_enabled(EFI_DBG)) {
			pr_info("CPSR at EFI stub entry : 0x%08x\n", entry->cpsr_before_ebs);
			pr_info("SCTLR at EFI stub entry : 0x%08x\n", entry->sctlr_before_ebs);
			pr_info("CPSR after ExitBootServices() : 0x%08x\n", entry->cpsr_after_ebs);
			pr_info("SCTLR after ExitBootServices(): 0x%08x\n", entry->sctlr_after_ebs);
		}
		early_memunmap(entry, sizeof(*entry));
	}
#endif
}
|