From 4971531af319f8bdd9a81a87eecfb6b19f2f8c8e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 21 Jun 2016 23:11:38 +0100 Subject: [PATCH 01/29] x86/efi: Test for EFI_MEMMAP functionality when iterating EFI memmap Both efi_find_mirror() and efi_fake_memmap() really want to know whether the EFI memory map is available, not just whether the machine was booted using EFI. efi_fake_memmap() even has a check for EFI_MEMMAP at the start of the function. Since we've already got other code that has this dependency, merge everything under one if() conditional, and remove the now superfluous check from efi_fake_memmap(). Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Taku Izumi Cc: Tony Luck Cc: Xishi Qiu Cc: Kamezawa Hiroyuki Signed-off-by: Matt Fleming --- arch/x86/kernel/setup.c | 19 +++++++++---------- drivers/firmware/efi/fake_mem.c | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa60f5f5a16..4fd69e532c15 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1096,19 +1096,18 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); memblock_x86_fill(); - if (efi_enabled(EFI_BOOT)) { - efi_fake_memmap(); - efi_find_mirror(); - } - reserve_bios_regions(); - /* - * The EFI specification says that boot service code won't be called - * after ExitBootServices(). This is, in fact, a lie. - */ - if (efi_enabled(EFI_MEMMAP)) + if (efi_enabled(EFI_MEMMAP)) { + efi_fake_memmap(); + efi_find_mirror(); + + /* + * The EFI specification says that boot service code won't be + * called after ExitBootServices(). This is, in fact, a lie. 
+ */ efi_reserve_boot_services(); + } /* preallocate 4k for mptable mpc */ early_reserve_e820_mpc_new(); diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 48430aba13c1..c437388a7b85 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -64,7 +64,7 @@ void __init efi_fake_memmap(void) void *old, *new; int i; - if (!nr_fake_mem || !efi_enabled(EFI_MEMMAP)) + if (!nr_fake_mem) return; /* count up the number of EFI memory descriptor */ From ab72a27da4c6c19b0e3d6d7556fdd4afb581c8ac Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 20 Jun 2016 14:36:51 +0100 Subject: [PATCH 02/29] x86/efi: Consolidate region mapping logic EFI regions are currently mapped in two separate places. The bulk of the work is done in efi_map_regions() but when CONFIG_EFI_MIXED is enabled the additional regions that are required when operating in mixed mode are mapped in efi_setup_page_tables(). Pull everything into efi_map_regions() and refactor the test for which regions should be mapped into a should_map_region() function. Generously sprinkle comments to clarify the different cases. Acked-by: Borislav Petkov Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 50 +++++++++++++++++++++++++++++----- arch/x86/platform/efi/efi_64.c | 20 -------------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1fbb408e2e72..625ec729b4e8 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -745,6 +745,46 @@ static void *efi_map_next_entry(void *entry) return entry; } +static bool should_map_region(efi_memory_desc_t *md) +{ + /* + * Runtime regions always require runtime mappings (obviously). 
+ */ + if (md->attribute & EFI_MEMORY_RUNTIME) + return true; + + /* + * 32-bit EFI doesn't suffer from the bug that requires us to + * reserve boot services regions, and mixed mode support + * doesn't exist for 32-bit kernels. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + + /* + * Map all of RAM so that we can access arguments in the 1:1 + * mapping when making EFI runtime calls. + */ + if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_LOADER_DATA || + md->type == EFI_LOADER_CODE) + return true; + } + + /* + * Map boot services regions as a workaround for buggy + * firmware that accesses them even when they shouldn't. + * + * See efi_{reserve,free}_boot_services(). + */ + if (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) + return true; + + return false; +} + /* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. @@ -761,13 +801,9 @@ static void * __init efi_map_regions(int *count, int *pg_shift) p = NULL; while ((p = efi_map_next_entry(p))) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME)) { -#ifdef CONFIG_X86_64 - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) -#endif - continue; - } + + if (!should_map_region(md)) + continue; efi_map_region(md); get_systab_virt_addr(md); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 677e29e29473..45434ea345e9 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -214,7 +214,6 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { unsigned long pfn, text; - efi_memory_desc_t *md; struct page *page; unsigned npages; pgd_t *pgd; @@ -248,25 +247,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (!IS_ENABLED(CONFIG_EFI_MIXED)) return 0; - /* - * Map all of RAM so that we can 
access arguments in the 1:1 - * mapping when making EFI runtime calls. - */ - for_each_efi_memory_desc(md) { - if (md->type != EFI_CONVENTIONAL_MEMORY && - md->type != EFI_LOADER_DATA && - md->type != EFI_LOADER_CODE) - continue; - - pfn = md->phys_addr >> PAGE_SHIFT; - npages = md->num_pages; - - if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) { - pr_err("Failed to map 1:1 memory\n"); - return 1; - } - } - page = alloc_page(GFP_KERNEL|__GFP_DMA32); if (!page) panic("Unable to allocate EFI runtime stack < 4GB\n"); From 9479c7cebfb568f8b8b424be7f1cac120e9eea95 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 26 Feb 2016 21:22:05 +0000 Subject: [PATCH 03/29] efi: Refactor efi_memmap_init_early() into arch-neutral code Every EFI architecture apart from ia64 needs to set up the EFI memory map at efi.memmap, and the code for doing that is essentially the same across all implementations. Therefore, it makes sense to factor this out into the common code under drivers/firmware/efi/. The only slight variation is the data structure out of which we pull the initial memory map information, such as physical address, memory descriptor size and version, etc. We can address this by passing a generic data structure (struct efi_memory_map_data) as the argument to efi_memmap_init_early() which contains the minimum info required for initialising the memory map. In the process, this patch also fixes a few undesirable implementation differences: - ARM and arm64 were failing to clear the EFI_MEMMAP bit when unmapping the early EFI memory map. EFI_MEMMAP indicates whether the EFI memory map is mapped (not the regions contained within) and can be traversed. It's more correct to set the bit as soon as we memremap() the passed in EFI memmap. - Rename efi_unmap_memmap() to efi_memmap_unmap() to adhere to the regular naming scheme. This patch also uses a read-write mapping for the memory map instead of the read-only mapping currently used on ARM and arm64. 
x86 needs the ability to update the memory map in-place when assigning virtual addresses to regions (efi_map_region()) and tagging regions when reserving boot services (efi_reserve_boot_services()). There's no way for the generic fake_mem code to know which mapping to use without introducing some arch-specific constant/hook, so just use read-write since read-only is of dubious value for the EFI memory map. Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- arch/x86/include/asm/efi.h | 1 - arch/x86/platform/efi/efi.c | 66 ++++++++---------------------- arch/x86/platform/efi/quirks.c | 4 +- drivers/firmware/efi/arm-init.c | 17 ++++---- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/efi.c | 46 +++++++++++++++++++++ drivers/firmware/efi/fake_mem.c | 15 ++++--- include/linux/efi.h | 16 ++++++++ 8 files changed, 99 insertions(+), 68 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index d0bb76d81402..4630e2bfa8fb 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -117,7 +117,6 @@ extern int __init efi_memblock_x86_reserve_range(void); extern pgd_t * __init efi_call_phys_prolog(void); extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); -extern void __init efi_unmap_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 625ec729b4e8..5ccde8b6cdd1 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -172,7 +172,9 @@ static void __init do_add_efi_memmap(void) int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; + struct 
efi_memory_map_data data; phys_addr_t pmap; + int rv; if (efi_enabled(EFI_PARAVIRT)) return 0; @@ -187,11 +189,17 @@ int __init efi_memblock_x86_reserve_range(void) #else pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); #endif - efi.memmap.phys_map = pmap; - efi.memmap.nr_map = e->efi_memmap_size / - e->efi_memdesc_size; - efi.memmap.desc_size = e->efi_memdesc_size; - efi.memmap.desc_version = e->efi_memdesc_version; + data.phys_map = pmap; + data.size = e->efi_memmap_size; + data.desc_size = e->efi_memdesc_size; + data.desc_version = e->efi_memdesc_version; + + rv = efi_memmap_init_early(&data); + if (rv) + return rv; + + if (add_efi_memmap) + do_add_efi_memmap(); WARN(efi.memmap.desc_version != 1, "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", @@ -218,19 +226,6 @@ void __init efi_print_memmap(void) } } -void __init efi_unmap_memmap(void) -{ - unsigned long size; - - clear_bit(EFI_MEMMAP, &efi.flags); - - size = efi.memmap.nr_map * efi.memmap.desc_size; - if (efi.memmap.map) { - early_memunmap(efi.memmap.map, size); - efi.memmap.map = NULL; - } -} - static int __init efi_systab_init(void *phys) { if (efi_enabled(EFI_64BIT)) { @@ -414,33 +409,6 @@ static int __init efi_runtime_init(void) return 0; } -static int __init efi_memmap_init(void) -{ - unsigned long addr, size; - - if (efi_enabled(EFI_PARAVIRT)) - return 0; - - /* Map the EFI memory map */ - size = efi.memmap.nr_map * efi.memmap.desc_size; - addr = (unsigned long)efi.memmap.phys_map; - - efi.memmap.map = early_memremap(addr, size); - if (efi.memmap.map == NULL) { - pr_err("Could not map the memory map!\n"); - return -ENOMEM; - } - - efi.memmap.map_end = efi.memmap.map + size; - - if (add_efi_memmap) - do_add_efi_memmap(); - - set_bit(EFI_MEMMAP, &efi.flags); - - return 0; -} - void __init efi_init(void) { efi_char16_t *c16; @@ -498,11 +466,11 @@ void __init efi_init(void) if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else { - if 
(efi_runtime_disabled() || efi_runtime_init()) + if (efi_runtime_disabled() || efi_runtime_init()) { + efi_memmap_unmap(); return; + } } - if (efi_memmap_init()) - return; if (efi_enabled(EFI_DBG)) efi_print_memmap(); @@ -839,7 +807,7 @@ static void __init kexec_enter_virtual_mode(void) * non-native EFI */ if (!efi_is_native()) { - efi_unmap_memmap(); + efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 89d1146f5a6f..47b99108ff8e 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -287,7 +287,7 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } - efi_unmap_memmap(); + efi_memmap_unmap(); } /* @@ -365,7 +365,7 @@ void __init efi_apply_memmap_quirks(void) */ if (!efi_runtime_supported()) { pr_info("Setup done, disabling due to 32/64-bit mismatch\n"); - efi_unmap_memmap(); + efi_memmap_unmap(); } /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. 
*/ diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index c49d50e68aee..5a2df3fefccc 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -211,12 +211,11 @@ static __init void reserve_regions(void) memblock_mark_nomap(paddr, size); } - - set_bit(EFI_MEMMAP, &efi.flags); } void __init efi_init(void) { + struct efi_memory_map_data data; struct efi_fdt_params params; /* Grab UEFI information placed in FDT by stub */ @@ -225,9 +224,12 @@ void __init efi_init(void) efi_system_table = params.system_table; - efi.memmap.phys_map = params.mmap; - efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size); - if (efi.memmap.map == NULL) { + data.desc_version = params.desc_ver; + data.desc_size = params.desc_size; + data.size = params.mmap_size; + data.phys_map = params.mmap; + + if (efi_memmap_init_early(&data) < 0) { /* * If we are booting via UEFI, the UEFI memory map is the only * description of memory we have, so there is little point in @@ -235,9 +237,6 @@ void __init efi_init(void) */ panic("Unable to map EFI memory map.\n"); } - efi.memmap.map_end = efi.memmap.map + params.mmap_size; - efi.memmap.desc_size = params.desc_size; - efi.memmap.desc_version = params.desc_ver; WARN(efi.memmap.desc_version != 1, "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", @@ -248,7 +247,7 @@ void __init efi_init(void) reserve_regions(); efi_memattr_init(); - early_memunmap(efi.memmap.map, params.mmap_size); + efi_memmap_unmap(); memblock_reserve(params.mmap & PAGE_MASK, PAGE_ALIGN(params.mmap_size + diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index c394b81fe452..eedb30351a68 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -114,7 +114,7 @@ static int __init arm_enable_runtime_services(void) pr_info("Remapping and enabling EFI services.\n"); - mapsize = efi.memmap.map_end - efi.memmap.map; + mapsize = efi.memmap.desc_size * 
efi.memmap.nr_map; efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB); if (!efi.memmap.map) { diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5a2631af7410..c1879999abe7 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -544,6 +544,52 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) return ret; } +/** + * efi_memmap_init_early - Map the EFI memory map data structure + * @data: EFI memory map data + * + * Use early_memremap() to map the passed in EFI memory map and assign + * it to efi.memmap. + */ +int __init efi_memmap_init_early(struct efi_memory_map_data *data) +{ + struct efi_memory_map map; + + if (efi_enabled(EFI_PARAVIRT)) + return 0; + + map.phys_map = data->phys_map; + + map.map = early_memremap(data->phys_map, data->size); + if (!map.map) { + pr_err("Could not map the memory map!\n"); + return -ENOMEM; + } + + map.nr_map = data->size / data->desc_size; + map.map_end = map.map + data->size; + + map.desc_version = data->desc_version; + map.desc_size = data->desc_size; + + set_bit(EFI_MEMMAP, &efi.flags); + + efi.memmap = map; + + return 0; +} + +void __init efi_memmap_unmap(void) +{ + unsigned long size; + + size = efi.memmap.desc_size * efi.memmap.nr_map; + + early_memunmap(efi.memmap.map, size); + efi.memmap.map = NULL; + clear_bit(EFI_MEMMAP, &efi.flags); +} + #ifdef CONFIG_EFI_VARS_MODULE static int __init efi_load_efivars(void) { diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index c437388a7b85..939eec47139f 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -57,6 +57,7 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) void __init efi_fake_memmap(void) { u64 start, end, m_start, m_end, m_attr; + struct efi_memory_map_data data; int new_nr_map = efi.memmap.nr_map; efi_memory_desc_t *md; phys_addr_t new_memmap_phy; @@ -180,12 +181,14 @@ void __init efi_fake_memmap(void) } /* swap into 
new EFI memmap */ - efi_unmap_memmap(); - efi.memmap.map = new_memmap; - efi.memmap.phys_map = new_memmap_phy; - efi.memmap.nr_map = new_nr_map; - efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size; - set_bit(EFI_MEMMAP, &efi.flags); + early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); + efi_memmap_unmap(); + + data.phys_map = new_memmap_phy; + data.size = efi.memmap.desc_size * new_nr_map; + data.desc_version = efi.memmap.desc_version; + data.desc_size = efi.memmap.desc_size; + efi_memmap_init_early(&data); /* print new EFI memmap */ efi_print_memmap(); diff --git a/include/linux/efi.h b/include/linux/efi.h index 7f5a58225385..d862d4998580 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -669,6 +669,18 @@ typedef struct { unsigned long tables; } efi_system_table_t; +/* + * Architecture independent structure for describing a memory map for the + * benefit of efi_memmap_init_early(), saving us the need to pass four + * parameters. + */ +struct efi_memory_map_data { + phys_addr_t phys_map; + unsigned long size; + unsigned long desc_version; + unsigned long desc_size; +}; + struct efi_memory_map { phys_addr_t phys_map; void *map; @@ -900,6 +912,10 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, } #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); + +extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); +extern void __init efi_memmap_unmap(void); + extern int efi_config_init(efi_config_table_type_t *arch_tables); #ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); From dca0f971ea6fcf2f1bb78f7995adf80da9f4767f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 27 Feb 2016 15:52:50 +0000 Subject: [PATCH 04/29] efi: Add efi_memmap_init_late() for permanent EFI memmap Drivers need a way to access the EFI memory map at runtime. 
ARM and arm64 currently provide this by remapping the EFI memory map into the vmalloc space before setting up the EFI virtual mappings. x86 does not provide this functionality which has resulted in the code in efi_mem_desc_lookup() where it will manually map individual EFI memmap entries if the memmap has already been torn down on x86, /* * If a driver calls this after efi_free_boot_services, * ->map will be NULL, and the target may also not be mapped. * So just always get our own virtual map on the CPU. * */ md = early_memremap(p, sizeof (*md)); There isn't a good reason for not providing a permanent EFI memory map for runtime queries, especially since the EFI regions are not mapped into the standard kernel page tables. Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 44 +++++++--- arch/x86/platform/efi/quirks.c | 2 - drivers/firmware/efi/arm-runtime.c | 4 +- drivers/firmware/efi/efi.c | 135 ++++++++++++++++++++--------- include/linux/efi.h | 2 + 5 files changed, 130 insertions(+), 57 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5ccde8b6cdd1..33996987ac70 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -827,6 +827,19 @@ static void __init kexec_enter_virtual_mode(void) get_systab_virt_addr(md); } + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map. 
+ */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(efi.memmap.phys_map, + efi.memmap.desc_size * efi.memmap.nr_map)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + save_runtime_map(); BUG_ON(!efi.systab); @@ -888,6 +901,7 @@ static void __init __efi_enter_virtual_mode(void) int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; + phys_addr_t pa; efi.systab = NULL; @@ -905,11 +919,26 @@ static void __init __efi_enter_virtual_mode(void) return; } + pa = __pa(new_memmap); + + /* + * Unregister the early EFI memmap from efi_init() and install + * the new EFI memory map that we are about to pass to the + * firmware via SetVirtualAddressMap(). + */ + efi_memmap_unmap(); + + if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { + pr_err("Failed to remap late EFI memory map\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + save_runtime_map(); BUG_ON(!efi.systab); - if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) { + if (efi_setup_page_tables(pa, 1 << pg_shift)) { clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; } @@ -921,14 +950,14 @@ static void __init __efi_enter_virtual_mode(void) efi.memmap.desc_size * count, efi.memmap.desc_size, efi.memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + (efi_memory_desc_t *)pa); } else { status = efi_thunk_set_virtual_address_map( efi_phys.set_virtual_address_map, efi.memmap.desc_size * count, efi.memmap.desc_size, efi.memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + (efi_memory_desc_t *)pa); } if (status != EFI_SUCCESS) { @@ -960,15 +989,6 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_update_mappings(); efi_dump_pagetable(); - /* - * We mapped the descriptor array into the EFI pagetable above - * but we're not unmapping it here because if we're running in - * EFI mixed mode we need all of memory to be accessible when - * we pass parameters to the EFI 
runtime services in the - * thunking code. - */ - free_pages((unsigned long)new_memmap, pg_shift); - /* clean DUMMY object */ efi_delete_dummy_variable(); } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 47b99108ff8e..9faf18874692 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -286,8 +286,6 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } - - efi_memmap_unmap(); } /* diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index eedb30351a68..ae001450545f 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -116,12 +116,10 @@ static int __init arm_enable_runtime_services(void) mapsize = efi.memmap.desc_size * efi.memmap.nr_map; - efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB); - if (!efi.memmap.map) { + if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) { pr_err("Failed to remap EFI memory map\n"); return -ENOMEM; } - efi.memmap.map_end = efi.memmap.map + mapsize; if (!efi_virtmap_init()) { pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n"); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index c1879999abe7..8a5e0db72b8f 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -347,56 +347,31 @@ subsys_initcall(efisubsys_init); /* * Find the efi memory descriptor for a given physical address. Given a - * physicall address, determine if it exists within an EFI Memory Map entry, + * physical address, determine if it exists within an EFI Memory Map entry, * and if so, populate the supplied memory descriptor with the appropriate * data. 
*/ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) { - struct efi_memory_map *map = &efi.memmap; - phys_addr_t p, e; + efi_memory_desc_t *md; if (!efi_enabled(EFI_MEMMAP)) { pr_err_once("EFI_MEMMAP is not enabled.\n"); return -EINVAL; } - if (!map) { - pr_err_once("efi.memmap is not set.\n"); - return -EINVAL; - } if (!out_md) { pr_err_once("out_md is null.\n"); return -EINVAL; } - if (WARN_ON_ONCE(!map->phys_map)) - return -EINVAL; - if (WARN_ON_ONCE(map->nr_map == 0) || WARN_ON_ONCE(map->desc_size == 0)) - return -EINVAL; - e = map->phys_map + map->nr_map * map->desc_size; - for (p = map->phys_map; p < e; p += map->desc_size) { - efi_memory_desc_t *md; + for_each_efi_memory_desc(md) { u64 size; u64 end; - /* - * If a driver calls this after efi_free_boot_services, - * ->map will be NULL, and the target may also not be mapped. - * So just always get our own virtual map on the CPU. - * - */ - md = early_memremap(p, sizeof (*md)); - if (!md) { - pr_err_once("early_memremap(%pa, %zu) failed.\n", - &p, sizeof (*md)); - return -ENOMEM; - } - if (!(md->attribute & EFI_MEMORY_RUNTIME) && md->type != EFI_BOOT_SERVICES_DATA && md->type != EFI_RUNTIME_SERVICES_DATA) { - early_memunmap(md, sizeof (*md)); continue; } @@ -404,11 +379,8 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) end = md->phys_addr + size; if (phys_addr >= md->phys_addr && phys_addr < end) { memcpy(out_md, md, sizeof(*out_md)); - early_memunmap(md, sizeof (*md)); return 0; } - - early_memunmap(md, sizeof (*md)); } pr_err_once("requested map not found.\n"); return -ENOENT; @@ -545,32 +517,49 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) } /** - * efi_memmap_init_early - Map the EFI memory map data structure + * __efi_memmap_init - Common code for mapping the EFI memory map * @data: EFI memory map data + * @late: Use early or late mapping function? 
 * - * Use early_memremap() to map the passed in EFI memory map and assign - * it to efi.memmap. + * This function takes care of figuring out which function to use to + * map the EFI memory map in efi.memmap based on how far into the boot + * we are. + * + * During bootup @late should be %false since we only have access to + * the early_memremap*() functions as the vmalloc space isn't set up. + * Once the kernel is fully booted we can fall back to the more robust + * memremap*() API. + * + * Returns zero on success, a negative error code on failure. */ -int __init efi_memmap_init_early(struct efi_memory_map_data *data) +static int __init +__efi_memmap_init(struct efi_memory_map_data *data, bool late) { struct efi_memory_map map; + phys_addr_t phys_map; if (efi_enabled(EFI_PARAVIRT)) return 0; - map.phys_map = data->phys_map; + phys_map = data->phys_map; + + if (late) + map.map = memremap(phys_map, data->size, MEMREMAP_WB); + else + map.map = early_memremap(phys_map, data->size); - map.map = early_memremap(data->phys_map, data->size); if (!map.map) { pr_err("Could not map the memory map!\n"); return -ENOMEM; } + map.phys_map = data->phys_map; map.nr_map = data->size / data->desc_size; map.map_end = map.map + data->size; map.desc_version = data->desc_version; map.desc_size = data->desc_size; + map.late = late; set_bit(EFI_MEMMAP, &efi.flags); @@ -579,17 +568,83 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data) return 0; } +/** + * efi_memmap_init_early - Map the EFI memory map data structure + * @data: EFI memory map data + * + * Use early_memremap() to map the passed in EFI memory map and assign + * it to efi.memmap. 
+ */ +int __init efi_memmap_init_early(struct efi_memory_map_data *data) +{ + /* Cannot go backwards */ + WARN_ON(efi.memmap.late); + + return __efi_memmap_init(data, false); +} + void __init efi_memmap_unmap(void) { - unsigned long size; + if (!efi.memmap.late) { + unsigned long size; - size = efi.memmap.desc_size * efi.memmap.nr_map; + size = efi.memmap.desc_size * efi.memmap.nr_map; + early_memunmap(efi.memmap.map, size); + } else { + memunmap(efi.memmap.map); + } - early_memunmap(efi.memmap.map, size); efi.memmap.map = NULL; clear_bit(EFI_MEMMAP, &efi.flags); } +/** + * efi_memmap_init_late - Map efi.memmap with memremap() + * @addr: Physical address of the new EFI memory map + * @size: Size in bytes of the new EFI memory map + * + * Set up a mapping of the EFI memory map using memremap(). This + * function should only be called once the vmalloc space has been + * set up and is therefore not suitable for calling during early EFI + * initialisation, e.g. in efi_init(). Additionally, it expects + * efi_memmap_init_early() to have already been called. + * + * The reason there are two EFI memmap initialisation functions + * (efi_memmap_init_early() and this late version) is because the + * early EFI memmap should be explicitly unmapped once EFI + * initialisation is complete as the fixmap space used to map the EFI + * memmap (via early_memremap()) is a scarce resource. + * + * This late mapping is intended to persist for the duration of + * runtime so that things like efi_mem_desc_lookup() and + * efi_mem_attributes() always work. + * + * Returns zero on success, a negative error code on failure. + */ +int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) +{ + struct efi_memory_map_data data = { + .phys_map = addr, + .size = size, + }; + + /* Did we forget to unmap the early EFI memmap? */ + WARN_ON(efi.memmap.map); + + /* Were we already called? 
*/ + WARN_ON(efi.memmap.late); + + /* + * It makes no sense to allow callers to register different + * values for the following fields. Copy them out of the + * existing early EFI memmap. + */ + data.desc_version = efi.memmap.desc_version; + data.desc_size = efi.memmap.desc_size; + + return __efi_memmap_init(&data, true); +} + #ifdef CONFIG_EFI_VARS_MODULE static int __init efi_load_efivars(void) { diff --git a/include/linux/efi.h b/include/linux/efi.h index d862d4998580..f149676b2fcd 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -688,6 +688,7 @@ struct efi_memory_map { int nr_map; unsigned long desc_version; unsigned long desc_size; + bool late; }; struct efi_fdt_params { @@ -914,6 +915,7 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); +extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); extern int efi_config_init(efi_config_table_type_t *arch_tables); From c8c1a4c5e4ead0d2dcf0f0bcb8bdbdcf877fb3bb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 29 Feb 2016 16:58:18 +0000 Subject: [PATCH 05/29] efi/fake_mem: Refactor main two code chunks into functions There is a whole load of generic EFI memory map code inside of the fake_mem driver which is better suited to being grouped with the rest of the generic EFI code for manipulating EFI memory maps. In preparation for that, this patch refactors the core code, so that it's possible to move entire functions later. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Cc: Taku Izumi Signed-off-by: Matt Fleming --- drivers/firmware/efi/fake_mem.c | 227 +++++++++++++++++++------------- 1 file changed, 133 insertions(+), 94 deletions(-) diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 939eec47139f..446c669431c0 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -54,43 +54,151 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) return 0; } +/** + * efi_fake_memmap_split_count - Count number of additional EFI memmap entries + * @md: EFI memory descriptor to split + * @range: Address range (start, end) to split around + * + * Returns the number of additional EFI memmap entries required to + * accommodate @range. + */ +static int efi_fake_memmap_split_count(efi_memory_desc_t *md, struct range *range) +{ + u64 m_start, m_end; + u64 start, end; + int count = 0; + + start = md->phys_addr; + end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + /* modifying range */ + m_start = range->start; + m_end = range->end; + + if (m_start <= start) { + /* split into 2 parts */ + if (start < m_end && m_end < end) + count++; + } + + if (start < m_start && m_start < end) { + /* split into 3 parts */ + if (m_end < end) + count += 2; + /* split into 2 parts */ + if (end <= m_end) + count++; + } + + return count; +} + +/** + * efi_fake_memmap_insert - Insert a fake memory region in an EFI memmap + * @old_memmap: The existing EFI memory map structure + * @buf: Address of buffer to store new map + * @mem: Fake memory map entry to insert + * + * It is suggested that you call efi_fake_memmap_split_count() first + * to see how large @buf needs to be. 
+ */ +static void efi_fake_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct fake_mem *mem) +{ + u64 m_start, m_end, m_attr; + efi_memory_desc_t *md; + u64 start, end; + void *old, *new; + + /* modifying range */ + m_start = mem->range.start; + m_end = mem->range.end; + m_attr = mem->attribute; + + for (old = old_memmap->map, new = buf; + old < old_memmap->map_end; + old += old_memmap->desc_size, new += old_memmap->desc_size) { + + /* copy original EFI memory descriptor */ + memcpy(new, old, old_memmap->desc_size); + md = new; + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + if (m_start <= start && end <= m_end) + md->attribute |= m_attr; + + if (m_start <= start && + (start < m_end && m_end < end)) { + /* first part */ + md->attribute |= m_attr; + md->num_pages = (m_end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && m_end < end) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* middle part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->attribute |= m_attr; + md->phys_addr = m_start; + md->num_pages = (m_end - m_start + 1) >> + EFI_PAGE_SHIFT; + /* last part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - m_end) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && + (end <= m_end)) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_start; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + 
md->attribute |= m_attr; + } + } +} + void __init efi_fake_memmap(void) { - u64 start, end, m_start, m_end, m_attr; struct efi_memory_map_data data; int new_nr_map = efi.memmap.nr_map; efi_memory_desc_t *md; phys_addr_t new_memmap_phy; void *new_memmap; - void *old, *new; int i; if (!nr_fake_mem) return; /* count up the number of EFI memory descriptor */ - for_each_efi_memory_desc(md) { - start = md->phys_addr; - end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; + for (i = 0; i < nr_fake_mem; i++) { + for_each_efi_memory_desc(md) { + struct range *r = &fake_mems[i].range; - for (i = 0; i < nr_fake_mem; i++) { - /* modifying range */ - m_start = fake_mems[i].range.start; - m_end = fake_mems[i].range.end; - - if (m_start <= start) { - /* split into 2 parts */ - if (start < m_end && m_end < end) - new_nr_map++; - } - if (start < m_start && m_start < end) { - /* split into 3 parts */ - if (m_end < end) - new_nr_map += 2; - /* split into 2 parts */ - if (end <= m_end) - new_nr_map++; - } + new_nr_map += efi_fake_memmap_split_count(md, r); } } @@ -108,77 +216,8 @@ void __init efi_fake_memmap(void) return; } - for (old = efi.memmap.map, new = new_memmap; - old < efi.memmap.map_end; - old += efi.memmap.desc_size, new += efi.memmap.desc_size) { - - /* copy original EFI memory descriptor */ - memcpy(new, old, efi.memmap.desc_size); - md = new; - start = md->phys_addr; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; - - for (i = 0; i < nr_fake_mem; i++) { - /* modifying range */ - m_start = fake_mems[i].range.start; - m_end = fake_mems[i].range.end; - m_attr = fake_mems[i].attribute; - - if (m_start <= start && end <= m_end) - md->attribute |= m_attr; - - if (m_start <= start && - (start < m_end && m_end < end)) { - /* first part */ - md->attribute |= m_attr; - md->num_pages = (m_end - md->phys_addr + 1) >> - EFI_PAGE_SHIFT; - /* latter part */ - new += efi.memmap.desc_size; - memcpy(new, old, efi.memmap.desc_size); - md = new; - md->phys_addr = m_end + 1; - 
md->num_pages = (end - md->phys_addr + 1) >> - EFI_PAGE_SHIFT; - } - - if ((start < m_start && m_start < end) && m_end < end) { - /* first part */ - md->num_pages = (m_start - md->phys_addr) >> - EFI_PAGE_SHIFT; - /* middle part */ - new += efi.memmap.desc_size; - memcpy(new, old, efi.memmap.desc_size); - md = new; - md->attribute |= m_attr; - md->phys_addr = m_start; - md->num_pages = (m_end - m_start + 1) >> - EFI_PAGE_SHIFT; - /* last part */ - new += efi.memmap.desc_size; - memcpy(new, old, efi.memmap.desc_size); - md = new; - md->phys_addr = m_end + 1; - md->num_pages = (end - m_end) >> - EFI_PAGE_SHIFT; - } - - if ((start < m_start && m_start < end) && - (end <= m_end)) { - /* first part */ - md->num_pages = (m_start - md->phys_addr) >> - EFI_PAGE_SHIFT; - /* latter part */ - new += efi.memmap.desc_size; - memcpy(new, old, efi.memmap.desc_size); - md = new; - md->phys_addr = m_start; - md->num_pages = (end - md->phys_addr + 1) >> - EFI_PAGE_SHIFT; - md->attribute |= m_attr; - } - } - } + for (i = 0; i < nr_fake_mem; i++) + efi_fake_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]); /* swap into new EFI memmap */ early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); From 60863c0d1a96b740048cc7d94a2d00d6f89ba3d8 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 29 Feb 2016 20:30:39 +0000 Subject: [PATCH 06/29] efi: Split out EFI memory map functions into new file Also move the functions from the EFI fake mem driver since future patches will require access to the memmap insertion code even if CONFIG_EFI_FAKE_MEM isn't enabled. This will be useful when we need to build custom EFI memory maps to allow drivers to mark regions as reserved. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Cc: Taku Izumi Signed-off-by: Matt Fleming --- drivers/firmware/efi/Makefile | 2 +- drivers/firmware/efi/efi.c | 129 --------------- drivers/firmware/efi/fake_mem.c | 143 +---------------- drivers/firmware/efi/memmap.c | 267 ++++++++++++++++++++++++++++++++ include/linux/efi.h | 10 ++ 5 files changed, 284 insertions(+), 267 deletions(-) create mode 100644 drivers/firmware/efi/memmap.c diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index a219640f881f..b3f5e2adc49f 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -10,7 +10,7 @@ KASAN_SANITIZE_runtime-wrappers.o := n obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o -obj-$(CONFIG_EFI) += capsule.o +obj-$(CONFIG_EFI) += capsule.o memmap.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_ESRT) += esrt.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8a5e0db72b8f..d4886fd50c16 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -516,135 +516,6 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) return ret; } -/** - * __efi_memmap_init - Common code for mapping the EFI memory map - * @data: EFI memory map data - * @late: Use early or late mapping function? - * - * This function takes care of figuring out which function to use to - * map the EFI memory map in efi.memmap based on how far into the boot - * we are. - * - * During bootup @late should be %false since we only have access to - * the early_memremap*() functions as the vmalloc space isn't setup. - * Once the kernel is fully booted we can fallback to the more robust - * memremap*() API. - * - * Returns zero on success, a negative error code on failure. 
- */ -static int __init -__efi_memmap_init(struct efi_memory_map_data *data, bool late) -{ - struct efi_memory_map map; - phys_addr_t phys_map; - - if (efi_enabled(EFI_PARAVIRT)) - return 0; - - phys_map = data->phys_map; - - if (late) - map.map = memremap(phys_map, data->size, MEMREMAP_WB); - else - map.map = early_memremap(phys_map, data->size); - - if (!map.map) { - pr_err("Could not map the memory map!\n"); - return -ENOMEM; - } - - map.phys_map = data->phys_map; - map.nr_map = data->size / data->desc_size; - map.map_end = map.map + data->size; - - map.desc_version = data->desc_version; - map.desc_size = data->desc_size; - map.late = late; - - set_bit(EFI_MEMMAP, &efi.flags); - - efi.memmap = map; - - return 0; -} - -/** - * efi_memmap_init_early - Map the EFI memory map data structure - * @data: EFI memory map data - * - * Use early_memremap() to map the passed in EFI memory map and assign - * it to efi.memmap. - */ -int __init efi_memmap_init_early(struct efi_memory_map_data *data) -{ - /* Cannot go backwards */ - WARN_ON(efi.memmap.late); - - return __efi_memmap_init(data, false); -} - -void __init efi_memmap_unmap(void) -{ - if (!efi.memmap.late) { - unsigned long size; - - size = efi.memmap.desc_size * efi.memmap.nr_map; - early_memunmap(efi.memmap.map, size); - } else { - memunmap(efi.memmap.map); - } - - efi.memmap.map = NULL; - clear_bit(EFI_MEMMAP, &efi.flags); -} - -/** - * efi_memmap_init_late - Map efi.memmap with memremap() - * @phys_addr: Physical address of the new EFI memory map - * @size: Size in bytes of the new EFI memory map - * - * Setup a mapping of the EFI memory map using ioremap_cache(). This - * function should only be called once the vmalloc space has been - * setup and is therefore not suitable for calling during early EFI - * initialise, e.g. in efi_init(). Additionally, it expects - * efi_memmap_init_early() to have already been called. 
- * - * The reason there are two EFI memmap initialisation - * (efi_memmap_init_early() and this late version) is because the - * early EFI memmap should be explicitly unmapped once EFI - * initialisation is complete as the fixmap space used to map the EFI - * memmap (via early_memremap()) is a scarce resource. - * - * This late mapping is intended to persist for the duration of - * runtime so that things like efi_mem_desc_lookup() and - * efi_mem_attributes() always work. - * - * Returns zero on success, a negative error code on failure. - */ -int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) -{ - struct efi_memory_map_data data = { - .phys_map = addr, - .size = size, - }; - - /* Did we forget to unmap the early EFI memmap? */ - WARN_ON(efi.memmap.map); - - /* Were we already called? */ - WARN_ON(efi.memmap.late); - - /* - * It makes no sense to allow callers to register different - * values for the following fields. Copy them out of the - * existing early EFI memmap. 
- */ - data.desc_version = efi.memmap.desc_version; - data.desc_size = efi.memmap.desc_size; - - return __efi_memmap_init(&data, true); -} - #ifdef CONFIG_EFI_VARS_MODULE static int __init efi_load_efivars(void) { diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 446c669431c0..0054730f9bae 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -35,17 +35,13 @@ #define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM -struct fake_mem { - struct range range; - u64 attribute; -}; -static struct fake_mem fake_mems[EFI_MAX_FAKEMEM]; +static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM]; static int nr_fake_mem; static int __init cmp_fake_mem(const void *x1, const void *x2) { - const struct fake_mem *m1 = x1; - const struct fake_mem *m2 = x2; + const struct efi_mem_range *m1 = x1; + const struct efi_mem_range *m2 = x2; if (m1->range.start < m2->range.start) return -1; @@ -54,133 +50,6 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) return 0; } -/** - * efi_fake_memmap_split_count - Count number of additional EFI memmap entries - * @md: EFI memory descriptor to split - * @range: Address range (start, end) to split around - * - * Returns the number of additional EFI memmap entries required to - * accomodate @range. 
- */ -static int efi_fake_memmap_split_count(efi_memory_desc_t *md, struct range *range) -{ - u64 m_start, m_end; - u64 start, end; - int count = 0; - - start = md->phys_addr; - end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; - - /* modifying range */ - m_start = range->start; - m_end = range->end; - - if (m_start <= start) { - /* split into 2 parts */ - if (start < m_end && m_end < end) - count++; - } - - if (start < m_start && m_start < end) { - /* split into 3 parts */ - if (m_end < end) - count += 2; - /* split into 2 parts */ - if (end <= m_end) - count++; - } - - return count; -} - -/** - * efi_fake_memmap_insert - Insert a fake memory region in an EFI memmap - * @old_memmap: The existing EFI memory map structure - * @buf: Address of buffer to store new map - * @mem: Fake memory map entry to insert - * - * It is suggested that you call efi_fake_memmap_split_count() first - * to see how large @buf needs to be. - */ -static void efi_fake_memmap_insert(struct efi_memory_map *old_memmap, - void *buf, struct fake_mem *mem) -{ - u64 m_start, m_end, m_attr; - efi_memory_desc_t *md; - u64 start, end; - void *old, *new; - - /* modifying range */ - m_start = mem->range.start; - m_end = mem->range.end; - m_attr = mem->attribute; - - for (old = old_memmap->map, new = buf; - old < old_memmap->map_end; - old += old_memmap->desc_size, new += old_memmap->desc_size) { - - /* copy original EFI memory descriptor */ - memcpy(new, old, old_memmap->desc_size); - md = new; - start = md->phys_addr; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; - - if (m_start <= start && end <= m_end) - md->attribute |= m_attr; - - if (m_start <= start && - (start < m_end && m_end < end)) { - /* first part */ - md->attribute |= m_attr; - md->num_pages = (m_end - md->phys_addr + 1) >> - EFI_PAGE_SHIFT; - /* latter part */ - new += old_memmap->desc_size; - memcpy(new, old, old_memmap->desc_size); - md = new; - md->phys_addr = m_end + 1; - md->num_pages = (end - md->phys_addr + 1) 
>> - EFI_PAGE_SHIFT; - } - - if ((start < m_start && m_start < end) && m_end < end) { - /* first part */ - md->num_pages = (m_start - md->phys_addr) >> - EFI_PAGE_SHIFT; - /* middle part */ - new += old_memmap->desc_size; - memcpy(new, old, old_memmap->desc_size); - md = new; - md->attribute |= m_attr; - md->phys_addr = m_start; - md->num_pages = (m_end - m_start + 1) >> - EFI_PAGE_SHIFT; - /* last part */ - new += old_memmap->desc_size; - memcpy(new, old, old_memmap->desc_size); - md = new; - md->phys_addr = m_end + 1; - md->num_pages = (end - m_end) >> - EFI_PAGE_SHIFT; - } - - if ((start < m_start && m_start < end) && - (end <= m_end)) { - /* first part */ - md->num_pages = (m_start - md->phys_addr) >> - EFI_PAGE_SHIFT; - /* latter part */ - new += old_memmap->desc_size; - memcpy(new, old, old_memmap->desc_size); - md = new; - md->phys_addr = m_start; - md->num_pages = (end - md->phys_addr + 1) >> - EFI_PAGE_SHIFT; - md->attribute |= m_attr; - } - } -} - void __init efi_fake_memmap(void) { struct efi_memory_map_data data; @@ -198,7 +67,7 @@ void __init efi_fake_memmap(void) for_each_efi_memory_desc(md) { struct range *r = &fake_mems[i].range; - new_nr_map += efi_fake_memmap_split_count(md, r); + new_nr_map += efi_memmap_split_count(md, r); } } @@ -217,7 +86,7 @@ void __init efi_fake_memmap(void) } for (i = 0; i < nr_fake_mem; i++) - efi_fake_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]); + efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]); /* swap into new EFI memmap */ early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); @@ -265,7 +134,7 @@ static int __init setup_fake_mem(char *p) p++; } - sort(fake_mems, nr_fake_mem, sizeof(struct fake_mem), + sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), cmp_fake_mem, NULL); for (i = 0; i < nr_fake_mem; i++) diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c new file mode 100644 index 000000000000..2df7238eb44e --- /dev/null +++ b/drivers/firmware/efi/memmap.c @@ 
-0,0 +1,267 @@ +/* + * Common EFI memory map functions. + */ + +#define pr_fmt(fmt) "efi: " fmt + +#include +#include +#include +#include +#include + +/** + * __efi_memmap_init - Common code for mapping the EFI memory map + * @data: EFI memory map data + * @late: Use early or late mapping function? + * + * This function takes care of figuring out which function to use to + * map the EFI memory map in efi.memmap based on how far into the boot + * we are. + * + * During bootup @late should be %false since we only have access to + * the early_memremap*() functions as the vmalloc space isn't setup. + * Once the kernel is fully booted we can fallback to the more robust + * memremap*() API. + * + * Returns zero on success, a negative error code on failure. + */ +static int __init +__efi_memmap_init(struct efi_memory_map_data *data, bool late) +{ + struct efi_memory_map map; + phys_addr_t phys_map; + + if (efi_enabled(EFI_PARAVIRT)) + return 0; + + phys_map = data->phys_map; + + if (late) + map.map = memremap(phys_map, data->size, MEMREMAP_WB); + else + map.map = early_memremap(phys_map, data->size); + + if (!map.map) { + pr_err("Could not map the memory map!\n"); + return -ENOMEM; + } + + map.phys_map = data->phys_map; + map.nr_map = data->size / data->desc_size; + map.map_end = map.map + data->size; + + map.desc_version = data->desc_version; + map.desc_size = data->desc_size; + map.late = late; + + set_bit(EFI_MEMMAP, &efi.flags); + + efi.memmap = map; + + return 0; +} + +/** + * efi_memmap_init_early - Map the EFI memory map data structure + * @data: EFI memory map data + * + * Use early_memremap() to map the passed in EFI memory map and assign + * it to efi.memmap. 
+ */ +int __init efi_memmap_init_early(struct efi_memory_map_data *data) +{ + /* Cannot go backwards */ + WARN_ON(efi.memmap.late); + + return __efi_memmap_init(data, false); +} + +void __init efi_memmap_unmap(void) +{ + if (!efi.memmap.late) { + unsigned long size; + + size = efi.memmap.desc_size * efi.memmap.nr_map; + early_memunmap(efi.memmap.map, size); + } else { + memunmap(efi.memmap.map); + } + + efi.memmap.map = NULL; + clear_bit(EFI_MEMMAP, &efi.flags); +} + +/** + * efi_memmap_init_late - Map efi.memmap with memremap() + * @addr: Physical address of the new EFI memory map + * @size: Size in bytes of the new EFI memory map + * + * Setup a mapping of the EFI memory map using memremap(). This + * function should only be called once the vmalloc space has been + * setup and is therefore not suitable for calling during early EFI + * initialise, e.g. in efi_init(). Additionally, it expects + * efi_memmap_init_early() to have already been called. + * + * The reason there are two EFI memmap initialisation + * (efi_memmap_init_early() and this late version) is because the + * early EFI memmap should be explicitly unmapped once EFI + * initialisation is complete as the fixmap space used to map the EFI + * memmap (via early_memremap()) is a scarce resource. + * + * This late mapping is intended to persist for the duration of + * runtime so that things like efi_mem_desc_lookup() and + * efi_mem_attributes() always work. + * + * Returns zero on success, a negative error code on failure. + */ +int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) +{ + struct efi_memory_map_data data = { + .phys_map = addr, + .size = size, + }; + + /* Did we forget to unmap the early EFI memmap? */ + WARN_ON(efi.memmap.map); + + /* Were we already called? */ + WARN_ON(efi.memmap.late); + + /* + * It makes no sense to allow callers to register different + * values for the following fields. Copy them out of the + * existing early EFI memmap. 
+ */ + data.desc_version = efi.memmap.desc_version; + data.desc_size = efi.memmap.desc_size; + + return __efi_memmap_init(&data, true); +} + +/** + * efi_memmap_split_count - Count number of additional EFI memmap entries + * @md: EFI memory descriptor to split + * @range: Address range (start, end) to split around + * + * Returns the number of additional EFI memmap entries required to + * accommodate @range. + */ +int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range) +{ + u64 m_start, m_end; + u64 start, end; + int count = 0; + + start = md->phys_addr; + end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + /* modifying range */ + m_start = range->start; + m_end = range->end; + + if (m_start <= start) { + /* split into 2 parts */ + if (start < m_end && m_end < end) + count++; + } + + if (start < m_start && m_start < end) { + /* split into 3 parts */ + if (m_end < end) + count += 2; + /* split into 2 parts */ + if (end <= m_end) + count++; + } + + return count; +} + +/** + * efi_memmap_insert - Insert a memory region in an EFI memmap + * @old_memmap: The existing EFI memory map structure + * @buf: Address of buffer to store new map + * @mem: Memory map entry to insert + * + * It is suggested that you call efi_memmap_split_count() first + * to see how large @buf needs to be. 
+ */ +void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf, + struct efi_mem_range *mem) +{ + u64 m_start, m_end, m_attr; + efi_memory_desc_t *md; + u64 start, end; + void *old, *new; + + /* modifying range */ + m_start = mem->range.start; + m_end = mem->range.end; + m_attr = mem->attribute; + + for (old = old_memmap->map, new = buf; + old < old_memmap->map_end; + old += old_memmap->desc_size, new += old_memmap->desc_size) { + + /* copy original EFI memory descriptor */ + memcpy(new, old, old_memmap->desc_size); + md = new; + start = md->phys_addr; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + + if (m_start <= start && end <= m_end) + md->attribute |= m_attr; + + if (m_start <= start && + (start < m_end && m_end < end)) { + /* first part */ + md->attribute |= m_attr; + md->num_pages = (m_end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && m_end < end) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* middle part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->attribute |= m_attr; + md->phys_addr = m_start; + md->num_pages = (m_end - m_start + 1) >> + EFI_PAGE_SHIFT; + /* last part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_end + 1; + md->num_pages = (end - m_end) >> + EFI_PAGE_SHIFT; + } + + if ((start < m_start && m_start < end) && + (end <= m_end)) { + /* first part */ + md->num_pages = (m_start - md->phys_addr) >> + EFI_PAGE_SHIFT; + /* latter part */ + new += old_memmap->desc_size; + memcpy(new, old, old_memmap->desc_size); + md = new; + md->phys_addr = m_start; + md->num_pages = (end - md->phys_addr + 1) >> + EFI_PAGE_SHIFT; + 
md->attribute |= m_attr; + } + } +} diff --git a/include/linux/efi.h b/include/linux/efi.h index f149676b2fcd..84c8638c7a8b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -691,6 +692,11 @@ struct efi_memory_map { bool late; }; +struct efi_mem_range { + struct range range; + u64 attribute; +}; + struct efi_fdt_params { u64 system_table; u64 mmap; @@ -917,6 +923,10 @@ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); +extern int __init efi_memmap_split_count(efi_memory_desc_t *md, + struct range *range); +extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, + void *buf, struct efi_mem_range *mem); extern int efi_config_init(efi_config_table_type_t *arch_tables); #ifdef CONFIG_EFI_ESRT From c45f4da33a297f85435f8dccb26a24852ea01bb9 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 22 Jun 2016 16:54:00 +0100 Subject: [PATCH 07/29] efi: Add efi_memmap_install() for installing new EFI memory maps While efi_memmap_init_{early,late}() exist for architecture code to install memory maps from firmware data and for the virtual memory regions respectively, drivers don't care which stage of the boot we're at and just want to swap the existing memmap for a modified one. efi_memmap_install() abstracts the details of how the new memory map should be mapped and the existing one unmapped. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Cc: Taku Izumi Signed-off-by: Matt Fleming --- drivers/firmware/efi/fake_mem.c | 8 +------- drivers/firmware/efi/memmap.c | 25 +++++++++++++++++++++++++ include/linux/efi.h | 1 + 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c index 0054730f9bae..520a40e5e0e4 100644 --- a/drivers/firmware/efi/fake_mem.c +++ b/drivers/firmware/efi/fake_mem.c @@ -52,7 +52,6 @@ static int __init cmp_fake_mem(const void *x1, const void *x2) void __init efi_fake_memmap(void) { - struct efi_memory_map_data data; int new_nr_map = efi.memmap.nr_map; efi_memory_desc_t *md; phys_addr_t new_memmap_phy; @@ -90,13 +89,8 @@ void __init efi_fake_memmap(void) /* swap into new EFI memmap */ early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); - efi_memmap_unmap(); - data.phys_map = new_memmap_phy; - data.size = efi.memmap.desc_size * new_nr_map; - data.desc_version = efi.memmap.desc_version; - data.desc_size = efi.memmap.desc_size; - efi_memmap_init_early(&data); + efi_memmap_install(new_memmap_phy, new_nr_map); /* print new EFI memmap */ efi_print_memmap(); diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 2df7238eb44e..cd96086fd851 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -139,6 +139,31 @@ int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size) return __efi_memmap_init(&data, true); } +/** + * efi_memmap_install - Install a new EFI memory map in efi.memmap + * @addr: Physical address of the memory map + * @nr_map: Number of entries in the memory map + * + * Unlike efi_memmap_init_*(), this function does not allow the caller + * to switch from early to late mappings. It simply uses the existing + * mapping function and installs the new memmap. 
+ * + * Returns zero on success, a negative error code on failure. + */ +int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map) +{ + struct efi_memory_map_data data; + + efi_memmap_unmap(); + + data.phys_map = addr; + data.size = efi.memmap.desc_size * nr_map; + data.desc_version = efi.memmap.desc_version; + data.desc_size = efi.memmap.desc_size; + + return __efi_memmap_init(&data, efi.memmap.late); +} + /** * efi_memmap_split_count - Count number of additional EFI memmap entries * @md: EFI memory descriptor to split diff --git a/include/linux/efi.h b/include/linux/efi.h index 84c8638c7a8b..987c18f6fcae 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -923,6 +923,7 @@ extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); +extern int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map); extern int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range); extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, From 816e76129ed5fadd28e526c43397c79775194b5c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 29 Feb 2016 21:22:52 +0000 Subject: [PATCH 08/29] efi: Allow drivers to reserve boot services forever Today, it is not possible for drivers to reserve EFI boot services for access after efi_free_boot_services() has been called on x86. For ARM/arm64 it can be done simply by calling memblock_reserve(). Having this ability for all three architectures is desirable for a couple of reasons, 1) It saves drivers copying data out of those regions 2) kexec reboot can now make use of things like ESRT Instead of using the standard memblock_reserve() which is insufficient to reserve the region on x86 (see efi_reserve_boot_services()), a new API is introduced in this patch; efi_mem_reserve(). 
efi.memmap now always represents which EFI memory regions are available. On x86 the EFI boot services regions that have not been reserved via efi_mem_reserve() will be removed from efi.memmap during efi_free_boot_services(). This has implications for kexec, since it is not possible for a newly kexec'd kernel to access the same boot services regions that the initial boot kernel had access to unless they are reserved by every kexec kernel in the chain. Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- arch/x86/platform/efi/quirks.c | 121 ++++++++++++++++++++++++++++++--- drivers/firmware/efi/efi.c | 30 ++++++++ include/linux/efi.h | 1 + 3 files changed, 141 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 9faf18874692..f14b7a9da24b 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -163,6 +163,71 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, } EXPORT_SYMBOL_GPL(efi_query_variable_store); +/* + * The UEFI specification makes it clear that the operating system is + * free to do whatever it wants with boot services code after + * ExitBootServices() has been called. Ignoring this recommendation a + * significant bunch of EFI implementations continue calling into boot + * services code (SetVirtualAddressMap). In order to work around such + * buggy implementations we reserve boot services region during EFI + * init and make sure it stays executable. Then, after + * SetVirtualAddressMap(), it is discarded. + * + * However, some boot services regions contain data that is required + * by drivers, so we need to track which memory ranges can never be + * freed. This is done by tagging those regions with the + * EFI_MEMORY_RUNTIME attribute. 
+ * + * Any driver that wants to mark a region as reserved must use + * efi_mem_reserve() which will insert a new EFI memory descriptor + * into efi.memmap (splitting existing regions if necessary) and tag + * it with EFI_MEMORY_RUNTIME. + */ +void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) +{ + phys_addr_t new_phys, new_size; + struct efi_mem_range mr; + efi_memory_desc_t md; + int num_entries; + void *new; + + if (efi_mem_desc_lookup(addr, &md)) { + pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); + return; + } + + if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { + pr_err("Region spans EFI memory descriptors, %pa\n", &addr); + return; + } + + mr.range.start = addr; + mr.range.end = addr + size; + mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; + + num_entries = efi_memmap_split_count(&md, &mr.range); + num_entries += efi.memmap.nr_map; + + new_size = efi.memmap.desc_size * num_entries; + + new_phys = memblock_alloc(new_size, 0); + if (!new_phys) { + pr_err("Could not allocate boot services memmap\n"); + return; + } + + new = early_memremap(new_phys, new_size); + if (!new) { + pr_err("Failed to map new boot services memmap\n"); + return; + } + + efi_memmap_insert(&efi.memmap, new, &mr); + early_memunmap(new, new_size); + + efi_memmap_install(new_phys, num_entries); +} + /* * Helper function for efi_reserve_boot_services() to figure out if we * can free regions in efi_free_boot_services(). @@ -184,15 +249,6 @@ static bool can_free_region(u64 start, u64 size) return true; } -/* - * The UEFI specification makes it clear that the operating system is free to do - * whatever it wants with boot services code after ExitBootServices() has been - * called. Ignoring this recommendation a significant bunch of EFI implementations - * continue calling into boot services code (SetVirtualAddressMap). 
In order to - * work around such buggy implementations we reserve boot services region during - * EFI init and make sure it stays executable. Then, after SetVirtualAddressMap(), it -* is discarded. -*/ void __init efi_reserve_boot_services(void) { efi_memory_desc_t *md; @@ -249,7 +305,10 @@ void __init efi_reserve_boot_services(void) void __init efi_free_boot_services(void) { + phys_addr_t new_phys, new_size; efi_memory_desc_t *md; + int num_entries = 0; + void *new, *new_md; for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; @@ -257,12 +316,16 @@ void __init efi_free_boot_services(void) size_t rm_size; if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) + md->type != EFI_BOOT_SERVICES_DATA) { + num_entries++; continue; + } /* Do not free, someone else owns it: */ - if (md->attribute & EFI_MEMORY_RUNTIME) + if (md->attribute & EFI_MEMORY_RUNTIME) { + num_entries++; continue; + } /* * Nasty quirk: if all sub-1MB memory is used for boot @@ -286,6 +349,42 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + + new_size = efi.memmap.desc_size * num_entries; + new_phys = memblock_alloc(new_size, 0); + if (!new_phys) { + pr_err("Failed to allocate new EFI memmap\n"); + return; + } + + new = memremap(new_phys, new_size, MEMREMAP_WB); + if (!new) { + pr_err("Failed to map new EFI memmap\n"); + return; + } + + /* + * Build a new EFI memmap that excludes any boot services + * regions that are not tagged EFI_MEMORY_RUNTIME, since those + * regions have now been freed. 
+ */ + new_md = new; + for_each_efi_memory_desc(md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + (md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA)) + continue; + + memcpy(new_md, md, efi.memmap.desc_size); + new_md += efi.memmap.desc_size; + } + + memunmap(new); + + if (efi_memmap_install(new_phys, num_entries)) { + pr_err("Could not install new EFI memmap\n"); + return; + } } /* diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index d4886fd50c16..dfe07316cae5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -396,6 +397,35 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md) return end; } +void __init __weak efi_arch_mem_reserve(phys_addr_t addr, u64 size) {} + +/** + * efi_mem_reserve - Reserve an EFI memory region + * @addr: Physical address to reserve + * @size: Size of reservation + * + * Mark a region as reserved from general kernel allocation and + * prevent it being released by efi_free_boot_services(). + * + * This function should be called by drivers once they've parsed EFI + * configuration tables to figure out where their data lives, e.g. + * efi_esrt_init(). + */ +void __init efi_mem_reserve(phys_addr_t addr, u64 size) +{ + if (!memblock_is_region_reserved(addr, size)) + memblock_reserve(addr, size); + + /* + * Some architectures (x86) reserve all boot services ranges + * until efi_free_boot_services() because of buggy firmware + * implementations. This means the above memblock_reserve() is + * superfluous on x86 and instead what it needs to do is + * ensure the region @addr, @size is not freed. 
+ */ + efi_arch_mem_reserve(addr, size); +} + static __initdata efi_config_table_type_t common_tables[] = { {ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20}, {ACPI_TABLE_GUID, "ACPI", &efi.acpi}, diff --git a/include/linux/efi.h b/include/linux/efi.h index 987c18f6fcae..3fe4f3c47834 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -944,6 +944,7 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); +extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_reserve_boot_services(void); From 31ce8cc68180803aa481c0c1daac29d8eaceca9d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 1 Mar 2016 23:02:56 +0000 Subject: [PATCH 09/29] efi/runtime-map: Use efi.memmap directly instead of a copy Now that efi.memmap is available all of the time there's no need to allocate and build a separate copy of the EFI memory map. Furthermore, efi.memmap contains boot services regions but only those regions that have been reserved via efi_mem_reserve(). Using efi.memmap allows us to pass boot services across kexec reboot so that the ESRT and BGRT drivers will now work. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 40 ------------------------------ drivers/firmware/efi/runtime-map.c | 35 ++++++++++---------------- include/linux/efi.h | 4 --- 3 files changed, 13 insertions(+), 66 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 33996987ac70..342cebd1e17c 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -592,42 +592,6 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) } } -static void __init save_runtime_map(void) -{ -#ifdef CONFIG_KEXEC_CORE - unsigned long desc_size; - efi_memory_desc_t *md; - void *tmp, *q = NULL; - int count = 0; - - if (efi_enabled(EFI_OLD_MEMMAP)) - return; - - desc_size = efi.memmap.desc_size; - - for_each_efi_memory_desc(md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || - (md->type == EFI_BOOT_SERVICES_CODE) || - (md->type == EFI_BOOT_SERVICES_DATA)) - continue; - tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL); - if (!tmp) - goto out; - q = tmp; - - memcpy(q + count * desc_size, md, desc_size); - count++; - } - - efi_runtime_map_setup(q, count, desc_size); - return; - -out: - kfree(q); - pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); -#endif -} - static void *realloc_pages(void *old_memmap, int old_shift) { void *ret; @@ -840,8 +804,6 @@ static void __init kexec_enter_virtual_mode(void) return; } - save_runtime_map(); - BUG_ON(!efi.systab); num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE); @@ -934,8 +896,6 @@ static void __init __efi_enter_virtual_mode(void) return; } - save_runtime_map(); - BUG_ON(!efi.systab); if (efi_setup_page_tables(pa, 1 << pg_shift)) { diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 5c55227a34c8..8e64b77aeac9 100644 --- 
a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -14,10 +14,6 @@ #include -static void *efi_runtime_map; -static int nr_efi_runtime_map; -static u32 efi_memdesc_size; - struct efi_runtime_map_entry { efi_memory_desc_t md; struct kobject kobj; /* kobject for each entry */ @@ -106,7 +102,8 @@ static struct kobj_type __refdata map_ktype = { static struct kset *map_kset; static struct efi_runtime_map_entry * -add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) +add_sysfs_runtime_map_entry(struct kobject *kobj, int nr, + efi_memory_desc_t *md) { int ret; struct efi_runtime_map_entry *entry; @@ -124,8 +121,7 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) return ERR_PTR(-ENOMEM); } - memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size, - sizeof(efi_memory_desc_t)); + memcpy(&entry->md, md, sizeof(efi_memory_desc_t)); kobject_init(&entry->kobj, &map_ktype); entry->kobj.kset = map_kset; @@ -142,12 +138,12 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr) int efi_get_runtime_map_size(void) { - return nr_efi_runtime_map * efi_memdesc_size; + return efi.memmap.nr_map * efi.memmap.desc_size; } int efi_get_runtime_map_desc_size(void) { - return efi_memdesc_size; + return efi.memmap.desc_size; } int efi_runtime_map_copy(void *buf, size_t bufsz) @@ -157,38 +153,33 @@ int efi_runtime_map_copy(void *buf, size_t bufsz) if (sz > bufsz) sz = bufsz; - memcpy(buf, efi_runtime_map, sz); + memcpy(buf, efi.memmap.map, sz); return 0; } -void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) -{ - efi_runtime_map = map; - nr_efi_runtime_map = nr_entries; - efi_memdesc_size = desc_size; -} - int __init efi_runtime_map_init(struct kobject *efi_kobj) { int i, j, ret = 0; struct efi_runtime_map_entry *entry; + efi_memory_desc_t *md; - if (!efi_runtime_map) + if (!efi_enabled(EFI_MEMMAP)) return 0; - map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL); + map_entries = kzalloc(efi.memmap.nr_map * 
sizeof(entry), GFP_KERNEL); if (!map_entries) { ret = -ENOMEM; goto out; } - for (i = 0; i < nr_efi_runtime_map; i++) { - entry = add_sysfs_runtime_map_entry(efi_kobj, i); + i = 0; + for_each_efi_memory_desc(md) { + entry = add_sysfs_runtime_map_entry(efi_kobj, i, md); if (IS_ERR(entry)) { ret = PTR_ERR(entry); goto out_add_entry; } - *(map_entries + i) = entry; + *(map_entries + i++) = entry; } return 0; diff --git a/include/linux/efi.h b/include/linux/efi.h index 3fe4f3c47834..d8b555db81c7 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1357,7 +1357,6 @@ extern int efi_capsule_update(efi_capsule_header_t *capsule, #ifdef CONFIG_EFI_RUNTIME_MAP int efi_runtime_map_init(struct kobject *); -void efi_runtime_map_setup(void *, int, u32); int efi_get_runtime_map_size(void); int efi_get_runtime_map_desc_size(void); int efi_runtime_map_copy(void *buf, size_t bufsz); @@ -1367,9 +1366,6 @@ static inline int efi_runtime_map_init(struct kobject *kobj) return 0; } -static inline void -efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {} - static inline int efi_get_runtime_map_size(void) { return 0; From 8e80632fb23f021ce5a6957f2edcdae4645a7030 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 1 Mar 2016 23:08:03 +0000 Subject: [PATCH 10/29] efi/esrt: Use efi_mem_reserve() and avoid a kmalloc() We can use the new efi_mem_reserve() API to mark the ESRT table as reserved forever and save ourselves the trouble of copying the data out into a kmalloc buffer. The added advantage is that now the ESRT driver will work across kexec reboot. 
Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Signed-off-by: Matt Fleming --- drivers/firmware/efi/esrt.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 75feb3f5829b..b93cd11f9bcc 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -235,7 +235,7 @@ static struct attribute_group esrt_attr_group = { }; /* - * remap the table, copy it to kmalloced pages, and unmap it. + * remap the table, validate it, mark it reserved and unmap it. */ void __init efi_esrt_init(void) { @@ -335,7 +335,7 @@ void __init efi_esrt_init(void) end = esrt_data + size; pr_info("Reserving ESRT space from %pa to %pa.\n", &esrt_data, &end); - memblock_reserve(esrt_data, esrt_data_size); + efi_mem_reserve(esrt_data, esrt_data_size); pr_debug("esrt-init: loaded.\n"); err_memunmap: @@ -382,28 +382,18 @@ static void cleanup_entry_list(void) static int __init esrt_sysfs_init(void) { int error; - struct efi_system_resource_table __iomem *ioesrt; pr_debug("esrt-sysfs: loading.\n"); if (!esrt_data || !esrt_data_size) return -ENOSYS; - ioesrt = ioremap(esrt_data, esrt_data_size); - if (!ioesrt) { + esrt = ioremap(esrt_data, esrt_data_size); + if (!esrt) { pr_err("ioremap(%pa, %zu) failed.\n", &esrt_data, esrt_data_size); return -ENOMEM; } - esrt = kmalloc(esrt_data_size, GFP_KERNEL); - if (!esrt) { - pr_err("kmalloc failed. 
(wanted %zu bytes)\n", esrt_data_size); - iounmap(ioesrt); - return -ENOMEM; - } - - memcpy_fromio(esrt, ioesrt, esrt_data_size); - esrt_kobj = kobject_create_and_add("esrt", efi_kobj); if (!esrt_kobj) { pr_err("Firmware table registration failed.\n"); @@ -429,8 +419,6 @@ static int __init esrt_sysfs_init(void) if (error) goto err_cleanup_list; - memblock_remove(esrt_data, esrt_data_size); - pr_debug("esrt-sysfs: loaded.\n"); return 0; From 4bc9f92e64c81192dcca1c495354bcc7c3b43e7d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 23 Jun 2016 11:36:32 +0100 Subject: [PATCH 11/29] x86/efi-bgrt: Use efi_mem_reserve() to avoid copying image data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit efi_mem_reserve() allows us to permanently mark EFI boot services regions as reserved, which means we no longer need to copy the image data out and into a separate buffer. Leaving the data in the original boot services region has the added benefit that BGRT images can now be passed across kexec reboot. 
Reviewed-by: Josh Triplett Tested-by: Dave Young [kexec/kdump] Tested-by: Ard Biesheuvel [arm] Acked-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Peter Jones Cc: Borislav Petkov Cc: Mark Rutland Cc: Josh Boyer Cc: Andy Lutomirski Cc: Môshe van der Sterre Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi-bgrt.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 6a2f5691b1ab..6aad870e8962 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -82,21 +82,12 @@ void __init efi_bgrt_init(void) } bgrt_image_size = bmp_header.size; - bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN); + bgrt_image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); if (!bgrt_image) { - pr_notice("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n", - bgrt_image_size); - return; - } - - image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); - if (!image) { pr_notice("Ignoring BGRT: failed to map image memory\n"); - kfree(bgrt_image); bgrt_image = NULL; return; } - memcpy(bgrt_image, image, bgrt_image_size); - memunmap(image); + efi_mem_reserve(bgrt_tab->image_address, bgrt_image_size); } From f58a37b2e01f91c23af457a7662f6b5a1e9f41e0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jul 2016 21:00:45 +0200 Subject: [PATCH 12/29] efi/esrt: Use memremap not ioremap to access ESRT table in memory On ARM and arm64, ioremap() and memremap() are not interchangeable like on x86, and the use of ioremap() on ordinary RAM is typically flagged as an error if the memory region being mapped is also covered by the linear mapping, since that would lead to aliases with conflicting cacheability attributes. Since what we are dealing with is not an I/O region with side effects, using ioremap() here is arguably incorrect anyway, so let's replace it with memremap() instead. 
Acked-by: Peter Jones Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Signed-off-by: Matt Fleming --- drivers/firmware/efi/esrt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index b93cd11f9bcc..14914074f716 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -387,9 +388,9 @@ static int __init esrt_sysfs_init(void) if (!esrt_data || !esrt_data_size) return -ENOSYS; - esrt = ioremap(esrt_data, esrt_data_size); + esrt = memremap(esrt_data, esrt_data_size, MEMREMAP_WB); if (!esrt) { - pr_err("ioremap(%pa, %zu) failed.\n", &esrt_data, + pr_err("memremap(%pa, %zu) failed.\n", &esrt_data, esrt_data_size); return -ENOMEM; } From 2ead3084e3fc37d42f379cca8753b458d8f9ba25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 11 Jul 2016 21:00:46 +0200 Subject: [PATCH 13/29] efi/arm*: esrt: Add missing call to efi_esrt_init() ESRT support is built by default for all architectures that define CONFIG_EFI. However, this support was not wired up yet for ARM/arm64, since efi_esrt_init() was never called. So add the missing call. 
Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Peter Jones Signed-off-by: Matt Fleming --- drivers/firmware/efi/arm-init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 5a2df3fefccc..e0a511d4074f 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -247,6 +247,7 @@ void __init efi_init(void) reserve_regions(); efi_memattr_init(); + efi_esrt_init(); efi_memmap_unmap(); memblock_reserve(params.mmap & PAGE_MASK, From 217b27d4671a0a3f34147f1b341683d36b7457db Mon Sep 17 00:00:00 2001 From: Sylvain Chouleur Date: Fri, 15 Jul 2016 21:36:29 +0200 Subject: [PATCH 14/29] efi: Use a file local lock for efivars This patch replaces the spinlock in the efivars struct with a single lock for the whole vars.c file. The goal of this lock is to protect against concurrent calls to EFI variable services, as well as concurrent registration and unregistration. This allows us to register new efivars operations without any call being in progress. 
Signed-off-by: Sylvain Chouleur Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- drivers/firmware/efi/vars.c | 83 +++++++++++++++++++++---------------- include/linux/efi.h | 6 --- 2 files changed, 47 insertions(+), 42 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index d3b751383286..d0d807e1287e 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -37,6 +37,14 @@ /* Private pointer to registered efivars */ static struct efivars *__efivars; +/* + * efivars_lock protects three things: + * 1) efivarfs_list and efivars_sysfs_list + * 2) ->ops calls + * 3) (un)registration of __efivars + */ +static DEFINE_SPINLOCK(efivars_lock); + static bool efivar_wq_enabled = true; DECLARE_WORK(efivar_work, NULL); EXPORT_SYMBOL_GPL(efivar_work); @@ -434,7 +442,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), return -ENOMEM; } - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); /* * Per EFI spec, the maximum storage allocated for both @@ -450,7 +458,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), switch (status) { case EFI_SUCCESS: if (duplicates) - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); variable_name_size = var_name_strnsize(variable_name, variable_name_size); @@ -477,7 +485,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } if (duplicates) - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); break; case EFI_NOT_FOUND: @@ -491,7 +499,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } while (status != EFI_NOT_FOUND); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); kfree(variable_name); @@ -506,9 +514,9 @@ EXPORT_SYMBOL_GPL(efivar_init); */ void efivar_entry_add(struct efivar_entry *entry, struct list_head *head) { - 
spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); list_add(&entry->list, head); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); } EXPORT_SYMBOL_GPL(efivar_entry_add); @@ -518,9 +526,9 @@ EXPORT_SYMBOL_GPL(efivar_entry_add); */ void efivar_entry_remove(struct efivar_entry *entry) { - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); list_del(&entry->list); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); } EXPORT_SYMBOL_GPL(efivar_entry_remove); @@ -537,10 +545,10 @@ EXPORT_SYMBOL_GPL(efivar_entry_remove); */ static void efivar_entry_list_del_unlock(struct efivar_entry *entry) { - lockdep_assert_held(&__efivars->lock); + lockdep_assert_held(&efivars_lock); list_del(&entry->list); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); } /** @@ -563,7 +571,7 @@ int __efivar_entry_delete(struct efivar_entry *entry) const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - lockdep_assert_held(&__efivars->lock); + lockdep_assert_held(&efivars_lock); status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, @@ -589,12 +597,12 @@ int efivar_entry_delete(struct efivar_entry *entry) const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, 0, 0, NULL); if (!(status == EFI_SUCCESS || status == EFI_NOT_FOUND)) { - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); return efi_status_to_err(status); } @@ -632,10 +640,10 @@ int efivar_entry_set(struct efivar_entry *entry, u32 attributes, efi_char16_t *name = entry->var.VariableName; efi_guid_t vendor = entry->var.VendorGuid; - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); if (head && efivar_entry_find(name, vendor, head, false)) { - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); return 
-EEXIST; } @@ -644,7 +652,7 @@ int efivar_entry_set(struct efivar_entry *entry, u32 attributes, status = ops->set_variable(name, &vendor, attributes, size, data); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); return efi_status_to_err(status); @@ -658,7 +666,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_set); * from crash/panic handlers. * * Crucially, this function will not block if it cannot acquire - * __efivars->lock. Instead, it returns -EBUSY. + * efivars_lock. Instead, it returns -EBUSY. */ static int efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, @@ -668,20 +676,20 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, unsigned long flags; efi_status_t status; - if (!spin_trylock_irqsave(&__efivars->lock, flags)) + if (!spin_trylock_irqsave(&efivars_lock, flags)) return -EBUSY; status = check_var_size_nonblocking(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { - spin_unlock_irqrestore(&__efivars->lock, flags); + spin_unlock_irqrestore(&efivars_lock, flags); return -ENOSPC; } status = ops->set_variable_nonblocking(name, &vendor, attributes, size, data); - spin_unlock_irqrestore(&__efivars->lock, flags); + spin_unlock_irqrestore(&efivars_lock, flags); return efi_status_to_err(status); } @@ -727,21 +735,21 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, size, data); if (!block) { - if (!spin_trylock_irqsave(&__efivars->lock, flags)) + if (!spin_trylock_irqsave(&efivars_lock, flags)) return -EBUSY; } else { - spin_lock_irqsave(&__efivars->lock, flags); + spin_lock_irqsave(&efivars_lock, flags); } status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { - spin_unlock_irqrestore(&__efivars->lock, flags); + spin_unlock_irqrestore(&efivars_lock, flags); return -ENOSPC; } status = ops->set_variable(name, &vendor, attributes, size, data); - spin_unlock_irqrestore(&__efivars->lock, flags); + 
spin_unlock_irqrestore(&efivars_lock, flags); return efi_status_to_err(status); } @@ -771,7 +779,7 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, int strsize1, strsize2; bool found = false; - lockdep_assert_held(&__efivars->lock); + lockdep_assert_held(&efivars_lock); list_for_each_entry_safe(entry, n, head, list) { strsize1 = ucs2_strsize(name, 1024); @@ -814,10 +822,10 @@ int efivar_entry_size(struct efivar_entry *entry, unsigned long *size) *size = 0; - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, NULL, size, NULL); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); if (status != EFI_BUFFER_TOO_SMALL) return efi_status_to_err(status); @@ -843,7 +851,7 @@ int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - lockdep_assert_held(&__efivars->lock); + lockdep_assert_held(&efivars_lock); status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, @@ -866,11 +874,11 @@ int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, attributes, size, data); - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); return efi_status_to_err(status); } @@ -917,7 +925,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, * set_variable call, and removal of the variable from the efivars * list (in the case of an authenticated delete). 
*/ - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); /* * Ensure that the available space hasn't shrunk below the safe level @@ -957,7 +965,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, if (status == EFI_NOT_FOUND) efivar_entry_list_del_unlock(entry); else - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); if (status && status != EFI_BUFFER_TOO_SMALL) return efi_status_to_err(status); @@ -965,7 +973,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, return 0; out: - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); return err; } @@ -980,7 +988,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_set_get_size); */ void efivar_entry_iter_begin(void) { - spin_lock_irq(&__efivars->lock); + spin_lock_irq(&efivars_lock); } EXPORT_SYMBOL_GPL(efivar_entry_iter_begin); @@ -991,7 +999,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_iter_begin); */ void efivar_entry_iter_end(void) { - spin_unlock_irq(&__efivars->lock); + spin_unlock_irq(&efivars_lock); } EXPORT_SYMBOL_GPL(efivar_entry_iter_end); @@ -1112,11 +1120,12 @@ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject) { - spin_lock_init(&efivars->lock); + spin_lock_irq(&efivars_lock); efivars->ops = ops; efivars->kobject = kobject; __efivars = efivars; + spin_unlock_irq(&efivars_lock); return 0; } @@ -1133,6 +1142,7 @@ int efivars_unregister(struct efivars *efivars) { int rv; + spin_lock_irq(&efivars_lock); if (!__efivars) { printk(KERN_ERR "efivars not registered\n"); rv = -EINVAL; @@ -1148,6 +1158,7 @@ int efivars_unregister(struct efivars *efivars) rv = 0; out: + spin_unlock_irq(&efivars_lock); return rv; } EXPORT_SYMBOL_GPL(efivars_unregister); diff --git a/include/linux/efi.h b/include/linux/efi.h index d8b555db81c7..deecb2902715 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1157,12 +1157,6 @@ struct efivar_operations { }; struct efivars { - /* - * ->lock 
protects two things: - * 1) efivarfs_list and efivars_sysfs_list - * 2) ->ops calls - */ - spinlock_t lock; struct kset *kset; struct kobject *kobject; const struct efivar_operations *ops; From 21b3ddd39feecd2f4d6c52bcd30f0a4fa14f125a Mon Sep 17 00:00:00 2001 From: Sylvain Chouleur Date: Fri, 15 Jul 2016 21:36:30 +0200 Subject: [PATCH 15/29] efi: Don't use spinlocks for efi vars All efivars operations are protected by a spinlock which prevents interruptions and preemption. This is too restricted, we just need a lock preventing concurrency. The idea is to use a semaphore of count 1 and to have two ways of locking, depending on the context: - In interrupt context, we call down_trylock(), if it fails we return an error - In normal context, we call down_interruptible() We don't use a mutex here because the mutex_trylock() function must not be called from interrupt context, whereas the down_trylock() can. Signed-off-by: Sylvain Chouleur Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- drivers/firmware/efi/efi-pstore.c | 36 ++++++-- drivers/firmware/efi/efivars.c | 22 ++++- drivers/firmware/efi/vars.c | 137 +++++++++++++++++------------- fs/efivarfs/inode.c | 5 +- fs/efivarfs/super.c | 9 +- include/linux/efi.h | 6 +- 6 files changed, 139 insertions(+), 76 deletions(-) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 30a24d09ea6c..1c33d7469e4a 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -125,16 +125,19 @@ static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos, * @entry: deleting entry * @turn_off_scanning: Check if a scanning flag should be turned off */ -static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry, +static inline int __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry, bool turn_off_scanning) { if (entry->deleting) { list_del(&entry->list); efivar_entry_iter_end(); 
efivar_unregister(entry); - efivar_entry_iter_begin(); + if (efivar_entry_iter_begin()) + return -EINTR; } else if (turn_off_scanning) entry->scanning = false; + + return 0; } /** @@ -144,13 +147,18 @@ static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry, * @head: list head * @stop: a flag checking if scanning will stop */ -static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos, +static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos, struct efivar_entry *next, struct list_head *head, bool stop) { - __efi_pstore_scan_sysfs_exit(pos, true); + int ret = __efi_pstore_scan_sysfs_exit(pos, true); + + if (ret) + return ret; + if (stop) - __efi_pstore_scan_sysfs_exit(next, &next->list != head); + ret = __efi_pstore_scan_sysfs_exit(next, &next->list != head); + return ret; } /** @@ -172,13 +180,17 @@ static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos) struct efivar_entry *entry, *n; struct list_head *head = &efivar_sysfs_list; int size = 0; + int ret; if (!*pos) { list_for_each_entry_safe(entry, n, head, list) { efi_pstore_scan_sysfs_enter(entry, n, head); size = efi_pstore_read_func(entry, data); - efi_pstore_scan_sysfs_exit(entry, n, head, size < 0); + ret = efi_pstore_scan_sysfs_exit(entry, n, head, + size < 0); + if (ret) + return ret; if (size) break; } @@ -190,7 +202,9 @@ static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos) efi_pstore_scan_sysfs_enter((*pos), n, head); size = efi_pstore_read_func((*pos), data); - efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0); + ret = efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0); + if (ret) + return ret; if (size) break; } @@ -232,7 +246,10 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, if (!*data.buf) return -ENOMEM; - efivar_entry_iter_begin(); + if (efivar_entry_iter_begin()) { + kfree(*data.buf); + return -EINTR; + } size = efi_pstore_sysfs_entry_iter(&data, (struct efivar_entry **)&psi->data); 
efivar_entry_iter_end(); @@ -347,7 +364,8 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, edata.time = time; edata.name = efi_name; - efivar_entry_iter_begin(); + if (efivar_entry_iter_begin()) + return -EINTR; found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry); if (found && !entry->scanning) { diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 116b244dee68..3e626fd9bd4e 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -510,7 +510,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, vendor = del_var->VendorGuid; } - efivar_entry_iter_begin(); + if (efivar_entry_iter_begin()) + return -EINTR; entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true); if (!entry) err = -EINVAL; @@ -575,7 +576,10 @@ efivar_create_sysfs_entry(struct efivar_entry *new_var) return ret; kobject_uevent(&new_var->kobj, KOBJ_ADD); - efivar_entry_add(new_var, &efivar_sysfs_list); + if (efivar_entry_add(new_var, &efivar_sysfs_list)) { + efivar_unregister(new_var); + return -EINTR; + } return 0; } @@ -690,7 +694,10 @@ static int efivars_sysfs_callback(efi_char16_t *name, efi_guid_t vendor, static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data) { - efivar_entry_remove(entry); + int err = efivar_entry_remove(entry); + + if (err) + return err; efivar_unregister(entry); return 0; } @@ -698,7 +705,14 @@ static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data) static void efivars_sysfs_exit(void) { /* Remove all entries and destroy */ - __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list, NULL, NULL); + int err; + + err = __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list, + NULL, NULL); + if (err) { + pr_err("efivars: Failed to destroy sysfs entries\n"); + return; + } if (efivars_new_var) sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var); diff --git a/drivers/firmware/efi/vars.c 
b/drivers/firmware/efi/vars.c index d0d807e1287e..9336ffdf6e2c 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -43,7 +43,7 @@ static struct efivars *__efivars; * 2) ->ops calls * 3) (un)registration of __efivars */ -static DEFINE_SPINLOCK(efivars_lock); +static DEFINE_SEMAPHORE(efivars_lock); static bool efivar_wq_enabled = true; DECLARE_WORK(efivar_work, NULL); @@ -442,7 +442,10 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), return -ENOMEM; } - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) { + err = -EINTR; + goto free; + } /* * Per EFI spec, the maximum storage allocated for both @@ -458,7 +461,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), switch (status) { case EFI_SUCCESS: if (duplicates) - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); variable_name_size = var_name_strnsize(variable_name, variable_name_size); @@ -484,8 +487,12 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), status = EFI_NOT_FOUND; } - if (duplicates) - spin_lock_irq(&efivars_lock); + if (duplicates) { + if (down_interruptible(&efivars_lock)) { + err = -EINTR; + goto free; + } + } break; case EFI_NOT_FOUND: @@ -499,8 +506,8 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } while (status != EFI_NOT_FOUND); - spin_unlock_irq(&efivars_lock); - + up(&efivars_lock); +free: kfree(variable_name); return err; @@ -511,24 +518,34 @@ EXPORT_SYMBOL_GPL(efivar_init); * efivar_entry_add - add entry to variable list * @entry: entry to add to list * @head: list head + * + * Returns 0 on success, or a kernel error code on failure. 
*/ -void efivar_entry_add(struct efivar_entry *entry, struct list_head *head) +int efivar_entry_add(struct efivar_entry *entry, struct list_head *head) { - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; list_add(&entry->list, head); - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); + + return 0; } EXPORT_SYMBOL_GPL(efivar_entry_add); /** * efivar_entry_remove - remove entry from variable list * @entry: entry to remove from list + * + * Returns 0 on success, or a kernel error code on failure. */ -void efivar_entry_remove(struct efivar_entry *entry) +int efivar_entry_remove(struct efivar_entry *entry) { - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; list_del(&entry->list); - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); + + return 0; } EXPORT_SYMBOL_GPL(efivar_entry_remove); @@ -545,10 +562,8 @@ EXPORT_SYMBOL_GPL(efivar_entry_remove); */ static void efivar_entry_list_del_unlock(struct efivar_entry *entry) { - lockdep_assert_held(&efivars_lock); - list_del(&entry->list); - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); } /** @@ -571,8 +586,6 @@ int __efivar_entry_delete(struct efivar_entry *entry) const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - lockdep_assert_held(&efivars_lock); - status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, 0, 0, NULL); @@ -589,20 +602,22 @@ EXPORT_SYMBOL_GPL(__efivar_entry_delete); * variable list. It is the caller's responsibility to free @entry * once we return. * - * Returns 0 on success, or a converted EFI status code if - * set_variable() fails. + * Returns 0 on success, -EINTR if we can't grab the semaphore, + * converted EFI status code if set_variable() fails. 
*/ int efivar_entry_delete(struct efivar_entry *entry) { const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; + status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, 0, 0, NULL); if (!(status == EFI_SUCCESS || status == EFI_NOT_FOUND)) { - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); return efi_status_to_err(status); } @@ -628,9 +643,9 @@ EXPORT_SYMBOL_GPL(efivar_entry_delete); * If @head is not NULL a lookup is performed to determine whether * the entry is already on the list. * - * Returns 0 on success, -EEXIST if a lookup is performed and the entry - * already exists on the list, or a converted EFI status code if - * set_variable() fails. + * Returns 0 on success, -EINTR if we can't grab the semaphore, + * -EEXIST if a lookup is performed and the entry already exists on + * the list, or a converted EFI status code if set_variable() fails. */ int efivar_entry_set(struct efivar_entry *entry, u32 attributes, unsigned long size, void *data, struct list_head *head) @@ -640,10 +655,10 @@ int efivar_entry_set(struct efivar_entry *entry, u32 attributes, efi_char16_t *name = entry->var.VariableName; efi_guid_t vendor = entry->var.VendorGuid; - spin_lock_irq(&efivars_lock); - + if (down_interruptible(&efivars_lock)) + return -EINTR; if (head && efivar_entry_find(name, vendor, head, false)) { - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); return -EEXIST; } @@ -652,7 +667,7 @@ int efivar_entry_set(struct efivar_entry *entry, u32 attributes, status = ops->set_variable(name, &vendor, attributes, size, data); - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); return efi_status_to_err(status); @@ -673,23 +688,22 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor, u32 attributes, unsigned long size, void *data) { const struct efivar_operations *ops = __efivars->ops; - unsigned long flags; efi_status_t status; 
- if (!spin_trylock_irqsave(&efivars_lock, flags)) + if (down_trylock(&efivars_lock)) return -EBUSY; status = check_var_size_nonblocking(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { - spin_unlock_irqrestore(&efivars_lock, flags); + up(&efivars_lock); return -ENOSPC; } status = ops->set_variable_nonblocking(name, &vendor, attributes, size, data); - spin_unlock_irqrestore(&efivars_lock, flags); + up(&efivars_lock); return efi_status_to_err(status); } @@ -714,7 +728,6 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, bool block, unsigned long size, void *data) { const struct efivar_operations *ops = __efivars->ops; - unsigned long flags; efi_status_t status; if (!ops->query_variable_store) @@ -735,21 +748,22 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, size, data); if (!block) { - if (!spin_trylock_irqsave(&efivars_lock, flags)) + if (down_trylock(&efivars_lock)) return -EBUSY; } else { - spin_lock_irqsave(&efivars_lock, flags); + if (down_interruptible(&efivars_lock)) + return -EINTR; } status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { - spin_unlock_irqrestore(&efivars_lock, flags); + up(&efivars_lock); return -ENOSPC; } status = ops->set_variable(name, &vendor, attributes, size, data); - spin_unlock_irqrestore(&efivars_lock, flags); + up(&efivars_lock); return efi_status_to_err(status); } @@ -779,8 +793,6 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, int strsize1, strsize2; bool found = false; - lockdep_assert_held(&efivars_lock); - list_for_each_entry_safe(entry, n, head, list) { strsize1 = ucs2_strsize(name, 1024); strsize2 = ucs2_strsize(entry->var.VariableName, 1024); @@ -822,10 +834,11 @@ int efivar_entry_size(struct efivar_entry *entry, unsigned long *size) *size = 0; - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; status = 
ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, NULL, size, NULL); - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); if (status != EFI_BUFFER_TOO_SMALL) return efi_status_to_err(status); @@ -851,8 +864,6 @@ int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - lockdep_assert_held(&efivars_lock); - status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, attributes, size, data); @@ -874,11 +885,12 @@ int efivar_entry_get(struct efivar_entry *entry, u32 *attributes, const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, attributes, size, data); - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); return efi_status_to_err(status); } @@ -925,7 +937,8 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, * set_variable call, and removal of the variable from the efivars * list (in the case of an authenticated delete). */ - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; /* * Ensure that the available space hasn't shrunk below the safe level @@ -965,7 +978,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, if (status == EFI_NOT_FOUND) efivar_entry_list_del_unlock(entry); else - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); if (status && status != EFI_BUFFER_TOO_SMALL) return efi_status_to_err(status); @@ -973,7 +986,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, return 0; out: - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); return err; } @@ -986,9 +999,9 @@ EXPORT_SYMBOL_GPL(efivar_entry_set_get_size); * efivar_entry_iter_end() is called. 
This function is usually used in * conjunction with __efivar_entry_iter() or efivar_entry_iter(). */ -void efivar_entry_iter_begin(void) +int efivar_entry_iter_begin(void) { - spin_lock_irq(&efivars_lock); + return down_interruptible(&efivars_lock); } EXPORT_SYMBOL_GPL(efivar_entry_iter_begin); @@ -999,7 +1012,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_iter_begin); */ void efivar_entry_iter_end(void) { - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); } EXPORT_SYMBOL_GPL(efivar_entry_iter_end); @@ -1075,7 +1088,9 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), { int err = 0; - efivar_entry_iter_begin(); + err = efivar_entry_iter_begin(); + if (err) + return err; err = __efivar_entry_iter(func, head, data, NULL); efivar_entry_iter_end(); @@ -1120,12 +1135,17 @@ int efivars_register(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *kobject) { - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; + efivars->ops = ops; efivars->kobject = kobject; __efivars = efivars; - spin_unlock_irq(&efivars_lock); + + pr_info("Registered efivars operations\n"); + + up(&efivars_lock); return 0; } @@ -1142,7 +1162,9 @@ int efivars_unregister(struct efivars *efivars) { int rv; - spin_lock_irq(&efivars_lock); + if (down_interruptible(&efivars_lock)) + return -EINTR; + if (!__efivars) { printk(KERN_ERR "efivars not registered\n"); rv = -EINVAL; @@ -1154,11 +1176,12 @@ int efivars_unregister(struct efivars *efivars) goto out; } + pr_info("Unregistered efivars operations\n"); __efivars = NULL; rv = 0; out: - spin_unlock_irq(&efivars_lock); + up(&efivars_lock); return rv; } EXPORT_SYMBOL_GPL(efivars_unregister); diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 1d73fc6dba13..cbb50cadcffc 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -105,7 +105,10 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, inode->i_private = var; - efivar_entry_add(var, &efivarfs_list); 
+ err = efivar_entry_add(var, &efivarfs_list); + if (err) + goto out; + d_instantiate(dentry, inode); dget(dentry); out: diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 688ccc16b702..01e3d6e53944 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -161,7 +161,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, kfree(name); efivar_entry_size(entry, &size); - efivar_entry_add(entry, &efivarfs_list); + err = efivar_entry_add(entry, &efivarfs_list); + if (err) + goto fail_inode; inode_lock(inode); inode->i_private = entry; @@ -182,7 +184,10 @@ fail: static int efivarfs_destroy(struct efivar_entry *entry, void *data) { - efivar_entry_remove(entry); + int err = efivar_entry_remove(entry); + + if (err) + return err; kfree(entry); return 0; } diff --git a/include/linux/efi.h b/include/linux/efi.h index deecb2902715..4d6da7b66c19 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1297,8 +1297,8 @@ struct kobject *efivars_kobject(void); int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), void *data, bool duplicates, struct list_head *head); -void efivar_entry_add(struct efivar_entry *entry, struct list_head *head); -void efivar_entry_remove(struct efivar_entry *entry); +int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); +int efivar_entry_remove(struct efivar_entry *entry); int __efivar_entry_delete(struct efivar_entry *entry); int efivar_entry_delete(struct efivar_entry *entry); @@ -1315,7 +1315,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, bool block, unsigned long size, void *data); -void efivar_entry_iter_begin(void); +int efivar_entry_iter_begin(void); void efivar_entry_iter_end(void); int __efivar_entry_iter(int (*func)(struct efivar_entry *, void *), From dce48e351c0d42014e5fb16ac3eb099e11b7e716 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: 
Fri, 15 Jul 2016 21:36:31 +0200 Subject: [PATCH 16/29] efi: Replace runtime services spinlock with semaphore The purpose of the efi_runtime_lock is to prevent concurrent calls into the firmware. There is no need to use spinlocks here, as long as we ensure that runtime service invocations from an atomic context (i.e., EFI pstore) cannot block. So use a semaphore instead, and use down_trylock() in the nonblocking case. We don't use a mutex here because the mutex_trylock() function must not be called from interrupt context, whereas the down_trylock() can. Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Mark Rutland Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- drivers/firmware/efi/efi.c | 3 + drivers/firmware/efi/runtime-wrappers.c | 81 +++++++++++++++---------- include/linux/efi.h | 1 + 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index dfe07316cae5..97d98e82f0f4 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -810,6 +810,9 @@ int efi_status_to_err(efi_status_t status) case EFI_NOT_FOUND: err = -ENOENT; break; + case EFI_ABORTED: + err = -EINTR; + break; default: err = -EINVAL; } diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 41958774cde3..ae54870b2788 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -14,11 +14,13 @@ * This file is released under the GPLv2. 
*/ +#define pr_fmt(fmt) "efi: " fmt + #include #include #include #include -#include +#include #include #include @@ -81,20 +83,21 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call) * +------------------------------------+-------------------------------+ * * Due to the fact that the EFI pstore may write to the variable store in - * interrupt context, we need to use a spinlock for at least the groups that + * interrupt context, we need to use a lock for at least the groups that * contain SetVariable() and QueryVariableInfo(). That leaves little else, as * none of the remaining functions are actually ever called at runtime. - * So let's just use a single spinlock to serialize all Runtime Services calls. + * So let's just use a single lock to serialize all Runtime Services calls. */ -static DEFINE_SPINLOCK(efi_runtime_lock); +static DEFINE_SEMAPHORE(efi_runtime_lock); static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(get_time, tm, tc); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -102,9 +105,10 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm) { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(set_time, tm); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -114,9 +118,10 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled, { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(get_wakeup_time, enabled, pending, tm); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -124,9 +129,10 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { efi_status_t status; - 
spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(set_wakeup_time, enabled, tm); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -138,10 +144,11 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name, { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(get_variable, name, vendor, attr, data_size, data); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -151,9 +158,10 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(get_next_variable, name_size, name, vendor); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -165,10 +173,11 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(set_variable, name, vendor, attr, data_size, data); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -179,12 +188,12 @@ virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, { efi_status_t status; - if (!spin_trylock(&efi_runtime_lock)) + if (down_trylock(&efi_runtime_lock)) return EFI_NOT_READY; status = efi_call_virt(set_variable, name, vendor, attr, data_size, data); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -199,10 +208,11 @@ static efi_status_t virt_efi_query_variable_info(u32 attr, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(query_variable_info, attr, storage_space, 
remaining_space, max_variable_size); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -217,12 +227,12 @@ virt_efi_query_variable_info_nonblocking(u32 attr, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - if (!spin_trylock(&efi_runtime_lock)) + if (down_trylock(&efi_runtime_lock)) return EFI_NOT_READY; status = efi_call_virt(query_variable_info, attr, storage_space, remaining_space, max_variable_size); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -230,9 +240,10 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count) { efi_status_t status; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(get_next_high_mono_count, count); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -241,9 +252,13 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) { + pr_warn("failed to invoke the reset_system() runtime service:\n" + "could not get exclusive access to the firmware\n"); + return; + } __efi_call_virt(reset_system, reset_type, status, data_size, data); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); } static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, @@ -255,9 +270,10 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules, if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(update_capsule, capsules, count, sg_list); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } @@ -271,10 +287,11 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules, if (efi.runtime_version < 
EFI_2_00_SYSTEM_TABLE_REVISION) return EFI_UNSUPPORTED; - spin_lock(&efi_runtime_lock); + if (down_interruptible(&efi_runtime_lock)) + return EFI_ABORTED; status = efi_call_virt(query_capsule_caps, capsules, count, max_size, reset_type); - spin_unlock(&efi_runtime_lock); + up(&efi_runtime_lock); return status; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 4d6da7b66c19..4c92c0630c45 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -38,6 +38,7 @@ #define EFI_WRITE_PROTECTED ( 8 | (1UL << (BITS_PER_LONG-1))) #define EFI_OUT_OF_RESOURCES ( 9 | (1UL << (BITS_PER_LONG-1))) #define EFI_NOT_FOUND (14 | (1UL << (BITS_PER_LONG-1))) +#define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) typedef unsigned long efi_status_t; From ac0e94b63e65f9c6d2f3c49107118e2228236db4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 20 Jul 2016 11:11:06 +0100 Subject: [PATCH 17/29] x86/efi: Initialize status to ensure garbage is not returned on small size Although very unlikely, if size is too small or zero, then we end up with status not being set and returning garbage. Instead, initialize status to EFI_INVALID_PARAMETER to indicate that size is invalid in the calls to setup_uga32 and setup_uga64. Signed-off-by: Colin Ian King Cc: "H.
Peter Anvin" Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff574dad95cc..ec6d2ef12baf 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -578,7 +578,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; u32 *handles = (u32 *)uga_handle;; - efi_status_t status; + efi_status_t status = EFI_INVALID_PARAMETER; int i; first_uga = NULL; @@ -623,7 +623,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; u64 *handles = (u64 *)uga_handle;; - efi_status_t status; + efi_status_t status = EFI_INVALID_PARAMETER; int i; first_uga = NULL; From d520dd1f348dcaafcb8ce804b2a5ebb1be004719 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 24 Jul 2016 10:16:56 +0200 Subject: [PATCH 18/29] firmware-gsmi: Delete an unnecessary check before the function call "dma_pool_destroy" The dma_pool_destroy() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Cc: Greg KH Cc: Julia Lawall Cc: Mike Waychison Cc: Michel Lespinasse Signed-off-by: Matt Fleming --- drivers/firmware/google/gsmi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index f1ab05ea56bb..c46387160976 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -910,8 +910,7 @@ out_err: gsmi_buf_free(gsmi_dev.param_buf); gsmi_buf_free(gsmi_dev.data_buf); gsmi_buf_free(gsmi_dev.name_buf); - if (gsmi_dev.dma_pool) - dma_pool_destroy(gsmi_dev.dma_pool); + dma_pool_destroy(gsmi_dev.dma_pool); platform_device_unregister(gsmi_dev.pdev); pr_info("gsmi: failed to load: %d\n", ret); return ret; From cf289cefbfde519bbc179a86cdc5e8cc91a0a08d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 3 Aug 2016 10:16:02 +0200 Subject: [PATCH 19/29] lib/ucs2_string: Speed up ucs2_utf8size() No need to calculate the string length on every loop iteration. Signed-off-by: Lukas Wunner Cc: Peter Jones Signed-off-by: Matt Fleming --- lib/ucs2_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index f0b323abb4c6..ae8d2491133c 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -56,7 +56,7 @@ ucs2_utf8size(const ucs2_char_t *src) unsigned long i; unsigned long j = 0; - for (i = 0; i < ucs2_strlen(src); i++) { + for (i = 0; src[i]; i++) { u16 c = src[i]; if (c >= 0x800) From 0513fe1d28e45deb39159dbeedf0660c3f0effd2 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Fri, 5 Aug 2016 18:59:35 -0500 Subject: [PATCH 20/29] x86/efi: Map in physical addresses in efi_map_region_fixed This is a simple change to add in the physical mappings as well as the virtual mappings in efi_map_region_fixed. The motivation here is to get access to EFI runtime code that is only available via the 1:1 mappings on a kexec'd kernel. 
The added call is essentially the kexec analog of the first __map_region that Boris put in efi_map_region in commit d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping"). Signed-off-by: Alex Thorlton Cc: Russ Anderson Cc: Dimitri Sivanich Cc: Mike Travis Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Dave Young Cc: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 45434ea345e9..e1ca71259468 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -339,6 +339,7 @@ void __init efi_map_region(efi_memory_desc_t *md) */ void __init efi_map_region_fixed(efi_memory_desc_t *md) { + __map_region(md, md->phys_addr); __map_region(md, md->virt_addr); } From 22c2b77f419bdc9317f00b395283abd33157368e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 15 Aug 2016 15:29:20 +0100 Subject: [PATCH 21/29] fs/efivarfs: Fix double kfree() in error path Julia reported that we may double free 'name' in efivarfs_callback(), and that this bug was introduced by commit 0d22f33bc37c ("efi: Don't use spinlocks for efi vars"). Move one of the kfree()s until after the point at which we know we are definitely on the success path. Reported-by: Julia Lawall Acked-by: Julia Lawall Cc: Ard Biesheuvel Cc: Sylvain Chouleur Signed-off-by: Matt Fleming --- fs/efivarfs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 01e3d6e53944..d7a7c53803c1 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -157,14 +157,14 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, goto fail_inode; } - /* copied by the above to local storage in the dentry. 
*/ - kfree(name); - efivar_entry_size(entry, &size); err = efivar_entry_add(entry, &efivarfs_list); if (err) goto fail_inode; + /* copied by the above to local storage in the dentry. */ + kfree(name); + inode_lock(inode); inode->i_private = entry; i_size_write(inode, size + sizeof(entry->var.Attributes)); From 15cf7cae087a2eaf5e1feeef2bbba1b5a94c7639 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 15 Aug 2016 13:52:34 +0200 Subject: [PATCH 22/29] x86/efi: Remove unused find_bits() function Left behind by commit fc37206427ce ("efi/libstub: Move Graphics Output Protocol handling to generic code"). Signed-off-by: Lukas Wunner Cc: Ard Biesheuvel Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ec6d2ef12baf..f7fc85bf8221 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -286,29 +286,6 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) } } -static void find_bits(unsigned long mask, u8 *pos, u8 *size) -{ - u8 first, len; - - first = 0; - len = 0; - - if (mask) { - while (!(mask & 0x1)) { - mask = mask >> 1; - first++; - } - - while (mask & 0x1) { - mask = mask >> 1; - len++; - } - } - - *pos = first; - *size = len; -} - static efi_status_t __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) { From 9d80448ac92b720512c415265597d349d8b5c3e8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Aug 2016 14:13:21 +0200 Subject: [PATCH 23/29] efi/arm64: Add debugfs node to dump UEFI runtime page tables Register the debugfs node 'efi_page_tables' to allow the UEFI runtime page tables to be inspected. Note that ARM does not have 'asm/ptdump.h' [yet] so for now, this is arm64 only. 
Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Cc: Leif Lindholm Signed-off-by: Matt Fleming --- drivers/firmware/efi/arm-runtime.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index ae001450545f..7c75a8d9091a 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -39,6 +39,26 @@ static struct mm_struct efi_mm = { .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), }; +#ifdef CONFIG_ARM64_PTDUMP +#include <asm/ptdump.h> + +static struct ptdump_info efi_ptdump_info = { + .mm = &efi_mm, + .markers = (struct addr_marker[]){ + { 0, "UEFI runtime start" }, + { TASK_SIZE_64, "UEFI runtime end" } + }, + .base_addr = 0, +}; + +static int __init ptdump_init(void) +{ + return ptdump_register(&efi_ptdump_info, "efi_page_tables"); +} +device_initcall(ptdump_init); + +#endif + static bool __init efi_virtmap_init(void) { efi_memory_desc_t *md; From 3dad6f7f6975387f53f1a772f29f54335563d93d Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 16 Aug 2016 17:32:31 -0700 Subject: [PATCH 24/29] x86/efi: Defer efi_esrt_init until after memblock_x86_fill Commit 7b02d53e7852 ("efi: Allow drivers to reserve boot services forever") introduced a new efi_mem_reserve to reserve the boot services memory regions forever. This reservation involves allocating a new EFI memory range descriptor. However, allocation can only succeed if there is memory available for the allocation. Otherwise, errors such as the following may occur: esrt: Reserving ESRT space from 0x000000003dd6a000 to 0x000000003dd6a010. Kernel panic - not syncing: ERROR: Failed to allocate 0x9f0 bytes below \ 0x0.
CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0-rc5+ #503 0000000000000000 ffffffff81e03ce0 ffffffff8131dae8 ffffffff81bb6c50 ffffffff81e03d70 ffffffff81e03d60 ffffffff8111f4df 0000000000000018 ffffffff81e03d70 ffffffff81e03d08 00000000000009f0 00000000000009f0 Call Trace: [] dump_stack+0x4d/0x65 [] panic+0xc5/0x206 [] memblock_alloc_base+0x29/0x2e [] memblock_alloc+0xb/0xd [] efi_arch_mem_reserve+0xbc/0x134 [] efi_mem_reserve+0x2c/0x31 [] ? efi_mem_reserve+0x2c/0x31 [] efi_esrt_init+0x19e/0x1b4 [] efi_init+0x398/0x44a [] setup_arch+0x415/0xc30 [] start_kernel+0x5b/0x3ef [] x86_64_start_reservations+0x2f/0x31 [] x86_64_start_kernel+0xea/0xed ---[ end Kernel panic - not syncing: ERROR: Failed to allocate 0x9f0 bytes below 0x0. An inspection of the memblock configuration reveals that there is no memory available for the allocation: MEMBLOCK configuration: memory size = 0x0 reserved size = 0x4f339c0 memory.cnt = 0x1 memory[0x0] [0x00000000000000-0xffffffffffffffff], 0x0 bytes on node 0\ flags: 0x0 reserved.cnt = 0x4 reserved[0x0] [0x0000000008c000-0x0000000008c9bf], 0x9c0 bytes flags: 0x0 reserved[0x1] [0x0000000009f000-0x000000000fffff], 0x61000 bytes\ flags: 0x0 reserved[0x2] [0x00000002800000-0x0000000394bfff], 0x114c000 bytes\ flags: 0x0 reserved[0x3] [0x000000304e4000-0x00000034269fff], 0x3d86000 bytes\ flags: 0x0 This situation can be avoided if we call efi_esrt_init after memblock has memory regions for the allocation. Also, the EFI ESRT driver makes use of early_memremap mappings. Therefore, we do not want to defer efi_esrt_init for too long. We must call this function while calls to early_memremap are still valid. A good place to meet the two aforementioned conditions is right after memblock_x86_fill, grouped with other EFI-related functions.
Reported-by: Scott Lawson Signed-off-by: Ricardo Neri Cc: Ard Biesheuvel Cc: Peter Jones Signed-off-by: Matt Fleming --- arch/x86/kernel/setup.c | 1 + arch/x86/platform/efi/efi.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4fd69e532c15..528b8eb24a04 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1101,6 +1101,7 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_MEMMAP)) { efi_fake_memmap(); efi_find_mirror(); + efi_esrt_init(); /* * The EFI specification says that boot service code won't be diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 342cebd1e17c..0955c70897ae 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -474,8 +474,6 @@ void __init efi_init(void) if (efi_enabled(EFI_DBG)) efi_print_memmap(); - - efi_esrt_init(); } void __init efi_late_init(void) From ff6301dabc3ca20ab8f50f8d0252ac05da610d89 Mon Sep 17 00:00:00 2001 From: Ivan Hu Date: Thu, 25 Aug 2016 11:15:31 +0800 Subject: [PATCH 25/29] efi: Add efi_test driver for exporting UEFI runtime service interfaces This driver is used by the Firmware Test Suite (FWTS) for testing the UEFI runtime interfaces readiness of the firmware. This driver exports UEFI runtime service interfaces into userspace, which allows to use and test UEFI runtime services provided by the firmware. This driver uses the efi. function pointers directly instead of going through the efivar API to allow for direct testing of the UEFI runtime service interfaces provided by the firmware. 
Details for FWTS are available from, Signed-off-by: Ivan Hu Cc: joeyli Cc: Ricardo Neri Cc: Ard Biesheuvel Signed-off-by: Matt Fleming --- MAINTAINERS | 7 + drivers/firmware/efi/Kconfig | 17 + drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/test/Makefile | 1 + drivers/firmware/efi/test/efi_test.c | 749 +++++++++++++++++++++++++++ drivers/firmware/efi/test/efi_test.h | 110 ++++ 6 files changed, 885 insertions(+) create mode 100644 drivers/firmware/efi/test/Makefile create mode 100644 drivers/firmware/efi/test/efi_test.c create mode 100644 drivers/firmware/efi/test/efi_test.h diff --git a/MAINTAINERS b/MAINTAINERS index db814a89599c..007d05acbb5f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4583,6 +4583,13 @@ M: Peter Jones S: Maintained F: drivers/video/fbdev/efifb.c +EFI TEST DRIVER +L: linux-efi@vger.kernel.org +M: Ivan Hu +M: Matt Fleming +S: Maintained +F: drivers/firmware/efi/test/ + EFS FILESYSTEM W: http://aeschi.ch.eu.org/efs/ S: Orphan diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 6394152f648f..c981be17d3c0 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -112,6 +112,23 @@ config EFI_CAPSULE_LOADER Most users should say N. +config EFI_TEST + tristate "EFI Runtime Service Tests Support" + depends on EFI + default n + help + This driver uses the efi. function pointers directly instead + of going through the efivar API, because it is not trying to test the + kernel subsystem, just for testing the UEFI runtime service + interfaces which are provided by the firmware. This driver is used + by the Firmware Test Suite (FWTS) for testing the UEFI runtime + interfaces readiness of the firmware. + Details for FWTS are available from: + + + Say Y here to enable the runtime services support via /dev/efi_test. + If unsure, say N. 
+ endmenu config UEFI_CPER diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index b3f5e2adc49f..c8a439f6d715 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o obj-$(CONFIG_EFI_STUB) += libstub/ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o +obj-$(CONFIG_EFI_TEST) += test/ arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o obj-$(CONFIG_ARM) += $(arm-obj-y) diff --git a/drivers/firmware/efi/test/Makefile b/drivers/firmware/efi/test/Makefile new file mode 100644 index 000000000000..bcd4577d40e6 --- /dev/null +++ b/drivers/firmware/efi/test/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_EFI_TEST) += efi_test.o diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c new file mode 100644 index 000000000000..f61bb52be318 --- /dev/null +++ b/drivers/firmware/efi/test/efi_test.c @@ -0,0 +1,749 @@ +/* + * EFI Test Driver for Runtime Services + * + * Copyright(C) 2012-2016 Canonical Ltd. + * + * This driver exports EFI runtime services interfaces into userspace, which + * allow to use and test UEFI runtime services provided by firmware. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "efi_test.h" + +MODULE_AUTHOR("Ivan Hu "); +MODULE_DESCRIPTION("EFI Test Driver"); +MODULE_LICENSE("GPL"); + +/* + * Count the bytes in 'str', including the terminating NULL. + * + * Note this function returns the number of *bytes*, not the number of + * ucs2 characters. 
+ */ +static inline size_t user_ucs2_strsize(efi_char16_t __user *str) +{ + efi_char16_t *s = str, c; + size_t len; + + if (!str) + return 0; + + /* Include terminating NULL */ + len = sizeof(efi_char16_t); + + if (get_user(c, s++)) { + /* Can't read userspace memory for size */ + return 0; + } + + while (c != 0) { + if (get_user(c, s++)) { + /* Can't read userspace memory for size */ + return 0; + } + len += sizeof(efi_char16_t); + } + return len; +} + +/* + * Allocate a buffer and copy a ucs2 string from user space into it. + */ +static inline int +copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src, + size_t len) +{ + efi_char16_t *buf; + + if (!src) { + *dst = NULL; + return 0; + } + + if (!access_ok(VERIFY_READ, src, 1)) + return -EFAULT; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + *dst = NULL; + return -ENOMEM; + } + *dst = buf; + + if (copy_from_user(*dst, src, len)) { + kfree(buf); + return -EFAULT; + } + + return 0; +} + +/* + * Count the bytes in 'str', including the terminating NULL. + * + * Just a wrap for user_ucs2_strsize + */ +static inline int +get_ucs2_strsize_from_user(efi_char16_t __user *src, size_t *len) +{ + if (!access_ok(VERIFY_READ, src, 1)) + return -EFAULT; + + *len = user_ucs2_strsize(src); + if (*len == 0) + return -EFAULT; + + return 0; +} + +/* + * Calculate the required buffer allocation size and copy a ucs2 string + * from user space into it. + * + * This function differs from copy_ucs2_from_user_len() because it + * calculates the size of the buffer to allocate by taking the length of + * the string 'src'. + * + * If a non-zero value is returned, the caller MUST NOT access 'dst'. + * + * It is the caller's responsibility to free 'dst'. 
+ */ +static inline int +copy_ucs2_from_user(efi_char16_t **dst, efi_char16_t __user *src) +{ + size_t len; + + if (!access_ok(VERIFY_READ, src, 1)) + return -EFAULT; + + len = user_ucs2_strsize(src); + if (len == 0) + return -EFAULT; + return copy_ucs2_from_user_len(dst, src, len); +} + +/* + * Copy a ucs2 string to a user buffer. + * + * This function is a simple wrapper around copy_to_user() that does + * nothing if 'src' is NULL, which is useful for reducing the amount of + * NULL checking the caller has to do. + * + * 'len' specifies the number of bytes to copy. + */ +static inline int +copy_ucs2_to_user_len(efi_char16_t __user *dst, efi_char16_t *src, size_t len) +{ + if (!src) + return 0; + + if (!access_ok(VERIFY_WRITE, dst, 1)) + return -EFAULT; + + return copy_to_user(dst, src, len); +} + +static long efi_runtime_get_variable(unsigned long arg) +{ + struct efi_getvariable __user *getvariable_user; + struct efi_getvariable getvariable; + unsigned long datasize, prev_datasize, *dz; + efi_guid_t vendor_guid, *vd = NULL; + efi_status_t status; + efi_char16_t *name = NULL; + u32 attr, *at; + void *data = NULL; + int rv = 0; + + getvariable_user = (struct efi_getvariable __user *)arg; + + if (copy_from_user(&getvariable, getvariable_user, + sizeof(getvariable))) + return -EFAULT; + if (getvariable.data_size && + get_user(datasize, getvariable.data_size)) + return -EFAULT; + if (getvariable.vendor_guid) { + if (copy_from_user(&vendor_guid, getvariable.vendor_guid, + sizeof(vendor_guid))) + return -EFAULT; + vd = &vendor_guid; + } + + if (getvariable.variable_name) { + rv = copy_ucs2_from_user(&name, getvariable.variable_name); + if (rv) + return rv; + } + + at = getvariable.attributes ? &attr : NULL; + dz = getvariable.data_size ? 
&datasize : NULL; + + if (getvariable.data_size && getvariable.data) { + data = kmalloc(datasize, GFP_KERNEL); + if (!data) { + kfree(name); + return -ENOMEM; + } + } + + prev_datasize = datasize; + status = efi.get_variable(name, vd, at, dz, data); + kfree(name); + + if (put_user(status, getvariable.status)) { + rv = -EFAULT; + goto out; + } + + if (status != EFI_SUCCESS) { + if (status == EFI_BUFFER_TOO_SMALL) { + if (dz && put_user(datasize, getvariable.data_size)) { + rv = -EFAULT; + goto out; + } + } + rv = -EINVAL; + goto out; + } + + if (prev_datasize < datasize) { + rv = -EINVAL; + goto out; + } + + if (data) { + if (copy_to_user(getvariable.data, data, datasize)) { + rv = -EFAULT; + goto out; + } + } + + if (at && put_user(attr, getvariable.attributes)) { + rv = -EFAULT; + goto out; + } + + if (dz && put_user(datasize, getvariable.data_size)) + rv = -EFAULT; + +out: + kfree(data); + return rv; + +} + +static long efi_runtime_set_variable(unsigned long arg) +{ + struct efi_setvariable __user *setvariable_user; + struct efi_setvariable setvariable; + efi_guid_t vendor_guid; + efi_status_t status; + efi_char16_t *name = NULL; + void *data; + int rv = 0; + + setvariable_user = (struct efi_setvariable __user *)arg; + + if (copy_from_user(&setvariable, setvariable_user, sizeof(setvariable))) + return -EFAULT; + if (copy_from_user(&vendor_guid, setvariable.vendor_guid, + sizeof(vendor_guid))) + return -EFAULT; + + if (setvariable.variable_name) { + rv = copy_ucs2_from_user(&name, setvariable.variable_name); + if (rv) + return rv; + } + + data = kmalloc(setvariable.data_size, GFP_KERNEL); + if (!data) { + kfree(name); + return -ENOMEM; + } + if (copy_from_user(data, setvariable.data, setvariable.data_size)) { + rv = -EFAULT; + goto out; + } + + status = efi.set_variable(name, &vendor_guid, + setvariable.attributes, + setvariable.data_size, data); + + if (put_user(status, setvariable.status)) { + rv = -EFAULT; + goto out; + } + + rv = status == EFI_SUCCESS ? 
0 : -EINVAL; + +out: + kfree(data); + kfree(name); + + return rv; +} + +static long efi_runtime_get_time(unsigned long arg) +{ + struct efi_gettime __user *gettime_user; + struct efi_gettime gettime; + efi_status_t status; + efi_time_cap_t cap; + efi_time_t efi_time; + + gettime_user = (struct efi_gettime __user *)arg; + if (copy_from_user(&gettime, gettime_user, sizeof(gettime))) + return -EFAULT; + + status = efi.get_time(gettime.time ? &efi_time : NULL, + gettime.capabilities ? &cap : NULL); + + if (put_user(status, gettime.status)) + return -EFAULT; + + if (status != EFI_SUCCESS) + return -EINVAL; + + if (gettime.capabilities) { + efi_time_cap_t __user *cap_local; + + cap_local = (efi_time_cap_t *)gettime.capabilities; + if (put_user(cap.resolution, &(cap_local->resolution)) || + put_user(cap.accuracy, &(cap_local->accuracy)) || + put_user(cap.sets_to_zero, &(cap_local->sets_to_zero))) + return -EFAULT; + } + if (gettime.time) { + if (copy_to_user(gettime.time, &efi_time, sizeof(efi_time_t))) + return -EFAULT; + } + + return 0; +} + +static long efi_runtime_set_time(unsigned long arg) +{ + struct efi_settime __user *settime_user; + struct efi_settime settime; + efi_status_t status; + efi_time_t efi_time; + + settime_user = (struct efi_settime __user *)arg; + if (copy_from_user(&settime, settime_user, sizeof(settime))) + return -EFAULT; + if (copy_from_user(&efi_time, settime.time, + sizeof(efi_time_t))) + return -EFAULT; + status = efi.set_time(&efi_time); + + if (put_user(status, settime.status)) + return -EFAULT; + + return status == EFI_SUCCESS ? 
0 : -EINVAL; +} + +static long efi_runtime_get_waketime(unsigned long arg) +{ + struct efi_getwakeuptime __user *getwakeuptime_user; + struct efi_getwakeuptime getwakeuptime; + efi_bool_t enabled, pending; + efi_status_t status; + efi_time_t efi_time; + + getwakeuptime_user = (struct efi_getwakeuptime __user *)arg; + if (copy_from_user(&getwakeuptime, getwakeuptime_user, + sizeof(getwakeuptime))) + return -EFAULT; + + status = efi.get_wakeup_time( + getwakeuptime.enabled ? (efi_bool_t *)&enabled : NULL, + getwakeuptime.pending ? (efi_bool_t *)&pending : NULL, + getwakeuptime.time ? &efi_time : NULL); + + if (put_user(status, getwakeuptime.status)) + return -EFAULT; + + if (status != EFI_SUCCESS) + return -EINVAL; + + if (getwakeuptime.enabled && put_user(enabled, + getwakeuptime.enabled)) + return -EFAULT; + + if (getwakeuptime.time) { + if (copy_to_user(getwakeuptime.time, &efi_time, + sizeof(efi_time_t))) + return -EFAULT; + } + + return 0; +} + +static long efi_runtime_set_waketime(unsigned long arg) +{ + struct efi_setwakeuptime __user *setwakeuptime_user; + struct efi_setwakeuptime setwakeuptime; + efi_bool_t enabled; + efi_status_t status; + efi_time_t efi_time; + + setwakeuptime_user = (struct efi_setwakeuptime __user *)arg; + + if (copy_from_user(&setwakeuptime, setwakeuptime_user, + sizeof(setwakeuptime))) + return -EFAULT; + + enabled = setwakeuptime.enabled; + if (setwakeuptime.time) { + if (copy_from_user(&efi_time, setwakeuptime.time, + sizeof(efi_time_t))) + return -EFAULT; + + status = efi.set_wakeup_time(enabled, &efi_time); + } else + status = efi.set_wakeup_time(enabled, NULL); + + if (put_user(status, setwakeuptime.status)) + return -EFAULT; + + return status == EFI_SUCCESS ? 
0 : -EINVAL; +} + +static long efi_runtime_get_nextvariablename(unsigned long arg) +{ + struct efi_getnextvariablename __user *getnextvariablename_user; + struct efi_getnextvariablename getnextvariablename; + unsigned long name_size, prev_name_size = 0, *ns = NULL; + efi_status_t status; + efi_guid_t *vd = NULL; + efi_guid_t vendor_guid; + efi_char16_t *name = NULL; + int rv; + + getnextvariablename_user = (struct efi_getnextvariablename __user *)arg; + + if (copy_from_user(&getnextvariablename, getnextvariablename_user, + sizeof(getnextvariablename))) + return -EFAULT; + + if (getnextvariablename.variable_name_size) { + if (get_user(name_size, getnextvariablename.variable_name_size)) + return -EFAULT; + ns = &name_size; + prev_name_size = name_size; + } + + if (getnextvariablename.vendor_guid) { + if (copy_from_user(&vendor_guid, + getnextvariablename.vendor_guid, + sizeof(vendor_guid))) + return -EFAULT; + vd = &vendor_guid; + } + + if (getnextvariablename.variable_name) { + size_t name_string_size = 0; + + rv = get_ucs2_strsize_from_user( + getnextvariablename.variable_name, + &name_string_size); + if (rv) + return rv; + /* + * The name_size may be smaller than the real buffer size where + * variable name located in some use cases. The most typical + * case is passing a 0 to get the required buffer size for the + * 1st time call. So we need to copy the content from user + * space for at least the string size of variable name, or else + * the name passed to UEFI may not be terminated as we expected. + */ + rv = copy_ucs2_from_user_len(&name, + getnextvariablename.variable_name, + prev_name_size > name_string_size ? 
+ prev_name_size : name_string_size); + if (rv) + return rv; + } + + status = efi.get_next_variable(ns, name, vd); + + if (put_user(status, getnextvariablename.status)) { + rv = -EFAULT; + goto out; + } + + if (status != EFI_SUCCESS) { + if (status == EFI_BUFFER_TOO_SMALL) { + if (ns && put_user(*ns, + getnextvariablename.variable_name_size)) { + rv = -EFAULT; + goto out; + } + } + rv = -EINVAL; + goto out; + } + + if (name) { + if (copy_ucs2_to_user_len(getnextvariablename.variable_name, + name, prev_name_size)) { + rv = -EFAULT; + goto out; + } + } + + if (ns) { + if (put_user(*ns, getnextvariablename.variable_name_size)) { + rv = -EFAULT; + goto out; + } + } + + if (vd) { + if (copy_to_user(getnextvariablename.vendor_guid, vd, + sizeof(efi_guid_t))) + rv = -EFAULT; + } + +out: + kfree(name); + return rv; +} + +static long efi_runtime_get_nexthighmonocount(unsigned long arg) +{ + struct efi_getnexthighmonotoniccount __user *getnexthighmonocount_user; + struct efi_getnexthighmonotoniccount getnexthighmonocount; + efi_status_t status; + u32 count; + + getnexthighmonocount_user = (struct + efi_getnexthighmonotoniccount __user *)arg; + + if (copy_from_user(&getnexthighmonocount, + getnexthighmonocount_user, + sizeof(getnexthighmonocount))) + return -EFAULT; + + status = efi.get_next_high_mono_count( + getnexthighmonocount.high_count ? 
&count : NULL); + + if (put_user(status, getnexthighmonocount.status)) + return -EFAULT; + + if (status != EFI_SUCCESS) + return -EINVAL; + + if (getnexthighmonocount.high_count && + put_user(count, getnexthighmonocount.high_count)) + return -EFAULT; + + return 0; +} + +static long efi_runtime_query_variableinfo(unsigned long arg) +{ + struct efi_queryvariableinfo __user *queryvariableinfo_user; + struct efi_queryvariableinfo queryvariableinfo; + efi_status_t status; + u64 max_storage, remaining, max_size; + + queryvariableinfo_user = (struct efi_queryvariableinfo __user *)arg; + + if (copy_from_user(&queryvariableinfo, queryvariableinfo_user, + sizeof(queryvariableinfo))) + return -EFAULT; + + status = efi.query_variable_info(queryvariableinfo.attributes, + &max_storage, &remaining, &max_size); + + if (put_user(status, queryvariableinfo.status)) + return -EFAULT; + + if (status != EFI_SUCCESS) + return -EINVAL; + + if (put_user(max_storage, + queryvariableinfo.maximum_variable_storage_size)) + return -EFAULT; + + if (put_user(remaining, + queryvariableinfo.remaining_variable_storage_size)) + return -EFAULT; + + if (put_user(max_size, queryvariableinfo.maximum_variable_size)) + return -EFAULT; + + return 0; +} + +static long efi_runtime_query_capsulecaps(unsigned long arg) +{ + struct efi_querycapsulecapabilities __user *qcaps_user; + struct efi_querycapsulecapabilities qcaps; + efi_capsule_header_t *capsules; + efi_status_t status; + u64 max_size; + int i, reset_type; + int rv = 0; + + qcaps_user = (struct efi_querycapsulecapabilities __user *)arg; + + if (copy_from_user(&qcaps, qcaps_user, sizeof(qcaps))) + return -EFAULT; + + capsules = kcalloc(qcaps.capsule_count + 1, + sizeof(efi_capsule_header_t), GFP_KERNEL); + if (!capsules) + return -ENOMEM; + + for (i = 0; i < qcaps.capsule_count; i++) { + efi_capsule_header_t *c; + /* + * We cannot dereference qcaps.capsule_header_array directly to + * obtain the address of the capsule as it resides in the + * user space 
+ */ + if (get_user(c, qcaps.capsule_header_array + i)) { + rv = -EFAULT; + goto out; + } + if (copy_from_user(&capsules[i], c, + sizeof(efi_capsule_header_t))) { + rv = -EFAULT; + goto out; + } + } + + qcaps.capsule_header_array = &capsules; + + status = efi.query_capsule_caps((efi_capsule_header_t **) + qcaps.capsule_header_array, + qcaps.capsule_count, + &max_size, &reset_type); + + if (put_user(status, qcaps.status)) { + rv = -EFAULT; + goto out; + } + + if (status != EFI_SUCCESS) { + rv = -EINVAL; + goto out; + } + + if (put_user(max_size, qcaps.maximum_capsule_size)) { + rv = -EFAULT; + goto out; + } + + if (put_user(reset_type, qcaps.reset_type)) + rv = -EFAULT; + +out: + kfree(capsules); + return rv; +} + +static long efi_test_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case EFI_RUNTIME_GET_VARIABLE: + return efi_runtime_get_variable(arg); + + case EFI_RUNTIME_SET_VARIABLE: + return efi_runtime_set_variable(arg); + + case EFI_RUNTIME_GET_TIME: + return efi_runtime_get_time(arg); + + case EFI_RUNTIME_SET_TIME: + return efi_runtime_set_time(arg); + + case EFI_RUNTIME_GET_WAKETIME: + return efi_runtime_get_waketime(arg); + + case EFI_RUNTIME_SET_WAKETIME: + return efi_runtime_set_waketime(arg); + + case EFI_RUNTIME_GET_NEXTVARIABLENAME: + return efi_runtime_get_nextvariablename(arg); + + case EFI_RUNTIME_GET_NEXTHIGHMONOTONICCOUNT: + return efi_runtime_get_nexthighmonocount(arg); + + case EFI_RUNTIME_QUERY_VARIABLEINFO: + return efi_runtime_query_variableinfo(arg); + + case EFI_RUNTIME_QUERY_CAPSULECAPABILITIES: + return efi_runtime_query_capsulecaps(arg); + } + + return -ENOTTY; +} + +static int efi_test_open(struct inode *inode, struct file *file) +{ + /* + * nothing special to do here + * We do accept multiple open files at the same time as we + * synchronize on the per call operation. 
+ */ + return 0; +} + +static int efi_test_close(struct inode *inode, struct file *file) +{ + return 0; +} + +/* + * The various file operations we support. + */ +static const struct file_operations efi_test_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = efi_test_ioctl, + .open = efi_test_open, + .release = efi_test_close, + .llseek = no_llseek, +}; + +static struct miscdevice efi_test_dev = { + MISC_DYNAMIC_MINOR, + "efi_test", + &efi_test_fops +}; + +static int __init efi_test_init(void) +{ + int ret; + + ret = misc_register(&efi_test_dev); + if (ret) { + pr_err("efi_test: can't misc_register on minor=%d\n", + MISC_DYNAMIC_MINOR); + return ret; + } + + return 0; +} + +static void __exit efi_test_exit(void) +{ + misc_deregister(&efi_test_dev); +} + +module_init(efi_test_init); +module_exit(efi_test_exit); diff --git a/drivers/firmware/efi/test/efi_test.h b/drivers/firmware/efi/test/efi_test.h new file mode 100644 index 000000000000..a33a6c633852 --- /dev/null +++ b/drivers/firmware/efi/test/efi_test.h @@ -0,0 +1,110 @@ +/* + * EFI Test driver Header + * + * Copyright(C) 2012-2016 Canonical Ltd. 
+ * + */ + +#ifndef _DRIVERS_FIRMWARE_EFI_TEST_H_ +#define _DRIVERS_FIRMWARE_EFI_TEST_H_ + +#include + +struct efi_getvariable { + efi_char16_t *variable_name; + efi_guid_t *vendor_guid; + u32 *attributes; + unsigned long *data_size; + void *data; + efi_status_t *status; +} __packed; + +struct efi_setvariable { + efi_char16_t *variable_name; + efi_guid_t *vendor_guid; + u32 attributes; + unsigned long data_size; + void *data; + efi_status_t *status; +} __packed; + +struct efi_getnextvariablename { + unsigned long *variable_name_size; + efi_char16_t *variable_name; + efi_guid_t *vendor_guid; + efi_status_t *status; +} __packed; + +struct efi_queryvariableinfo { + u32 attributes; + u64 *maximum_variable_storage_size; + u64 *remaining_variable_storage_size; + u64 *maximum_variable_size; + efi_status_t *status; +} __packed; + +struct efi_gettime { + efi_time_t *time; + efi_time_cap_t *capabilities; + efi_status_t *status; +} __packed; + +struct efi_settime { + efi_time_t *time; + efi_status_t *status; +} __packed; + +struct efi_getwakeuptime { + efi_bool_t *enabled; + efi_bool_t *pending; + efi_time_t *time; + efi_status_t *status; +} __packed; + +struct efi_setwakeuptime { + efi_bool_t enabled; + efi_time_t *time; + efi_status_t *status; +} __packed; + +struct efi_getnexthighmonotoniccount { + u32 *high_count; + efi_status_t *status; +} __packed; + +struct efi_querycapsulecapabilities { + efi_capsule_header_t **capsule_header_array; + unsigned long capsule_count; + u64 *maximum_capsule_size; + int *reset_type; + efi_status_t *status; +} __packed; + +#define EFI_RUNTIME_GET_VARIABLE \ + _IOWR('p', 0x01, struct efi_getvariable) +#define EFI_RUNTIME_SET_VARIABLE \ + _IOW('p', 0x02, struct efi_setvariable) + +#define EFI_RUNTIME_GET_TIME \ + _IOR('p', 0x03, struct efi_gettime) +#define EFI_RUNTIME_SET_TIME \ + _IOW('p', 0x04, struct efi_settime) + +#define EFI_RUNTIME_GET_WAKETIME \ + _IOR('p', 0x05, struct efi_getwakeuptime) +#define EFI_RUNTIME_SET_WAKETIME \ + 
_IOW('p', 0x06, struct efi_setwakeuptime) + +#define EFI_RUNTIME_GET_NEXTVARIABLENAME \ + _IOWR('p', 0x07, struct efi_getnextvariablename) + +#define EFI_RUNTIME_QUERY_VARIABLEINFO \ + _IOR('p', 0x08, struct efi_queryvariableinfo) + +#define EFI_RUNTIME_GET_NEXTHIGHMONOTONICCOUNT \ + _IOR('p', 0x09, struct efi_getnexthighmonotoniccount) + +#define EFI_RUNTIME_QUERY_CAPSULECAPABILITIES \ + _IOR('p', 0x0A, struct efi_querycapsulecapabilities) + +#endif /* _DRIVERS_FIRMWARE_EFI_TEST_H_ */ From cb82cce7035ec22a69ab3bd4d2fe6729527ce1ca Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 25 Aug 2016 18:17:09 +0200 Subject: [PATCH 26/29] efi/arm64: Treat regions with WT/WC set but WB cleared as memory Currently, memory regions are only recorded in the memblock memory table if they have the EFI_MEMORY_WB memory type attribute set. In case the region is of a reserved type, it is also marked as MEMBLOCK_NOMAP, which will leave it out of the linear mapping. However, memory regions may legally have the EFI_MEMORY_WT or EFI_MEMORY_WC attributes set, and the EFI_MEMORY_WB cleared, in which case the region in question is obviously backed by normal memory, but is not recorded in the memblock memory table at all. Since it would be useful to be able to identify any UEFI reported memory region using memblock_is_memory(), it makes sense to add all memory to the memblock memory table, and simply mark it as MEMBLOCK_NOMAP if it lacks the EFI_MEMORY_WB attribute. While implementing this, let's refactor the code slightly to make it easier to understand: replace is_normal_ram() with is_memory(), and make it return true for each region that has any of the WB|WT|WC bits set. (This follows the AArch64 bindings in the UEFI spec, which state that those are the attributes that map to normal memory) Also, replace is_reserve_region() with is_usable_memory(), and only invoke it if the region in question was identified as memory by is_memory() in the first place. 
The net result is the same (only reserved regions that are backed by memory end up in the memblock memory table with the MEMBLOCK_NOMAP flag set) but carried out in a more straightforward way. Finally, we remove the trailing asterisk in the EFI debug output. Keeping it clutters the code, and it serves no real purpose now that we no longer temporarily reserve BootServices code and data regions like we did in the early days of EFI support on arm64 Linux (which it inherited from the x86 implementation) Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm Tested-by: James Morse Reviewed-by: James Morse Signed-off-by: Matt Fleming --- drivers/firmware/efi/arm-init.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index e0a511d4074f..8efe13075c92 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -26,9 +26,9 @@ u64 efi_system_table; -static int __init is_normal_ram(efi_memory_desc_t *md) +static int __init is_memory(efi_memory_desc_t *md) { - if (md->attribute & EFI_MEMORY_WB) + if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC)) return 1; return 0; } @@ -152,9 +152,9 @@ out: } /* - * Return true for RAM regions we want to permanently reserve. + * Return true for regions that can be used as System RAM. */ -static __init int is_reserve_region(efi_memory_desc_t *md) +static __init int is_usable_memory(efi_memory_desc_t *md) { switch (md->type) { case EFI_LOADER_CODE: @@ -163,18 +163,22 @@ static __init int is_reserve_region(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - return 0; + /* + * According to the spec, these regions are no longer reserved + * after calling ExitBootServices(). However, we can only use + * them as System RAM if they can be mapped writeback cacheable. 
+ */ + return (md->attribute & EFI_MEMORY_WB); default: break; } - return is_normal_ram(md); + return false; } static __init void reserve_regions(void) { efi_memory_desc_t *md; u64 paddr, npages, size; - int resv; if (efi_enabled(EFI_DBG)) pr_info("Processing EFI memory map:\n"); @@ -191,25 +195,23 @@ static __init void reserve_regions(void) paddr = md->phys_addr; npages = md->num_pages; - resv = is_reserve_region(md); if (efi_enabled(EFI_DBG)) { char buf[64]; - pr_info(" 0x%012llx-0x%012llx %s%s\n", + pr_info(" 0x%012llx-0x%012llx %s\n", paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md), - resv ? "*" : ""); + efi_md_typeattr_format(buf, sizeof(buf), md)); } memrange_efi_to_native(&paddr, &npages); size = npages << PAGE_SHIFT; - if (is_normal_ram(md)) + if (is_memory(md)) { early_init_dt_add_memory_arch(paddr, size); - if (resv) - memblock_mark_nomap(paddr, size); - + if (!is_usable_memory(md)) + memblock_mark_nomap(paddr, size); + } } } From 20ebc15e6c8f9772804fa10110bf074a7b1d25fa Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 25 Aug 2016 11:34:03 +0200 Subject: [PATCH 27/29] x86/efi: Use kmalloc_array() in efi_call_phys_prolog() * A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus reuse the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. * Replace the specification of a data type by a pointer dereference to make the corresponding size determination a bit safer according to the Linux coding style convention. Signed-off-by: Markus Elfring Reviewed-by: Paolo Bonzini Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Julia Lawall Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e1ca71259468..d65cdadaa6b6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -85,7 +85,7 @@ pgd_t * __init efi_call_phys_prolog(void) early_code_mapping_set_exec(1); n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); for (pgd = 0; pgd < n_pgds; pgd++) { save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); From 27571616385af9c2d6a3e570b06baf86f5aa04b1 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 6 Sep 2016 08:05:32 +0200 Subject: [PATCH 28/29] x86/efi: Optimize away setup_gop32/64 if unused Commit 2c23b73c2d02 ("x86/efi: Prepare GOP handling code for reuse as generic code") introduced an efi_is_64bit() macro to x86 which previously only existed for arm arches. The macro is used to choose between the 64 bit or 32 bit code path in gop.c at runtime. However the code path that's going to be taken is known at compile time when compiling for x86_32 or for x86_64 with mixed mode disabled. Amend the macro to eliminate the unused code path in those cases. 
Size of gop.o text section: CONFIG_X86_32: 1758 before, 1299 after CONFIG_X86_64 && !CONFIG_EFI_MIXED: 2201 before, 1406 after CONFIG_X86_64 && CONFIG_EFI_MIXED: 2201 before and after Signed-off-by: Lukas Wunner Reviewed-by: Ard Biesheuvel Signed-off-by: Matt Fleming --- arch/x86/include/asm/efi.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 4630e2bfa8fb..f14655e7726a 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -206,14 +206,23 @@ struct efi_config { __pure const struct efi_config *__efi_early(void); +static inline bool efi_is_64bit(void) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return false; + + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return true; + + return __efi_early()->is64; +} + #define efi_call_early(f, ...) \ __efi_early()->call(__efi_early()->f, __VA_ARGS__); #define __efi_call_early(f, ...) \ __efi_early()->call((unsigned long)f, __VA_ARGS__); -#define efi_is_64bit() __efi_early()->is64 - extern bool efi_reboot_required(void); #else From 0a637ee61247bd4bed9b2a07568ef7a1cfc76187 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 22 Aug 2016 12:01:21 +0200 Subject: [PATCH 29/29] x86/efi: Allow invocation of arbitrary boot services We currently allow invocation of 8 boot services with efi_call_early(). Not included are LocateHandleBuffer and LocateProtocol in particular. For graphics output or to retrieve PCI ROMs and Apple device properties, we're thus forced to use the LocateHandle + AllocatePool + LocateHandle combo, which is cumbersome and needs more code. The ARM folks allow invocation of the full set of boot services but are restricted to our 8 boot services in functions shared across arches. Thus, rather than adding just LocateHandleBuffer and LocateProtocol to struct efi_config, let's rework efi_call_early() to allow invocation of arbitrary boot services by selecting the 64 bit vs 32 bit code path in the macro itself. 
When compiling for 32 bit or for 64 bit without mixed mode, the unused code path is optimized away and the binary code is the same as before. But on 64 bit with mixed mode enabled, this commit adds one compare instruction to each invocation of a boot service and, depending on the code path selected, two jump instructions. (Most of the time gcc arranges the jumps in the 32 bit code path.) The result is a minuscule performance penalty and the binary code becomes slightly larger and more difficult to read when disassembled. This isn't a hot path, so these drawbacks are arguably outweighed by the attainable simplification of the C code. We have some overhead anyway for thunking or conversion between calling conventions. The 8 boot services can consequently be removed from struct efi_config. No functional change intended (for now). Example -- invocation of free_pool before (64 bit code path): 0x2d4 movq %ds:efi_early, %rdx ; efi_early 0x2db movq %ss:arg_0-0x20(%rsp), %rsi 0x2e0 xorl %eax, %eax 0x2e2 movq %ds:0x28(%rdx), %rdi ; efi_early->free_pool 0x2e6 callq *%ds:0x58(%rdx) ; efi_early->call() Example -- invocation of free_pool after (64 / 32 bit mixed code path): 0x0dc movq %ds:efi_early, %rax ; efi_early 0x0e3 cmpb $0, %ds:0x28(%rax) ; !efi_early->is64 ? 0x0e7 movq %ds:0x20(%rax), %rdx ; efi_early->call() 0x0eb movq %ds:0x10(%rax), %rax ; efi_early->boot_services 0x0ef je $0x150 0x0f1 movq %ds:0x48(%rax), %rdi ; free_pool (64 bit) 0x0f5 xorl %eax, %eax 0x0f7 callq *%rdx ... 
0x150 movl %ds:0x30(%rax), %edi ; free_pool (32 bit) 0x153 jmp $0x0f5 Size of eboot.o text section: CONFIG_X86_32: 6464 before, 6318 after CONFIG_X86_64 && !CONFIG_EFI_MIXED: 7670 before, 7573 after CONFIG_X86_64 && CONFIG_EFI_MIXED: 7670 before, 8319 after Signed-off-by: Lukas Wunner Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 13 +------------ arch/x86/boot/compressed/head_32.S | 6 +++--- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/include/asm/efi.h | 15 ++++++--------- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f7fc85bf8221..447a6a2df5ae 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -29,22 +29,11 @@ __pure const struct efi_config *__efi_early(void) static void setup_boot_services##bits(struct efi_config *c) \ { \ efi_system_table_##bits##_t *table; \ - efi_boot_services_##bits##_t *bt; \ \ table = (typeof(table))sys_table; \ \ + c->boot_services = table->boottime; \ c->text_output = table->con_out; \ - \ - bt = (typeof(bt))(unsigned long)(table->boottime); \ - \ - c->allocate_pool = bt->allocate_pool; \ - c->allocate_pages = bt->allocate_pages; \ - c->get_memory_map = bt->get_memory_map; \ - c->free_pool = bt->free_pool; \ - c->free_pages = bt->free_pages; \ - c->locate_handle = bt->locate_handle; \ - c->handle_protocol = bt->handle_protocol; \ - c->exit_boot_services = bt->exit_boot_services; \ } BOOT_SERVICES(32); BOOT_SERVICES(64); diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 1038524270e7..fd0b6a272dd5 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -82,7 +82,7 @@ ENTRY(efi_pe_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 88(%eax) + add %esi, 32(%eax) pushl %eax call make_boot_params @@ -108,7 +108,7 @@ ENTRY(efi32_stub_entry) /* Relocate efi_config->call() */ leal 
efi32_config(%esi), %eax - add %esi, 88(%eax) + add %esi, 32(%eax) pushl %eax 2: call efi_main @@ -264,7 +264,7 @@ relocated: #ifdef CONFIG_EFI_STUB .data efi32_config: - .fill 11,8,0 + .fill 4,8,0 .long efi_call_phys .long 0 .byte 0 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 0d80a7ad65cd..efdfba21a5b2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -265,7 +265,7 @@ ENTRY(efi_pe_entry) /* * Relocate efi_config->call(). */ - addq %rbp, efi64_config+88(%rip) + addq %rbp, efi64_config+32(%rip) movq %rax, %rdi call make_boot_params @@ -285,7 +285,7 @@ handover_entry: * Relocate efi_config->call(). */ movq efi_config(%rip), %rax - addq %rbp, 88(%rax) + addq %rbp, 32(%rax) 2: movq efi_config(%rip), %rdi call efi_main @@ -457,14 +457,14 @@ efi_config: #ifdef CONFIG_EFI_MIXED .global efi32_config efi32_config: - .fill 11,8,0 + .fill 4,8,0 .quad efi64_thunk .byte 0 #endif .global efi64_config efi64_config: - .fill 11,8,0 + .fill 4,8,0 .quad efi_call .byte 1 #endif /* CONFIG_EFI_STUB */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index f14655e7726a..389d700b961e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -191,14 +191,7 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( struct efi_config { u64 image_handle; u64 table; - u64 allocate_pool; - u64 allocate_pages; - u64 get_memory_map; - u64 free_pool; - u64 free_pages; - u64 locate_handle; - u64 handle_protocol; - u64 exit_boot_services; + u64 boot_services; u64 text_output; efi_status_t (*call)(unsigned long, ...); bool is64; @@ -218,7 +211,11 @@ static inline bool efi_is_64bit(void) } #define efi_call_early(f, ...) \ - __efi_early()->call(__efi_early()->f, __VA_ARGS__); + __efi_early()->call(efi_is_64bit() ? 
\ + ((efi_boot_services_64_t *)(unsigned long) \ + __efi_early()->boot_services)->f : \ + ((efi_boot_services_32_t *)(unsigned long) \ + __efi_early()->boot_services)->f, __VA_ARGS__) #define __efi_call_early(f, ...) \ __efi_early()->call((unsigned long)f, __VA_ARGS__);