efi: Allow drivers to reserve boot services forever
Today, it is not possible for drivers to reserve EFI boot services regions for access after efi_free_boot_services() has been called on x86. For ARM/arm64 it can be done simply by calling memblock_reserve(). Having this ability for all three architectures is desirable for a couple of reasons: 1) it saves drivers copying data out of those regions; 2) kexec reboot can now make use of things like ESRT. Instead of using the standard memblock_reserve(), which is insufficient to reserve the region on x86 (see efi_reserve_boot_services()), a new API is introduced in this patch: efi_mem_reserve(). efi.memmap now always represents which EFI memory regions are available. On x86 the EFI boot services regions that have not been reserved via efi_mem_reserve() will be removed from efi.memmap during efi_free_boot_services(). This has implications for kexec, since it is not possible for a newly kexec'd kernel to access the same boot services regions that the initial boot kernel had access to unless they are reserved by every kexec kernel in the chain. Tested-by: Dave Young <dyoung@redhat.com> [kexec/kdump] Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> [arm] Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Leif Lindholm <leif.lindholm@linaro.org> Cc: Peter Jones <pjones@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
This commit is contained in:
		
							parent
							
								
									c45f4da33a
								
							
						
					
					
						commit
						816e76129e
					
				| @ -163,6 +163,71 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(efi_query_variable_store); | ||||
| 
 | ||||
| /*
 | ||||
|  * The UEFI specification makes it clear that the operating system is | ||||
|  * free to do whatever it wants with boot services code after | ||||
|  * ExitBootServices() has been called. Ignoring this recommendation a | ||||
|  * significant bunch of EFI implementations continue calling into boot | ||||
|  * services code (SetVirtualAddressMap). In order to work around such | ||||
|  * buggy implementations we reserve boot services region during EFI | ||||
|  * init and make sure it stays executable. Then, after | ||||
|  * SetVirtualAddressMap(), it is discarded. | ||||
|  * | ||||
|  * However, some boot services regions contain data that is required | ||||
|  * by drivers, so we need to track which memory ranges can never be | ||||
|  * freed. This is done by tagging those regions with the | ||||
|  * EFI_MEMORY_RUNTIME attribute. | ||||
|  * | ||||
|  * Any driver that wants to mark a region as reserved must use | ||||
|  * efi_mem_reserve() which will insert a new EFI memory descriptor | ||||
|  * into efi.memmap (splitting existing regions if necessary) and tag | ||||
|  * it with EFI_MEMORY_RUNTIME. | ||||
|  */ | ||||
| void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) | ||||
| { | ||||
| 	phys_addr_t new_phys, new_size; | ||||
| 	struct efi_mem_range mr; | ||||
| 	efi_memory_desc_t md; | ||||
| 	int num_entries; | ||||
| 	void *new; | ||||
| 
 | ||||
| 	if (efi_mem_desc_lookup(addr, &md)) { | ||||
| 		pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) { | ||||
| 		pr_err("Region spans EFI memory descriptors, %pa\n", &addr); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	mr.range.start = addr; | ||||
| 	mr.range.end = addr + size; | ||||
| 	mr.attribute = md.attribute | EFI_MEMORY_RUNTIME; | ||||
| 
 | ||||
| 	num_entries = efi_memmap_split_count(&md, &mr.range); | ||||
| 	num_entries += efi.memmap.nr_map; | ||||
| 
 | ||||
| 	new_size = efi.memmap.desc_size * num_entries; | ||||
| 
 | ||||
| 	new_phys = memblock_alloc(new_size, 0); | ||||
| 	if (!new_phys) { | ||||
| 		pr_err("Could not allocate boot services memmap\n"); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	new = early_memremap(new_phys, new_size); | ||||
| 	if (!new) { | ||||
| 		pr_err("Failed to map new boot services memmap\n"); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	efi_memmap_insert(&efi.memmap, new, &mr); | ||||
| 	early_memunmap(new, new_size); | ||||
| 
 | ||||
| 	efi_memmap_install(new_phys, num_entries); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Helper function for efi_reserve_boot_services() to figure out if we | ||||
|  * can free regions in efi_free_boot_services(). | ||||
| @ -184,15 +249,6 @@ static bool can_free_region(u64 start, u64 size) | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * The UEFI specification makes it clear that the operating system is free to do | ||||
|  * whatever it wants with boot services code after ExitBootServices() has been | ||||
|  * called. Ignoring this recommendation a significant bunch of EFI implementations  | ||||
|  * continue calling into boot services code (SetVirtualAddressMap). In order to  | ||||
|  * work around such buggy implementations we reserve boot services region during  | ||||
|  * EFI init and make sure it stays executable. Then, after SetVirtualAddressMap(), it | ||||
| * is discarded. | ||||
| */ | ||||
| void __init efi_reserve_boot_services(void) | ||||
| { | ||||
| 	efi_memory_desc_t *md; | ||||
| @ -249,7 +305,10 @@ void __init efi_reserve_boot_services(void) | ||||
| 
 | ||||
| void __init efi_free_boot_services(void) | ||||
| { | ||||
| 	phys_addr_t new_phys, new_size; | ||||
| 	efi_memory_desc_t *md; | ||||
| 	int num_entries = 0; | ||||
| 	void *new, *new_md; | ||||
| 
 | ||||
| 	for_each_efi_memory_desc(md) { | ||||
| 		unsigned long long start = md->phys_addr; | ||||
| @ -257,12 +316,16 @@ void __init efi_free_boot_services(void) | ||||
| 		size_t rm_size; | ||||
| 
 | ||||
| 		if (md->type != EFI_BOOT_SERVICES_CODE && | ||||
| 		    md->type != EFI_BOOT_SERVICES_DATA) | ||||
| 		    md->type != EFI_BOOT_SERVICES_DATA) { | ||||
| 			num_entries++; | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		/* Do not free, someone else owns it: */ | ||||
| 		if (md->attribute & EFI_MEMORY_RUNTIME) | ||||
| 		if (md->attribute & EFI_MEMORY_RUNTIME) { | ||||
| 			num_entries++; | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Nasty quirk: if all sub-1MB memory is used for boot | ||||
| @ -286,6 +349,42 @@ void __init efi_free_boot_services(void) | ||||
| 
 | ||||
| 		free_bootmem_late(start, size); | ||||
| 	} | ||||
| 
 | ||||
| 	new_size = efi.memmap.desc_size * num_entries; | ||||
| 	new_phys = memblock_alloc(new_size, 0); | ||||
| 	if (!new_phys) { | ||||
| 		pr_err("Failed to allocate new EFI memmap\n"); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	new = memremap(new_phys, new_size, MEMREMAP_WB); | ||||
| 	if (!new) { | ||||
| 		pr_err("Failed to map new EFI memmap\n"); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Build a new EFI memmap that excludes any boot services | ||||
| 	 * regions that are not tagged EFI_MEMORY_RUNTIME, since those | ||||
| 	 * regions have now been freed. | ||||
| 	 */ | ||||
| 	new_md = new; | ||||
| 	for_each_efi_memory_desc(md) { | ||||
| 		if (!(md->attribute & EFI_MEMORY_RUNTIME) && | ||||
| 		    (md->type == EFI_BOOT_SERVICES_CODE || | ||||
| 		     md->type == EFI_BOOT_SERVICES_DATA)) | ||||
| 			continue; | ||||
| 
 | ||||
| 		memcpy(new_md, md, efi.memmap.desc_size); | ||||
| 		new_md += efi.memmap.desc_size; | ||||
| 	} | ||||
| 
 | ||||
| 	memunmap(new); | ||||
| 
 | ||||
| 	if (efi_memmap_install(new_phys, num_entries)) { | ||||
| 		pr_err("Could not install new EFI memmap\n"); | ||||
| 		return; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | ||||
| @ -27,6 +27,7 @@ | ||||
| #include <linux/slab.h> | ||||
| #include <linux/acpi.h> | ||||
| #include <linux/ucs2_string.h> | ||||
| #include <linux/memblock.h> | ||||
| 
 | ||||
| #include <asm/early_ioremap.h> | ||||
| 
 | ||||
| @ -396,6 +397,35 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md) | ||||
| 	return end; | ||||
| } | ||||
| 
 | ||||
| void __init __weak efi_arch_mem_reserve(phys_addr_t addr, u64 size) {} | ||||
| 
 | ||||
| /**
 | ||||
|  * efi_mem_reserve - Reserve an EFI memory region | ||||
|  * @addr: Physical address to reserve | ||||
|  * @size: Size of reservation | ||||
|  * | ||||
|  * Mark a region as reserved from general kernel allocation and | ||||
|  * prevent it being released by efi_free_boot_services(). | ||||
|  * | ||||
|  * This function should be called drivers once they've parsed EFI | ||||
|  * configuration tables to figure out where their data lives, e.g. | ||||
|  * efi_esrt_init(). | ||||
|  */ | ||||
| void __init efi_mem_reserve(phys_addr_t addr, u64 size) | ||||
| { | ||||
| 	if (!memblock_is_region_reserved(addr, size)) | ||||
| 		memblock_reserve(addr, size); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Some architectures (x86) reserve all boot services ranges | ||||
| 	 * until efi_free_boot_services() because of buggy firmware | ||||
| 	 * implementations. This means the above memblock_reserve() is | ||||
| 	 * superfluous on x86 and instead what it needs to do is | ||||
| 	 * ensure the @start, @size is not freed. | ||||
| 	 */ | ||||
| 	efi_arch_mem_reserve(addr, size); | ||||
| } | ||||
| 
 | ||||
| static __initdata efi_config_table_type_t common_tables[] = { | ||||
| 	{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20}, | ||||
| 	{ACPI_TABLE_GUID, "ACPI", &efi.acpi}, | ||||
|  | ||||
| @ -944,6 +944,7 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); | ||||
| extern int __init efi_uart_console_only (void); | ||||
| extern u64 efi_mem_desc_end(efi_memory_desc_t *md); | ||||
| extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); | ||||
| extern void efi_mem_reserve(phys_addr_t addr, u64 size); | ||||
| extern void efi_initialize_iomem_resources(struct resource *code_resource, | ||||
| 		struct resource *data_resource, struct resource *bss_resource); | ||||
| extern void efi_reserve_boot_services(void); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user