From e1e1fddae74b72d0415965821ad00fe39aac6f13 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 14:02:15 +0200 Subject: [PATCH 01/47] arm64/mm: add explicit mm_struct argument to __create_mapping() Currently, swapper_pg_dir and idmap_pg_dir share the init_mm mm_struct instance. To allow the introduction of other pg_dir instances, for instance, for UEFI's mapping of Runtime Services, make the mm_struct instance an explicit argument that gets passed down to the pmd and pte instantiation functions. Note that the consumers (pud_populate/pgd_populate) of the mm_struct argument don't actually inspect it, but let's fix it for correctness' sake. Acked-by: Steve Capper Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/mm/mmu.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6032f3e3056a..7d5dfe2d3de0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -156,9 +156,9 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, int map_io) { pmd_t *pmd; unsigned long next; @@ -178,7 +178,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, */ if (pud_none(*pud) || pud_bad(*pud)) { pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); - pud_populate(&init_mm, pud, pmd); + pud_populate(mm, pud, pmd); } pmd = pmd_offset(pud, addr); @@ -202,16 +202,16 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, int map_io) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(&init_mm, pgd, pud); + pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -240,7 +240,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, flush_tlb_all(); } } else { - alloc_init_pmd(pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, map_io); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -250,9 +250,9 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'.
*/ -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - int map_io) +static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, + phys_addr_t phys, unsigned long virt, + phys_addr_t size, int map_io) { unsigned long addr, length, end, next; @@ -262,7 +262,7 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, map_io); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -275,7 +275,8 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0); + __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + size, 0); } void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) @@ -284,7 +285,7 @@ void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) pr_warn("BUG: not creating id mapping for %pa\n", &addr); return; } - __create_mapping(&idmap_pg_dir[pgd_index(addr)], + __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)], addr, addr, size, map_io); } From 8ce837cee8f51fb0eacb32c85461ea2f0fafc9f8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 15:42:07 +0200 Subject: [PATCH 02/47] arm64/mm: add create_pgd_mapping() to create private page tables For UEFI, we need to install the memory mappings used for Runtime Services in a dedicated set of page tables. Add create_pgd_mapping(), which allows us to allocate and install those page table entries early. Reviewed-by: Will Deacon Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/include/asm/pgtable.h | 5 ++++ arch/arm64/mm/mmu.c | 43 ++++++++++++++++---------------- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index c2f006c48bdb..5fd40c43be80 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -33,5 +33,8 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); /* create an identity mapping for memory (or io if map_io is true) */ extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); +extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot); #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 210d632aa5ad..59079248529d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -264,6 +264,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pgprot_t mk_sect_prot(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); +} + /* * THP definitions. 
*/ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7d5dfe2d3de0..3f3d5aa4a8b1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -158,20 +158,10 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, int map_io) + phys_addr_t phys, pgprot_t prot) { pmd_t *pmd; unsigned long next; - pmdval_t prot_sect; - pgprot_t prot_pte; - - if (map_io) { - prot_sect = PROT_SECT_DEVICE_nGnRE; - prot_pte = __pgprot(PROT_DEVICE_nGnRE); - } else { - prot_sect = PROT_SECT_NORMAL_EXEC; - prot_pte = PAGE_KERNEL_EXEC; - } /* * Check for initial section mappings in the pgd/pud and remove them. @@ -187,7 +177,8 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect)); + set_pmd(pmd, __pmd(phys | + pgprot_val(mk_sect_prot(prot)))); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -196,7 +187,7 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot_pte); + prot); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -204,7 +195,7 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, int map_io) + phys_addr_t phys, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -222,10 +213,11 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, /* * For 4K granule only, attempt to put down a 1GB block */ - if (!map_io && (PAGE_SHIFT == 12) && + if ((PAGE_SHIFT == 12) && ((addr | next | phys) & ~PUD_MASK) == 0) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC)); + set_pud(pud, __pud(phys | + pgprot_val(mk_sect_prot(prot)))); /* * If we have an old value for a pud, it will @@ -240,7 +232,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, flush_tlb_all(); } } else { - alloc_init_pmd(mm, pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, prot); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -252,7 +244,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, */ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, int map_io) + phys_addr_t size, pgprot_t prot) { unsigned long addr, length, end, next; @@ -262,7 +254,7 @@ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, prot); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -276,7 +268,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, 0); + size, PAGE_KERNEL_EXEC); } void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) @@ -286,7 +278,16 @@ void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) return; } __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)], - addr, addr, size, map_io); + addr, addr, 
size, + map_io ? __pgprot(PROT_DEVICE_nGnRE) + : PAGE_KERNEL_EXEC); +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot) +{ + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot); } static void __init map_mem(void) From 7bb68410ef22067b08fd52887875b8f337f89dcc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Oct 2014 15:04:15 +0200 Subject: [PATCH 03/47] efi: split off remapping code from efi_config_init() Split off the remapping code from efi_config_init() so that the caller can perform its own remapping. This is necessary to correctly handle virtually remapped UEFI memory regions under kexec, as efi.systab will have been updated to a virtual address. Acked-by: Matt Fleming Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 80 +++++++++++++++++++++----------------- include/linux/efi.h | 2 + 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9035c1b74d58..b7ba9d8ec4b0 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -293,10 +293,49 @@ static __init int match_config_table(efi_guid_t *guid, return 0; } +int __init efi_config_parse_tables(void *config_tables, int count, int sz, + efi_config_table_type_t *arch_tables) +{ + void *tablep; + int i; + + tablep = config_tables; + pr_info(""); + for (i = 0; i < count; i++) { + efi_guid_t guid; + unsigned long table; + + if (efi_enabled(EFI_64BIT)) { + u64 table64; + guid = ((efi_config_table_64_t *)tablep)->guid; + table64 = ((efi_config_table_64_t *)tablep)->table; + table = table64; +#ifndef CONFIG_64BIT + if (table64 >> 32) { + pr_cont("\n"); + pr_err("Table located above 4GB, disabling EFI.\n"); + return -EINVAL; + } +#endif + } else { + guid = ((efi_config_table_32_t *)tablep)->guid; + table = ((efi_config_table_32_t *)tablep)->table; + } + + if (!match_config_table(&guid, table, common_tables)) + match_config_table(&guid, table, arch_tables); + + tablep += sz; + } + pr_cont("\n"); + set_bit(EFI_CONFIG_TABLES, &efi.flags); + return 0; +} + int __init efi_config_init(efi_config_table_type_t *arch_tables) { - void *config_tables, *tablep; - int i, sz; + void *config_tables; + int sz, ret; if (efi_enabled(EFI_64BIT)) sz = sizeof(efi_config_table_64_t); @@ -313,42 +352,11 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) return -ENOMEM; } - tablep = config_tables; - pr_info(""); - for (i = 0; i < efi.systab->nr_tables; i++) { - efi_guid_t guid; - unsigned long table; + ret = efi_config_parse_tables(config_tables, efi.systab->nr_tables, sz, + arch_tables); - if (efi_enabled(EFI_64BIT)) { - u64 table64; - guid = ((efi_config_table_64_t *)tablep)->guid; - table64 = ((efi_config_table_64_t *)tablep)->table; - table = table64; -#ifndef CONFIG_64BIT - if (table64 >> 32) { - pr_cont("\n"); - pr_err("Table located above 4GB, disabling EFI.\n"); - early_memunmap(config_tables, - efi.systab->nr_tables * sz); - return -EINVAL; - } -#endif - } else { - guid = ((efi_config_table_32_t *)tablep)->guid; - table = ((efi_config_table_32_t *)tablep)->table; - } - - if (!match_config_table(&guid, table, common_tables)) - match_config_table(&guid, table, arch_tables); - - tablep += sz; - } - pr_cont("\n"); early_memunmap(config_tables, efi.systab->nr_tables * sz); - - set_bit(EFI_CONFIG_TABLES, &efi.flags); - - return 0; + return ret; } #ifdef CONFIG_EFI_VARS_MODULE diff --git a/include/linux/efi.h
b/include/linux/efi.h index 0238d612750e..5ffe5115951f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -875,6 +875,8 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int efi_config_init(efi_config_table_type_t *arch_tables); +extern int efi_config_parse_tables(void *config_tables, int count, int sz, + efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); From cf2b0f102cdf912eedb87b10271fa0ad582cf2c1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Nov 2014 13:46:44 +0100 Subject: [PATCH 04/47] efi: efistub: allow allocation alignment larger than EFI_PAGE_SIZE On systems with 64 KB pages, it is preferable for UEFI memory map entries to be 64 KB aligned multiples of 64 KB, because it relieves us of having to deal with the residues. So, if EFI_ALLOC_ALIGN is #define'd by the platform, use it to round up all memory allocations made. Acked-by: Matt Fleming Acked-by: Borislav Petkov Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- .../firmware/efi/libstub/efi-stub-helper.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index a920fec8fe88..e766df60fbfb 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -32,6 +32,15 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; +/* + * Allow the platform to override the allocation granularity: this allows + * systems that have the capability to run with a larger page size to deal + * with the allocations for initrd and fdt more efficiently. + */ +#ifndef EFI_ALLOC_ALIGN +#define EFI_ALLOC_ALIGN EFI_PAGE_SIZE +#endif + struct file_info { efi_file_handle_t *handle; u64 size; @@ -150,10 +159,10 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, * a specific address. We are doing page-based allocations, * so we must be aligned to a page. */ - if (align < EFI_PAGE_SIZE) - align = EFI_PAGE_SIZE; + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; - nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; again: for (i = 0; i < map_size / desc_size; i++) { efi_memory_desc_t *desc; @@ -235,10 +244,10 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, * a specific address. We are doing page-based allocations, * so we must be aligned to a page. */ - if (align < EFI_PAGE_SIZE) - align = EFI_PAGE_SIZE; + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; - nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; for (i = 0; i < map_size / desc_size; i++) { efi_memory_desc_t *desc; unsigned long m = (unsigned long)map; @@ -292,7 +301,7 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, if (!size) return; - nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; efi_call_early(free_pages, addr, nr_pages); } @@ -561,7 +570,7 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * to the preferred address. If that fails, allocate as low * as possible while respecting the required alignment. 
*/ - nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, nr_pages, &efi_addr); From 1bd0abb0c924a8b28c6466cdd6bb34ea053541dc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Nov 2014 13:50:21 +0100 Subject: [PATCH 05/47] arm64/efi: set EFI_ALLOC_ALIGN to 64 KB Set EFI_ALLOC_ALIGN to 64 KB so that all allocations done by the stub are naturally compatible with a 64 KB granule kernel. Acked-by: Leif Lindholm Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index a34fd3b12e2b..71291253114f 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -44,4 +44,6 @@ extern void efi_idmap_init(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define EFI_ALLOC_ALIGN SZ_64K + #endif /* _ASM_EFI_H */ From f3cdfd239da56a4cea75a2920dc326f0f45f67e3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 16:27:26 +0200 Subject: [PATCH 06/47] arm64/efi: move SetVirtualAddressMap() to UEFI stub In order to support kexec, the kernel needs to be able to deal with the state of the UEFI firmware after SetVirtualAddressMap() has been called. To avoid having separate code paths for non-kexec and kexec, let's move the call to SetVirtualAddressMap() to the stub: this will guarantee us that it will only be called once (since the stub is not executed during kexec), and ensures that the UEFI state is identical between kexec and normal boot. This implies that the layout of the virtual mapping needs to be created by the stub as well. All regions are rounded up to a naturally aligned multiple of 64 KB (for compatibility with 64k pages kernels) and recorded in the UEFI memory map. The kernel proper reads those values and installs the mappings in a dedicated set of page tables that are swapped in during UEFI Runtime Services calls. Acked-by: Leif Lindholm Acked-by: Matt Fleming Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 34 +++- arch/arm64/kernel/efi.c | 230 +++++++++++++----------- arch/arm64/kernel/setup.c | 1 + drivers/firmware/efi/libstub/arm-stub.c | 59 ++++++ drivers/firmware/efi/libstub/efistub.h | 4 + drivers/firmware/efi/libstub/fdt.c | 62 ++++++- 6 files changed, 282 insertions(+), 108 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 71291253114f..effef3713c5a 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -7,28 +7,36 @@ #ifdef CONFIG_EFI extern void efi_init(void); extern void efi_idmap_init(void); +extern void efi_virtmap_init(void); #else #define efi_init() #define efi_idmap_init() +#define efi_virtmap_init() #endif #define efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ efi_status_t __s; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ __s; \ }) #define __efi_call_virt(f, ...) 
\ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ }) @@ -46,4 +54,26 @@ extern void efi_idmap_init(void); #define EFI_ALLOC_ALIGN SZ_64K +/* + * On ARM systems, virtually remapped UEFI runtime services are set up in three + * distinct stages: + * - The stub retrieves the final version of the memory map from UEFI, populates + * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime + * service to communicate the new mapping to the firmware (Note that the new + * mapping is not live at this time) + * - During early boot, the page tables are allocated and populated based on the + * virt_addr fields in the memory map, but only if all descriptors with the + * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this + * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings + * have been installed successfully. + * - During an early initcall(), the UEFI Runtime Services are enabled and the + * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a + * non-early mapping of the UEFI system table, and we need to have the virtmap + * installed. + */ +#define EFI_VIRTMAP EFI_ARCH_1 + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 2bb4347d0edf..755e545144ea 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,25 +11,31 @@ * */ +#include #include #include #include #include +#include #include #include #include +#include +#include +#include #include #include +#include #include #include #include #include +#include +#include struct efi_memory_map memmap; -static efi_runtime_services_t *runtime; - static u64 efi_system_table; static int uefi_debug __initdata; @@ -69,9 +75,33 @@ static void __init efi_setup_idmap(void) } } +/* + * Translate a EFI virtual address into a physical address: this is necessary, + * as some data members of the EFI system table are virtually remapped after + * SetVirtualAddressMap() has been called. 
+ */ +static phys_addr_t efi_to_phys(unsigned long addr) +{ + efi_memory_desc_t *md; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (md->virt_addr == 0) + /* no virtual mapping has been installed by the stub */ + break; + if (md->virt_addr <= addr && + (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) + return md->phys_addr + addr - md->virt_addr; + } + return addr; +} + static int __init uefi_init(void) { efi_char16_t *c16; + void *config_tables; + u64 table_size; char vendor[100] = "unknown"; int i, retval; @@ -99,7 +129,7 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi.systab->fw_vendor, + c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), sizeof(vendor)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) @@ -112,8 +142,14 @@ static int __init uefi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - retval = efi_config_init(NULL); + table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; + config_tables = early_memremap(efi_to_phys(efi.systab->tables), + table_size); + retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, + sizeof(efi_config_table_64_t), NULL); + + early_memunmap(config_tables, table_size); out: early_memunmap(efi.systab, sizeof(efi_system_table_t)); return retval; @@ -329,51 +365,14 @@ void __init efi_idmap_init(void) early_memunmap(memmap.map, memmap.map_end - memmap.map); } -static int __init remap_region(efi_memory_desc_t *md, void **new) -{ - u64 paddr, vaddr, npages, size; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - vaddr = (__force u64)ioremap_cache(paddr, size); - else - vaddr = (__force u64)ioremap(paddr, size); - - if (!vaddr) { - pr_err("Unable to remap 0x%llx pages @ %p\n", - npages, (void *)paddr); - return 0; - } - - /* adjust for any rounding when EFI and system pagesize differs */ - md->virt_addr = vaddr + (md->phys_addr - paddr); - - if (uefi_debug) - pr_info(" EFI remap 0x%012llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - memcpy(*new, md, memmap.desc_size); - *new += memmap.desc_size; - - return 1; -} - /* - * Switch UEFI from an identity map to a kernel virtual map + * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., + * non-early mapping of the UEFI system table and virtual mappings for all + * EFI_MEMORY_RUNTIME regions. 
*/ -static int __init arm64_enter_virtual_mode(void) +static int __init arm64_enable_runtime_services(void) { - efi_memory_desc_t *md; - phys_addr_t virtmap_phys; - void *virtmap, *virt_md; - efi_status_t status; u64 mapsize; - int count = 0; - unsigned long flags; if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); @@ -395,81 +394,30 @@ static int __init arm64_enter_virtual_mode(void) efi.memmap = &memmap; - /* Map the runtime regions */ - virtmap = kmalloc(mapsize, GFP_KERNEL); - if (!virtmap) { - pr_err("Failed to allocate EFI virtual memmap\n"); - return -1; - } - virtmap_phys = virt_to_phys(virtmap); - virt_md = virtmap; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (!remap_region(md, &virt_md)) - goto err_unmap; - ++count; - } - - efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + efi.systab = (__force void *)ioremap_cache(efi_system_table, + sizeof(efi_system_table_t)); if (!efi.systab) { - /* - * If we have no virtual mapping for the System Table at this - * point, the memory map doesn't cover the physical offset where - * it resides. This means the System Table will be inaccessible - * to Runtime Services themselves once the virtual mapping is - * installed. - */ - pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); - goto err_unmap; + pr_err("Failed to remap EFI System Table\n"); + return -1; } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - local_irq_save(flags); - cpu_switch_mm(idmap_pg_dir, &init_mm); - - /* Call SetVirtualAddressMap with the physical address of the map */ - runtime = efi.systab->runtime; - efi.set_virtual_address_map = runtime->set_virtual_address_map; - - status = efi.set_virtual_address_map(count * memmap.desc_size, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)virtmap_phys); - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - local_irq_restore(flags); - - kfree(virtmap); - free_boot_services(); - if (status != EFI_SUCCESS) { - pr_err("Failed to set EFI virtual address map! 
[%lx]\n", - status); + if (!efi_enabled(EFI_VIRTMAP)) { + pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; } /* Set up runtime services function pointers */ - runtime = efi.systab->runtime; efi_native_runtime_setup(); set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi.runtime_version = efi.systab->hdr.revision; return 0; - -err_unmap: - /* unmap all mappings that succeeded: there are 'count' of those */ - for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { - md = virt_md; - iounmap((__force void __iomem *)md->virt_addr); - } - kfree(virtmap); - return -1; } -early_initcall(arm64_enter_virtual_mode); +early_initcall(arm64_enable_runtime_services); static int __init arm64_dmi_init(void) { @@ -484,3 +432,79 @@ static int __init arm64_dmi_init(void) return 0; } core_initcall(arm64_dmi_init); + +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + INIT_MM_CONTEXT(efi_mm) +}; + +static void efi_set_pgd(struct mm_struct *mm) +{ + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + if (icache_is_aivivt()) + __flush_icache_all(); +} + +void efi_virtmap_load(void) +{ + preempt_disable(); + efi_set_pgd(&efi_mm); +} + +void efi_virtmap_unload(void) +{ + efi_set_pgd(current->active_mm); + preempt_enable(); +} + +void __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_BOOT)) + return; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (WARN(md->virt_addr == 0, + "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n", + md->phys_addr)) + return; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + pr_info(" EFI remap 0x%016llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. + */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); + } + set_bit(EFI_VIRTMAP, &efi.flags); +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 20fe2932ad0c..beac8188fdbd 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -401,6 +401,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); request_standard_resources(); + efi_virtmap_init(); efi_idmap_init(); early_ioremap_reset(); diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index eb48a1a1a576..e2432b39b6df 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -295,3 +295,62 @@ fail_free_image: fail: return EFI_ERROR; } + +/* + * This is the base address at which to start allocating virtual memory ranges + * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use + * any allocation we choose, and eliminate the risk of a conflict after kexec. 
+ * The value chosen is the largest non-zero power of 2 suitable for this purpose + * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can + * be mapped efficiently. + */ +#define EFI_RT_VIRTUAL_BASE 0x40000000 + +/* + * efi_get_virtmap() - create a virtual mapping for the EFI memory map + * + * This function populates the virt_addr fields of all memory region descriptors + * in @memory_map whose EFI_MEMORY_RUNTIME attribute is set. Those descriptors + * are also copied to @runtime_map, and their total count is returned in @count. + */ +void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, + unsigned long desc_size, efi_memory_desc_t *runtime_map, + int *count) +{ + u64 efi_virt_base = EFI_RT_VIRTUAL_BASE; + efi_memory_desc_t *out = runtime_map; + int l; + + for (l = 0; l < map_size; l += desc_size) { + efi_memory_desc_t *in = (void *)memory_map + l; + u64 paddr, size; + + if (!(in->attribute & EFI_MEMORY_RUNTIME)) + continue; + + /* + * Make the mapping compatible with 64k pages: this allows + * a 4k page size kernel to kexec a 64k page size kernel and + * vice versa. + */ + paddr = round_down(in->phys_addr, SZ_64K); + size = round_up(in->num_pages * EFI_PAGE_SIZE + + in->phys_addr - paddr, SZ_64K); + + /* + * Avoid wasting memory on PTEs by choosing a virtual base that + * is compatible with section mappings if this region has the + * appropriate size and physical alignment. (Sections are 2 MB + * on 4k granule kernels) + */ + if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) + efi_virt_base = round_up(efi_virt_base, SZ_2M); + + in->virt_addr = efi_virt_base + in->phys_addr - paddr; + efi_virt_base += size; + + memcpy(out, in, desc_size); + out = (void *)out + desc_size; + ++*count; + } +} diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 304ab295ca1a..2be10984a67a 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -39,4 +39,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, void *get_fdt(efi_system_table_t *sys_table); +void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, + unsigned long desc_size, efi_memory_desc_t *runtime_map, + int *count); + #endif diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index c846a9608cbd..91da56c4fd54 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -14,6 +14,8 @@ #include #include +#include "efistub.h" + efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, unsigned long orig_fdt_size, void *fdt, int new_fdt_size, char *cmdline_ptr, @@ -193,9 +195,26 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long map_size, desc_size; u32 desc_ver; unsigned long mmap_key; - efi_memory_desc_t *memory_map; + efi_memory_desc_t *memory_map, *runtime_map; unsigned long new_fdt_size; efi_status_t status; + int runtime_entry_count = 0; + + /* + * Get a copy of the current memory map that we will use to prepare + * the input for SetVirtualAddressMap(). We don't have to worry about + * subsequent allocations adding entries, since they could not affect + * the number of EFI_MEMORY_RUNTIME regions. 
+ */ + status = efi_get_memory_map(sys_table, &runtime_map, &map_size, + &desc_size, &desc_ver, &mmap_key); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n"); + return status; + } + + pr_efi(sys_table, + "Exiting boot services and installing virtual address map...\n"); /* * Estimate size of new FDT, and allocate memory for it. We @@ -248,12 +267,48 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, } } + /* + * Update the memory map with virtual addresses. The function will also + * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME + * entries so that we can pass it straight into SetVirtualAddressMap() + */ + efi_get_virtmap(memory_map, map_size, desc_size, runtime_map, + &runtime_entry_count); + /* Now we are ready to exit_boot_services.*/ status = sys_table->boottime->exit_boot_services(handle, mmap_key); + if (status == EFI_SUCCESS) { + efi_set_virtual_address_map_t *svam; - if (status == EFI_SUCCESS) - return status; + /* Install the new virtual address map */ + svam = sys_table->runtime->set_virtual_address_map; + status = svam(runtime_entry_count * desc_size, desc_size, + desc_ver, runtime_map); + + /* + * We are beyond the point of no return here, so if the call to + * SetVirtualAddressMap() failed, we need to signal that to the + * incoming kernel but proceed normally otherwise. + */ + if (status != EFI_SUCCESS) { + int l; + + /* + * Set the virtual address field of all + * EFI_MEMORY_RUNTIME entries to 0. This will signal + * the incoming kernel that no virtual translation has + * been installed. + */ + for (l = 0; l < map_size; l += desc_size) { + efi_memory_desc_t *p = (void *)memory_map + l; + + if (p->attribute & EFI_MEMORY_RUNTIME) + p->virt_addr = 0; + } + } + return EFI_SUCCESS; + } pr_efi_err(sys_table, "Exit boot services failed.\n"); @@ -264,6 +319,7 @@ fail_free_new_fdt: efi_free(sys_table, new_fdt_size, *new_fdt_addr); fail: + sys_table->boottime->free_pool(runtime_map); return EFI_LOAD_ERROR; } From 3033b84596eaec0093b68c5711c265738eb0745d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 16:35:21 +0200 Subject: [PATCH 07/47] arm64/efi: remove free_boot_services() and friends Now that we are calling SetVirtualAddressMap() from the stub, there is no need to reserve boot-only memory regions, which implies that there is also no reason to free them again later. 
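As a reference for the new scheme: the virtual layout that the stub hands to SetVirtualAddressMap() is computed by efi_get_virtmap() in patch 6, and its rounding arithmetic is easier to see in isolation. The following is a standalone, user-space sketch of just that logic, not kernel code; the constant names match the patch, while the two example regions are invented:

/* Standalone model of the stub's virtual layout arithmetic (illustrative). */
#include <stdint.h>
#include <stdio.h>

#define EFI_PAGE_SIZE       0x1000ULL
#define SZ_64K              0x10000ULL
#define SZ_2M               0x200000ULL
#define EFI_RT_VIRTUAL_BASE 0x40000000ULL

struct region { uint64_t phys_addr, num_pages, virt_addr; };

static uint64_t rnd_down(uint64_t x, uint64_t a) { return x & ~(a - 1); }
static uint64_t rnd_up(uint64_t x, uint64_t a) { return (x + a - 1) & ~(a - 1); }

int main(void)
{
	struct region map[] = {
		{ 0x0807f000, 1, 0 },    /* straddles a 64 KB boundary */
		{ 0x09200000, 1024, 0 }, /* 2 MB aligned, 4 MB long */
	};
	uint64_t virt = EFI_RT_VIRTUAL_BASE;

	for (int i = 0; i < 2; i++) {
		struct region *in = &map[i];
		/* Round the physical span out to 64 KB, as efi_get_virtmap() does. */
		uint64_t paddr = rnd_down(in->phys_addr, SZ_64K);
		uint64_t size = rnd_up(in->num_pages * EFI_PAGE_SIZE +
				       in->phys_addr - paddr, SZ_64K);

		/* Pick a section-compatible VA when size and alignment allow it. */
		if (!(in->phys_addr & (SZ_2M - 1)) && size >= SZ_2M)
			virt = rnd_up(virt, SZ_2M);

		in->virt_addr = virt + in->phys_addr - paddr;
		virt += size;
		printf("phys 0x%010llx -> virt 0x%010llx (span 0x%llx)\n",
		       (unsigned long long)in->phys_addr,
		       (unsigned long long)in->virt_addr,
		       (unsigned long long)size);
	}
	return 0;
}

Running this shows the second region being bumped to the next 2 MB boundary, which is exactly the PTE-saving behaviour the comment in arm-stub.c describes.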
Acked-by: Leif Lindholm Acked-by: Will Deacon Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 123 +--------------------------------------- 1 file changed, 1 insertion(+), 122 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 755e545144ea..4a5d7343dddd 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -199,9 +199,7 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md) || - md->type == EFI_BOOT_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_DATA) { + if (is_reserve_region(md)) { memblock_reserve(paddr, size); if (uefi_debug) pr_cont("*"); @@ -214,123 +212,6 @@ static __init void reserve_regions(void) set_bit(EFI_MEMMAP, &efi.flags); } - -static u64 __init free_one_region(u64 start, u64 end) -{ - u64 size = end - start; - - if (uefi_debug) - pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); - - free_bootmem_late(start, size); - return size; -} - -static u64 __init free_region(u64 start, u64 end) -{ - u64 map_start, map_end, total = 0; - - if (end <= start) - return total; - - map_start = (u64)memmap.phys_map; - map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); - map_start &= PAGE_MASK; - - if (start < map_end && end > map_start) { - /* region overlaps UEFI memmap */ - if (start < map_start) - total += free_one_region(start, map_start); - - if (map_end < end) - total += free_one_region(map_end, end); - } else - total += free_one_region(start, end); - - return total; -} - -static void __init free_boot_services(void) -{ - u64 total_freed = 0; - u64 keep_end, free_start, free_end; - efi_memory_desc_t *md; - - /* - * If kernel uses larger pages than UEFI, we have to be careful - * not to inadvertantly free memory we want to keep if there is - * overlap at the kernel page size alignment. We do not want to - * free is_reserve_region() memory nor the UEFI memmap itself. - * - * The memory map is sorted, so we keep track of the end of - * any previous region we want to keep, remember any region - * we want to free and defer freeing it until we encounter - * the next region we want to keep. This way, before freeing - * it, we can clip it as needed to avoid freeing memory we - * want to keep for UEFI. - */ - - keep_end = 0; - free_start = 0; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - - if (is_reserve_region(md)) { - /* - * We don't want to free any memory from this region. - */ - if (free_start) { - /* adjust free_end then free region */ - if (free_end > md->phys_addr) - free_end -= PAGE_SIZE; - total_freed += free_region(free_start, free_end); - free_start = 0; - } - keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - continue; - } - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) { - /* no need to free this region */ - continue; - } - - /* - * We want to free memory from this region. 
- */ - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (free_start) { - if (paddr <= free_end) - free_end = paddr + size; - else { - total_freed += free_region(free_start, free_end); - free_start = paddr; - free_end = paddr + size; - } - } else { - free_start = paddr; - free_end = paddr + size; - } - if (free_start < keep_end) { - free_start += PAGE_SIZE; - if (free_start >= free_end) - free_start = 0; - } - } - if (free_start) - total_freed += free_region(free_start, free_end); - - if (total_freed) - pr_info("Freed 0x%llx bytes of EFI boot services memory", - total_freed); -} - void __init efi_init(void) { struct efi_fdt_params params; @@ -402,8 +283,6 @@ static int __init arm64_enable_runtime_services(void) } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - free_boot_services(); - if (!efi_enabled(EFI_VIRTMAP)) { pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; From 9679be103108926cfe9e6fd2f6829cefa77e47b0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 16:41:38 +0200 Subject: [PATCH 08/47] arm64/efi: remove idmap manipulations from UEFI code Now that we have moved the call to SetVirtualAddressMap() to the stub, UEFI has no use for the ID map, so we can drop the code that installs ID mappings for UEFI memory regions. Acked-by: Leif Lindholm Acked-by: Will Deacon Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 2 -- arch/arm64/include/asm/mmu.h | 2 -- arch/arm64/kernel/efi.c | 32 +------------------------------- arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/mmu.c | 12 ------------ 5 files changed, 1 insertion(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index effef3713c5a..7baf2cc04e1e 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -6,11 +6,9 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_idmap_init(void); extern void efi_virtmap_init(void); #else #define efi_init() -#define efi_idmap_init() #define efi_virtmap_init() #endif diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 5fd40c43be80..3d311761e3c2 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -31,8 +31,6 @@ extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); -/* create an identity mapping for memory (or io if map_io is true) */ -extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4a5d7343dddd..a98415b5979c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -54,27 +54,6 @@ static int __init is_normal_ram(efi_memory_desc_t *md) return 0; } -static void __init efi_setup_idmap(void) -{ - struct memblock_region *r; - efi_memory_desc_t *md; - u64 paddr, npages, size; - - for_each_memblock(memory, r) - create_id_mapping(r->base, r->size, 0); - - /* map runtime io spaces */ - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) - continue; - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - 
create_id_mapping(paddr, size, 1); - } -} - /* * Translate a EFI virtual address into a physical address: this is necessary, * as some data members of the EFI system table are virtually remapped after @@ -236,16 +215,6 @@ void __init efi_init(void) reserve_regions(); } -void __init efi_idmap_init(void) -{ - if (!efi_enabled(EFI_BOOT)) - return; - - /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ - efi_setup_idmap(); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} - /* * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., * non-early mapping of the UEFI system table and virtual mappings for all @@ -386,4 +355,5 @@ void __init efi_virtmap_init(void) create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); } set_bit(EFI_VIRTMAP, &efi.flags); + early_memunmap(memmap.map, memmap.map_end - memmap.map); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index beac8188fdbd..199d1b7809d7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,7 +402,6 @@ void __init setup_arch(char **cmdline_p) request_standard_resources(); efi_virtmap_init(); - efi_idmap_init(); early_ioremap_reset(); unflatten_device_tree(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3f3d5aa4a8b1..328638548871 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -271,18 +271,6 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, size, PAGE_KERNEL_EXEC); } -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) -{ - if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) { - pr_warn("BUG: not creating id mapping for %pa\n", &addr); - return; - } - __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)], - addr, addr, size, - map_io ? __pgprot(PROT_DEVICE_nGnRE) - : PAGE_KERNEL_EXEC); -} - void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) From 26a945caf381225c9a1e68f14826a884c08ea9cb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Jan 2015 17:07:47 +0000 Subject: [PATCH 09/47] arm64: remove broken cachepolicy code The cachepolicy kernel parameter was intended to aid in the debugging of coherency issues, but it is fundamentally broken for several reasons: * On SMP platforms, only the boot CPU's tcr_el1 is altered. Secondary CPUs may therefore differ w.r.t. the attributes they apply to MT_NORMAL memory, resulting in a loss of coherency. * The cache maintenance using flush_dcache_all (based on Set/Way operations) is not guaranteed to empty a given CPU's cache hierarchy while said CPU has caches enabled; it cannot empty the caches of other coherent PEs, nor is it guaranteed to flush data to the PoC even when caches are disabled. * The TLBs are not invalidated around the modification of MAIR_EL1 and TCR_EL1, as required by the architecture (as both are permitted to be cached in a TLB). This may result in CPUs using attributes other than those expected for some memory accesses, resulting in a loss of coherency. * Exclusive accesses are not architecturally guaranteed to function as expected on memory marked as Write-Through or Non-Cacheable. Thus changing the attributes of MT_NORMAL away from the (architecturally safe) defaults may cause uses of these instructions (e.g. atomics) to behave erratically. Given this, the cachepolicy code cannot be used for debugging purposes as it alone is likely to cause coherency issues. This patch removes the broken cachepolicy code.
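For reference, the MAIR_EL1 update performed by the bfi sequence being removed amounts to a plain 8-bit field replace. A minimal standalone model of that arithmetic follows (illustrative only: the MT_NORMAL index of 4 and the starting register value are assumptions for the example, not taken from this patch):

/* User-space model of "bfi mair, attr, #(MT_NORMAL * 8), #8" (illustrative). */
#include <stdint.h>
#include <stdio.h>

#define MT_NORMAL 4 /* assumed MAIR attribute index for normal memory */

static uint64_t mair_set_attr(uint64_t mair, unsigned int idx, uint8_t attr)
{
	unsigned int shift = idx * 8;

	mair &= ~(0xffULL << shift);             /* clear the 8-bit field */
	return mair | ((uint64_t)attr << shift); /* insert the new attribute */
}

int main(void)
{
	uint64_t mair = 0x00000000004404ffULL; /* invented starting value */

	/* 0x44 = inner/outer Normal Non-cacheable, per the removed table. */
	printf("MAIR_EL1 = 0x%016llx\n",
	       (unsigned long long)mair_set_attr(mair, MT_NORMAL, 0x44));
	return 0;
}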
Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 74 --------------------------------------------- 1 file changed, 74 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 328638548871..e57c170a91f3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -45,80 +45,6 @@ struct page *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); -struct cachepolicy { - const char policy[16]; - u64 mair; - u64 tcr; -}; - -static struct cachepolicy cache_policies[] __initdata = { - { - .policy = "uncached", - .mair = 0x44, /* inner, outer non-cacheable */ - .tcr = TCR_IRGN_NC | TCR_ORGN_NC, - }, { - .policy = "writethrough", - .mair = 0xaa, /* inner, outer write-through, read-allocate */ - .tcr = TCR_IRGN_WT | TCR_ORGN_WT, - }, { - .policy = "writeback", - .mair = 0xee, /* inner, outer write-back, read-allocate */ - .tcr = TCR_IRGN_WBnWA | TCR_ORGN_WBnWA, - } -}; - -/* - * These are useful for identifying cache coherency problems by allowing the - * cache or the cache and writebuffer to be turned off. It changes the Normal - * memory caching attributes in the MAIR_EL1 register. - */ -static int __init early_cachepolicy(char *p) -{ - int i; - u64 tmp; - - for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { - int len = strlen(cache_policies[i].policy); - - if (memcmp(p, cache_policies[i].policy, len) == 0) - break; - } - if (i == ARRAY_SIZE(cache_policies)) { - pr_err("ERROR: unknown or unsupported cache policy: %s\n", p); - return 0; - } - - flush_cache_all(); - - /* - * Modify MT_NORMAL attributes in MAIR_EL1. - */ - asm volatile( - " mrs %0, mair_el1\n" - " bfi %0, %1, %2, #8\n" - " msr mair_el1, %0\n" - " isb\n" - : "=&r" (tmp) - : "r" (cache_policies[i].mair), "i" (MT_NORMAL * 8)); - - /* - * Modify TCR PTW cacheability attributes. - */ - asm volatile( - " mrs %0, tcr_el1\n" - " bic %0, %0, %2\n" - " orr %0, %0, %1\n" - " msr tcr_el1, %0\n" - " isb\n" - : "=&r" (tmp) - : "r" (cache_policies[i].tcr), "r" (TCR_IRGN_MASK | TCR_ORGN_MASK)); - - flush_cache_all(); - - return 0; -} -early_param("cachepolicy", early_cachepolicy); - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { From 5d425c18653731af62831d30a4fa023d532657a9 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 8 Jan 2015 10:42:34 +0000 Subject: [PATCH 10/47] arm64: kernel: add support for cpu cache information This patch adds support for cacheinfo on ARM64. On ARMv8, the cache hierarchy can be identified through the Cache Level ID (CLIDR) register while the cache geometry is provided by the Cache Size ID (CCSIDR) register. Since the architecture doesn't provide any way of detecting the cpus sharing a particular cache, device tree is used for the same purpose.
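The CCSIDR_EL1 decoding added here is pure shift-and-mask arithmetic. The standalone sketch below runs the same computation as the CCSIDR_EL1_* macros on a fabricated register value describing a 32 KB, 4-way cache with 64-byte lines:

/* Standalone model of the CCSIDR_EL1 geometry decode (illustrative). */
#include <stdint.h>
#include <stdio.h>

static unsigned int linesize(uint64_t c) { return 16 << (c & 0x7); }
static unsigned int assoc(uint64_t c)    { return ((c >> 3) & 0x3ff) + 1; }
static unsigned int numsets(uint64_t c)  { return ((c >> 13) & 0x7fff) + 1; }

int main(void)
{
	/* Fabricated CCSIDR_EL1 value: 128 sets, 4 ways, 64-byte lines. */
	uint64_t ccsidr = (127ULL << 13) | (3ULL << 3) | 2ULL;
	unsigned int size = linesize(ccsidr) * assoc(ccsidr) * numsets(ccsidr);

	printf("line %u B, %u-way, %u sets => %u KB\n",
	       linesize(ccsidr), assoc(ccsidr), numsets(ccsidr), size / 1024);
	return 0; /* prints: line 64 B, 4-way, 128 sets => 32 KB */
}

Note that each encoded field stores (value - 1), hence the "+ 1" in the helpers, mirroring CACHE_NUMSETS() and CACHE_ASSOCIATIVITY() in the patch.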
Signed-off-by: Sudeep Holla Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cachetype.h | 29 +++++-- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/cacheinfo.c | 128 +++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 12 --- 4 files changed, 152 insertions(+), 19 deletions(-) create mode 100644 arch/arm64/kernel/cacheinfo.c diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h index 4c631a0a3609..da2fc9e3cedd 100644 --- a/arch/arm64/include/asm/cachetype.h +++ b/arch/arm64/include/asm/cachetype.h @@ -39,24 +39,41 @@ extern unsigned long __icache_flags; +/* + * NumSets, bits[27:13] - (Number of sets in cache) - 1 + * Associativity, bits[12:3] - (Associativity of cache) - 1 + * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2 + */ +#define CCSIDR_EL1_WRITE_THROUGH BIT(31) +#define CCSIDR_EL1_WRITE_BACK BIT(30) +#define CCSIDR_EL1_READ_ALLOCATE BIT(29) +#define CCSIDR_EL1_WRITE_ALLOCATE BIT(28) #define CCSIDR_EL1_LINESIZE_MASK 0x7 #define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK) - +#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT 3 +#define CCSIDR_EL1_ASSOCIATIVITY_MASK 0x3ff +#define CCSIDR_EL1_ASSOCIATIVITY(x) \ + (((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK) #define CCSIDR_EL1_NUMSETS_SHIFT 13 -#define CCSIDR_EL1_NUMSETS_MASK (0x7fff << CCSIDR_EL1_NUMSETS_SHIFT) +#define CCSIDR_EL1_NUMSETS_MASK 0x7fff #define CCSIDR_EL1_NUMSETS(x) \ - (((x) & CCSIDR_EL1_NUMSETS_MASK) >> CCSIDR_EL1_NUMSETS_SHIFT) + (((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK) -extern u64 __attribute_const__ icache_get_ccsidr(void); +#define CACHE_LINESIZE(x) (16 << CCSIDR_EL1_LINESIZE(x)) +#define CACHE_NUMSETS(x) (CCSIDR_EL1_NUMSETS(x) + 1) +#define CACHE_ASSOCIATIVITY(x) (CCSIDR_EL1_ASSOCIATIVITY(x) + 1) +extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr); + +/* Helpers for Level 1 Instruction cache csselr = 1L */ static inline int icache_get_linesize(void) { - return 16 << CCSIDR_EL1_LINESIZE(icache_get_ccsidr()); + return CACHE_LINESIZE(cache_get_ccsidr(1L)); } static inline int icache_get_numsets(void) { - return 1 + CCSIDR_EL1_NUMSETS(icache_get_ccsidr()); + return CACHE_NUMSETS(cache_get_ccsidr(1L)); } /* diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index eaa77ed7766a..77d3d9568333 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -16,7 +16,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ - cpuinfo.o cpu_errata.o alternative.o + cpuinfo.o cpu_errata.o alternative.o cacheinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o \ diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c new file mode 100644 index 000000000000..b8629d52fba9 --- /dev/null +++ b/arch/arm64/kernel/cacheinfo.c @@ -0,0 +1,128 @@ +/* + * ARM64 cacheinfo support + * + * Copyright (C) 2015 ARM Ltd. + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define MAX_CACHE_LEVEL 7 /* Max 7 levels supported */ +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +static inline enum cache_type get_cache_type(int level) +{ + u64 clidr; + + if (level > MAX_CACHE_LEVEL) + return CACHE_TYPE_NOCACHE; + asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr)); + return CLIDR_CTYPE(clidr, level); +} + +/* + * Cache Size Selection Register(CSSELR) selects which Cache Size ID + * Register(CCSIDR) is accessible by specifying the required cache + * level and the cache type. We need to ensure that no one else changes + * CSSELR by calling this in non-preemptible context + */ +u64 __attribute_const__ cache_get_ccsidr(u64 csselr) +{ + u64 ccsidr; + + WARN_ON(preemptible()); + + /* Put value into CSSELR */ + asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + isb(); + /* Read result out of CCSIDR */ + asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr)); + + return ccsidr; +} + +static void ci_leaf_init(struct cacheinfo *this_leaf, + enum cache_type type, unsigned int level) +{ + bool is_icache = type & CACHE_TYPE_INST; + u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache); + + this_leaf->level = level; + this_leaf->type = type; + this_leaf->coherency_line_size = CACHE_LINESIZE(tmp); + this_leaf->number_of_sets = CACHE_NUMSETS(tmp); + this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp); + this_leaf->size = this_leaf->number_of_sets * + this_leaf->coherency_line_size * this_leaf->ways_of_associativity; + this_leaf->attributes = + ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) | + ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) | + ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) | + ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0); +} + +static int __init_cache_level(unsigned int cpu) +{ + unsigned int ctype, level, leaves; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + + for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { + ctype = get_cache_type(level); + if (ctype == CACHE_TYPE_NOCACHE) { + level--; + break; + } + /* Separate instruction and data caches */ + leaves += (ctype == CACHE_TYPE_SEPARATE) ?
2 : 1; + } + + this_cpu_ci->num_levels = level; + this_cpu_ci->num_leaves = leaves; + return 0; +} + +static int __populate_cache_leaves(unsigned int cpu) +{ + unsigned int level, idx; + enum cache_type type; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + + for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && + idx < this_cpu_ci->num_leaves; idx++, level++) { + type = get_cache_type(level); + if (type == CACHE_TYPE_SEPARATE) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, type, level); + } + } + return 0; +} + +DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) +DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 07d435cf2eea..49782282a027 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -231,15 +231,3 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; } - -u64 __attribute_const__ icache_get_ccsidr(void) -{ - u64 ccsidr; - - WARN_ON(preemptible()); - - /* Select L1 I-cache and read its size ID register */ - asm("msr csselr_el1, %1; isb; mrs %0, ccsidr_el1" - : "=r"(ccsidr) : "r"(1L)); - return ccsidr; -} From cf99a48dce66b126391bb33c7709892d3d8002d7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 12:03:32 +0000 Subject: [PATCH 11/47] arm64: introduce common ESR_ELx_* definitions Currently we have separate ESR_EL{1,2}_* macros, despite the fact that the encodings are common. While encodings are architected to refer to the current EL or a lower EL, the macros refer to particular ELs (e.g. ESR_ELx_EC_DABT_EL0). Having these duplicate definitions is redundant, and their naming is misleading. This patch introduces common ESR_ELx_* macros that can be used in all cases, in preparation for later patches which will migrate existing users over. Some additional cleanups are made in the process: * Suffixes for particular exception levels (e.g. _EL0, _EL1) are replaced with more general _LOW and _CUR suffixes, matching the architectural intent. * ESR_ELx_EC_WFx, rather than ESR_ELx_EC_WFI is introduced, as this EC encoding covers traps from both WFE and WFI. Similarly, ESR_ELx_WFx_ISS_WFE rather than ESR_ELx_EC_WFI_ISS_WFE is introduced. * Multi-bit fields are given consistently named _SHIFT and _MASK macros. * UL() is used for compatibility with assembly files. * Comments are added for currently unallocated ESR_ELx.EC encodings. For fields other than ESR_ELx.EC, macros are only implemented for fields for which there is already an ESR_EL{1,2}_* macro.
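To show how the new definitions compose, the standalone sketch below decodes a fabricated syndrome value using the EC, IL and ISS encodings introduced here (the constants are copied from the patch; the example ESR value is invented):

/* Standalone model of decoding an ESR_ELx value (illustrative). */
#include <stdint.h>
#include <stdio.h>

#define ESR_ELx_EC_SHIFT    26
#define ESR_ELx_EC_MASK     (0x3FULL << ESR_ELx_EC_SHIFT)
#define ESR_ELx_IL          (1ULL << 25)
#define ESR_ELx_ISS_MASK    (ESR_ELx_IL - 1)
#define ESR_ELx_EC_DABT_LOW 0x24

int main(void)
{
	/* Fabricated syndrome: data abort from a lower EL, 32-bit instruction. */
	uint64_t esr = ((uint64_t)ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT) |
		       ESR_ELx_IL | 0x10;

	printf("EC=0x%02llx IL=%u ISS=0x%06llx\n",
	       (unsigned long long)((esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT),
	       (unsigned)!!(esr & ESR_ELx_IL),
	       (unsigned long long)(esr & ESR_ELx_ISS_MASK));
	return 0;
}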
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/esr.h | 79 ++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 72674f4c3871..0fd1b0e15ea8 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -54,4 +54,83 @@ #define ESR_EL1_EC_BKPT32 (0x38) #define ESR_EL1_EC_BRK64 (0x3C) +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) +/* Unallocated EC: 0x09 - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) +#define ESR_ELx_EC_SMC32 (0x13) +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) +#define ESR_ELx_EC_SMC64 (0x17) +#define ESR_ELx_EC_SYS64 (0x18) +/* Unallocated EC: 0x19 - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) + +#define ESR_ELx_IL (UL(1) << 25) +#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISV (UL(1) << 24) +#define ESR_ELx_SAS_SHIFT (22) +#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) +#define ESR_ELx_SSE (UL(1) << 21) +#define ESR_ELx_SRT_SHIFT (16) +#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) +#define ESR_ELx_SF (UL(1) << 15) +#define ESR_ELx_AR (UL(1) << 14) +#define ESR_ELx_EA (UL(1) << 9) +#define ESR_ELx_CM (UL(1) << 8) +#define ESR_ELx_S1PTW (UL(1) << 7) +#define ESR_ELx_WNR (UL(1) << 6) +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_CV (UL(1) << 24) +#define ESR_ELx_COND_SHIFT (20) +#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) + #endif /* __ASM_ESR_H */ From aed40e0144bdbdadb45f2b623e07aa3157eb74c9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 12:31:40 +0000 Subject: [PATCH 12/47] arm64: move to ESR_ELx macros Now that we have common ESR_ELx_* macros, move the core arm64 code over to them. There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/kernel/entry.S | 64 ++++++++++++++++++------------------ arch/arm64/kernel/signal32.c | 2 +- arch/arm64/mm/fault.c | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index fd4fa374e5d2..02e6af117762 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -269,18 +269,18 @@ ENDPROC(el1_error_invalid) el1_sync: kernel_entry 1 mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_DABT_EL1 // data abort in EL1 + lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 b.eq el1_da - cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef - cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el1_sp_pc - cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception b.eq el1_sp_pc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL1 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 b.eq el1_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL1 // debug exception in EL1 + cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 b.ge el1_dbg b el1_inv el1_da: @@ -318,7 +318,7 @@ el1_dbg: /* * Debug exception handling */ - cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 @@ -375,26 +375,26 @@ el1_preempt: el0_sync: kernel_entry 0 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state + lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 b.eq el0_da - cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 b.eq el0_ia - cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #ESR_EL1_EC_FP_EXC64 // FP/ASIMD exception + cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc - cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception b.eq el0_sp_pc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 b.ge el0_dbg b el0_inv @@ -403,30 +403,30 @@ el0_sync: el0_sync_compat: kernel_entry 0, 32 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state + lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state b.eq 
el0_svc_compat - cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 b.eq el0_da - cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 b.eq el0_ia - cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #ESR_EL1_EC_FP_EXC32 // FP/ASIMD exception + cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP15_32 // CP15 MRC/MCR trap + cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP15_64 // CP15 MRRC/MCRR trap + cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_MR // CP14 MRC/MCR trap + cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_LS // CP14 LDC/STC trap + cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_64 // CP14 MRRC/MCRR trap + cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 b.ge el0_dbg b el0_inv el0_svc_compat: diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 5a1ba6e80d4e..192d900c058f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -501,7 +501,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ - __put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << + __put_user_error(!!(current->thread.fault_code & ESR_ELx_WNR) << FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err); __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c11cd27ca8f5..96da13167d4a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -219,7 +219,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (esr & ESR_LNX_EXEC) { vm_flags = VM_EXEC; - } else if ((esr & ESR_EL1_WRITE) && !(esr & ESR_EL1_CM)) { + } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } From 60a1f02c9e91e0796b54e83b14fb8a07f7a568b6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 18 Nov 2014 12:16:30 +0000 Subject: [PATCH 13/47] arm64: decode ESR_ELx.EC when reporting exceptions To aid the developer when something triggers an unexpected exception, decode the ESR_ELx.EC field when logging an ESR_ELx value. This doesn't tell the developer the specifics of the exception encoded in the remaining IL and ISS bits, but it can be helpful to distinguish between exception classes (e.g. SError and a data abort) without having to manually decode the field, which can be tiresome. 
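Editorial note (not part of the patch): the decode added below is a bounded table lookup. The string table carries ESR_ELx_EC_MAX + 1 entries, and shifting a 32-bit ESR value right by ESR_ELx_EC_SHIFT (26) can never yield more than ESR_ELx_EC_MAX (0x3F), so

	const char *class = esr_class_str[esr >> ESR_ELx_EC_SHIFT];

cannot index out of bounds; unallocated encodings simply fall back to the "UNRECOGNIZED EC" default entry.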
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/esr.h | 6 +++++ arch/arm64/kernel/traps.c | 50 ++++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 0fd1b0e15ea8..c315543d50f9 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -133,4 +133,10 @@ #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) +#ifndef __ASSEMBLY__ +#include + +const char *esr_get_class_string(u32 esr); +#endif /* __ASSEMBLY */ + #endif /* __ASM_ESR_H */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0a801e3743d5..1ef2940df13c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -373,6 +374,51 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) return sys_ni_syscall(); } +static const char *esr_class_str[] = { + [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", + [ESR_ELx_EC_UNKNOWN] = "Unknown/Uncategorized", + [ESR_ELx_EC_WFx] = "WFI/WFE", + [ESR_ELx_EC_CP15_32] = "CP15 MCR/MRC", + [ESR_ELx_EC_CP15_64] = "CP15 MCRR/MRRC", + [ESR_ELx_EC_CP14_MR] = "CP14 MCR/MRC", + [ESR_ELx_EC_CP14_LS] = "CP14 LDC/STC", + [ESR_ELx_EC_FP_ASIMD] = "ASIMD", + [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", + [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", + [ESR_ELx_EC_ILL] = "PSTATE.IL", + [ESR_ELx_EC_SVC32] = "SVC (AArch32)", + [ESR_ELx_EC_HVC32] = "HVC (AArch32)", + [ESR_ELx_EC_SMC32] = "SMC (AArch32)", + [ESR_ELx_EC_SVC64] = "SVC (AArch64)", + [ESR_ELx_EC_HVC64] = "HVC (AArch64)", + [ESR_ELx_EC_SMC64] = "SMC (AArch64)", + [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", + [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", + [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", + [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", + [ESR_ELx_EC_PC_ALIGN] = "PC Alignment", + [ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)", + [ESR_ELx_EC_DABT_CUR] = "DABT (current EL)", + [ESR_ELx_EC_SP_ALIGN] = "SP Alignment", + [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", + [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", + [ESR_ELx_EC_SERROR] = "SError", + [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)", + [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)", + [ESR_ELx_EC_SOFTSTP_LOW] = "Software Step (lower EL)", + [ESR_ELx_EC_SOFTSTP_CUR] = "Software Step (current EL)", + [ESR_ELx_EC_WATCHPT_LOW] = "Watchpoint (lower EL)", + [ESR_ELx_EC_WATCHPT_CUR] = "Watchpoint (current EL)", + [ESR_ELx_EC_BKPT32] = "BKPT (AArch32)", + [ESR_ELx_EC_VECTOR32] = "Vector catch (AArch32)", + [ESR_ELx_EC_BRK64] = "BRK (AArch64)", +}; + +const char *esr_get_class_string(u32 esr) +{ + return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; +} + /* * bad_mode handles the impossible case in the exception vector. 
*/ @@ -382,8 +428,8 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __user *pc = (void __user *)instruction_pointer(regs); console_verbose(); - pr_crit("Bad mode in %s handler detected, code 0x%08x\n", - handler[reason], esr); + pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n", + handler[reason], esr, esr_get_class_string(esr)); __show_regs(regs); info.si_signo = SIGILL; From c6d01a947a51193e839516165286bc8d14a0e409 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 13:59:30 +0000 Subject: [PATCH 14/47] arm64: kvm: move to ESR_ELx macros Now that we have common ESR_ELx macros, make use of them in the arm64 KVM code. The addition of to the include path highlighted badly ordered (i.e. not alphabetical) include lists; these are changed to alphabetical order. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/kvm_emulate.h | 28 +++++++++++++----------- arch/arm64/kvm/emulate.c | 5 +++-- arch/arm64/kvm/handle_exit.c | 32 +++++++++++++++------------- arch/arm64/kvm/hyp.S | 17 ++++++++------- arch/arm64/kvm/inject_fault.c | 14 ++++++------ arch/arm64/kvm/sys_regs.c | 23 +++++++++++--------- 6 files changed, 64 insertions(+), 55 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 8127e45e2637..5c56c0d2cef1 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -23,8 +23,10 @@ #define __ARM64_KVM_EMULATE_H__ #include -#include + +#include #include +#include #include #include @@ -128,63 +130,63 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR); } static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { - return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT; + return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA); } static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT); + return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); } static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT; + return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT; } 
static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; + return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; + return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; } static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; + return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; } static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c index 124418d17049..f87d8fbaa48d 100644 --- a/arch/arm64/kvm/emulate.c +++ b/arch/arm64/kvm/emulate.c @@ -22,6 +22,7 @@ */ #include +#include #include /* @@ -55,8 +56,8 @@ static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - if (esr & ESR_EL2_CV) - return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT; + if (esr & ESR_ELx_CV) + return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; return -1; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 34b8bd0711e9..bcbc923d3060 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -21,8 +21,10 @@ #include #include -#include + +#include #include +#include #include #include @@ -61,7 +63,7 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) + if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) kvm_vcpu_on_spin(vcpu); else kvm_vcpu_block(vcpu); @@ -72,19 +74,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) } static exit_handle_fn arm_exit_handlers[] = { - [ESR_EL2_EC_WFI] = kvm_handle_wfx, - [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, - [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, - [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32, - [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, - [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64, - [ESR_EL2_EC_HVC32] = handle_hvc, - [ESR_EL2_EC_SMC32] = handle_smc, - [ESR_EL2_EC_HVC64] = handle_hvc, - [ESR_EL2_EC_SMC64] = handle_smc, - [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, - [ESR_EL2_EC_IABT] = kvm_handle_guest_abort, - [ESR_EL2_EC_DABT] = kvm_handle_guest_abort, + [ESR_ELx_EC_WFx] = kvm_handle_wfx, + [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, + [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, + [ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32, + [ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64, + [ESR_ELx_EC_HVC32] = handle_hvc, + [ESR_ELx_EC_SMC32] = handle_smc, + [ESR_ELx_EC_HVC64] = handle_hvc, + [ESR_ELx_EC_SMC64] = handle_smc, + [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, + [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, + [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, }; static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index fbe909fb0a1a..c0d820280a5e 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,15 +17,16 @@ #include -#include -#include #include +#include #include +#include #include #include -#include #include +#include #include +#include #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) @@ -1140,9 +1141,9 @@ el1_sync: // Guest trapped into EL2 push x2, x3 mrs x1, esr_el2 - lsr x2, x1, 
#ESR_EL2_EC_SHIFT + lsr x2, x1, #ESR_ELx_EC_SHIFT - cmp x2, #ESR_EL2_EC_HVC64 + cmp x2, #ESR_ELx_EC_HVC64 b.ne el1_trap mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest @@ -1177,13 +1178,13 @@ el1_trap: * x1: ESR * x2: ESR_EC */ - cmp x2, #ESR_EL2_EC_DABT - mov x0, #ESR_EL2_EC_IABT + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW ccmp x2, x0, #4, ne b.ne 1f // Not an abort we care about /* This is an abort. Check for permission fault */ - and x2, x1, #ESR_EL2_FSC_TYPE + and x2, x1, #ESR_ELx_FSC_TYPE cmp x2, #FSC_PERM b.ne 1f // Not a permission fault diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 81a02a8762b0..f02530e726f6 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -118,27 +118,27 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr * instruction set. Report an external synchronous abort. */ if (kvm_vcpu_trap_il_is32bit(vcpu)) - esr |= ESR_EL1_IL; + esr |= ESR_ELx_IL; /* * Here, the guest runs in AArch64 mode when in EL1. If we get * an AArch32 fault, it means we managed to trap an EL0 fault. */ if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) - esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT); + esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT); else - esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT); + esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); if (!is_iabt) - esr |= ESR_EL1_EC_DABT_EL0; + esr |= ESR_ELx_EC_DABT_LOW; - vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT; + vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; } static void inject_undef64(struct kvm_vcpu *vcpu) { unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT); + u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); @@ -151,7 +151,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) * set. */ if (kvm_vcpu_trap_il_is32bit(vcpu)) - esr |= ESR_EL1_IL; + esr |= ESR_ELx_IL; vcpu_sys_reg(vcpu, ESR_EL1) = esr; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3d7c2df89946..6b859d7a48e7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,17 +20,20 @@ * along with this program. If not, see . */ -#include #include +#include #include -#include -#include -#include -#include -#include + #include #include #include +#include +#include +#include +#include +#include +#include + #include #include "sys_regs.h" @@ -815,12 +818,12 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, int cp; switch(hsr_ec) { - case ESR_EL2_EC_CP15_32: - case ESR_EL2_EC_CP15_64: + case ESR_ELx_EC_CP15_32: + case ESR_ELx_EC_CP15_64: cp = 15; break; - case ESR_EL2_EC_CP14_MR: - case ESR_EL2_EC_CP14_64: + case ESR_ELx_EC_CP14_MR: + case ESR_ELx_EC_CP14_64: cp = 14; break; default: From 4a939087bd02ce38135a3924ad3099685abe4c5f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 14:03:52 +0000 Subject: [PATCH 15/47] arm64: remove ESR_EL1_* macros Now that all users have been moved over to the common ESR_ELx_* macros, remove the redundant ESR_EL1 macros. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/esr.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index c315543d50f9..62167090937d 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -18,42 +18,6 @@ #ifndef __ASM_ESR_H #define __ASM_ESR_H -#define ESR_EL1_WRITE (1 << 6) -#define ESR_EL1_CM (1 << 8) -#define ESR_EL1_IL (1 << 25) - -#define ESR_EL1_EC_SHIFT (26) -#define ESR_EL1_EC_UNKNOWN (0x00) -#define ESR_EL1_EC_WFI (0x01) -#define ESR_EL1_EC_CP15_32 (0x03) -#define ESR_EL1_EC_CP15_64 (0x04) -#define ESR_EL1_EC_CP14_MR (0x05) -#define ESR_EL1_EC_CP14_LS (0x06) -#define ESR_EL1_EC_FP_ASIMD (0x07) -#define ESR_EL1_EC_CP10_ID (0x08) -#define ESR_EL1_EC_CP14_64 (0x0C) -#define ESR_EL1_EC_ILL_ISS (0x0E) -#define ESR_EL1_EC_SVC32 (0x11) -#define ESR_EL1_EC_SVC64 (0x15) -#define ESR_EL1_EC_SYS64 (0x18) -#define ESR_EL1_EC_IABT_EL0 (0x20) -#define ESR_EL1_EC_IABT_EL1 (0x21) -#define ESR_EL1_EC_PC_ALIGN (0x22) -#define ESR_EL1_EC_DABT_EL0 (0x24) -#define ESR_EL1_EC_DABT_EL1 (0x25) -#define ESR_EL1_EC_SP_ALIGN (0x26) -#define ESR_EL1_EC_FP_EXC32 (0x28) -#define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERROR (0x2F) -#define ESR_EL1_EC_BREAKPT_EL0 (0x30) -#define ESR_EL1_EC_BREAKPT_EL1 (0x31) -#define ESR_EL1_EC_SOFTSTP_EL0 (0x32) -#define ESR_EL1_EC_SOFTSTP_EL1 (0x33) -#define ESR_EL1_EC_WATCHPT_EL0 (0x34) -#define ESR_EL1_EC_WATCHPT_EL1 (0x35) -#define ESR_EL1_EC_BKPT32 (0x38) -#define ESR_EL1_EC_BRK64 (0x3C) - #define ESR_ELx_EC_UNKNOWN (0x00) #define ESR_ELx_EC_WFx (0x01) /* Unallocated EC: 0x02 */ From 6e53031ed840fc6d6ad4d4e5778eed5a50183f23 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 14:05:44 +0000 Subject: [PATCH 16/47] arm64: kvm: remove ESR_EL2_* macros Now that all users have been moved over to the common ESR_ELx_* macros, remove the redundant ESR_EL2 macros. To maintain compatibility with the fault handling code shared with 32-bit, the FSC_{FAULT,PERM} macros are retained as aliases for the common ESR_ELx_FSC_{FAULT,PERM} definitions. There should be no functional change as a result of this patch. 
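Editorial illustration (not part of the patch): retaining FSC_FAULT and FSC_PERM as aliases keeps the fault-status test used by the shared fault handling code expressible identically in C and assembly. A hypothetical C equivalent of the hyp.S permission-fault check:

static inline bool esr_is_permission_fault(u64 esr)
{
	/* Mask off the level bits, then compare the fault status class */
	return (esr & ESR_ELx_FSC_TYPE) == FSC_PERM;
}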
Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 73 ++------------------------------ 1 file changed, 4 insertions(+), 69 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8afb863f5a9e..94674eb7e7bb 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -18,6 +18,7 @@ #ifndef __ARM64_KVM_ARM_H__ #define __ARM64_KVM_ARM_H__ +#include #include #include @@ -184,77 +185,11 @@ #define MDCR_EL2_TPMCR (1 << 5) #define MDCR_EL2_HPMN_MASK (0x1F) -/* Exception Syndrome Register (ESR) bits */ -#define ESR_EL2_EC_SHIFT (26) -#define ESR_EL2_EC (UL(0x3f) << ESR_EL2_EC_SHIFT) -#define ESR_EL2_IL (UL(1) << 25) -#define ESR_EL2_ISS (ESR_EL2_IL - 1) -#define ESR_EL2_ISV_SHIFT (24) -#define ESR_EL2_ISV (UL(1) << ESR_EL2_ISV_SHIFT) -#define ESR_EL2_SAS_SHIFT (22) -#define ESR_EL2_SAS (UL(3) << ESR_EL2_SAS_SHIFT) -#define ESR_EL2_SSE (1 << 21) -#define ESR_EL2_SRT_SHIFT (16) -#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) -#define ESR_EL2_SF (1 << 15) -#define ESR_EL2_AR (1 << 14) -#define ESR_EL2_EA (1 << 9) -#define ESR_EL2_CM (1 << 8) -#define ESR_EL2_S1PTW (1 << 7) -#define ESR_EL2_WNR (1 << 6) -#define ESR_EL2_FSC (0x3f) -#define ESR_EL2_FSC_TYPE (0x3c) - -#define ESR_EL2_CV_SHIFT (24) -#define ESR_EL2_CV (UL(1) << ESR_EL2_CV_SHIFT) -#define ESR_EL2_COND_SHIFT (20) -#define ESR_EL2_COND (UL(0xf) << ESR_EL2_COND_SHIFT) - - -#define FSC_FAULT (0x04) -#define FSC_PERM (0x0c) +/* For compatibility with fault code shared with 32-bit */ +#define FSC_FAULT ESR_ELx_FSC_FAULT +#define FSC_PERM ESR_ELx_FSC_PERM /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) -#define ESR_EL2_EC_UNKNOWN (0x00) -#define ESR_EL2_EC_WFI (0x01) -#define ESR_EL2_EC_CP15_32 (0x03) -#define ESR_EL2_EC_CP15_64 (0x04) -#define ESR_EL2_EC_CP14_MR (0x05) -#define ESR_EL2_EC_CP14_LS (0x06) -#define ESR_EL2_EC_FP_ASIMD (0x07) -#define ESR_EL2_EC_CP10_ID (0x08) -#define ESR_EL2_EC_CP14_64 (0x0C) -#define ESR_EL2_EC_ILL_ISS (0x0E) -#define ESR_EL2_EC_SVC32 (0x11) -#define ESR_EL2_EC_HVC32 (0x12) -#define ESR_EL2_EC_SMC32 (0x13) -#define ESR_EL2_EC_SVC64 (0x15) -#define ESR_EL2_EC_HVC64 (0x16) -#define ESR_EL2_EC_SMC64 (0x17) -#define ESR_EL2_EC_SYS64 (0x18) -#define ESR_EL2_EC_IABT (0x20) -#define ESR_EL2_EC_IABT_HYP (0x21) -#define ESR_EL2_EC_PC_ALIGN (0x22) -#define ESR_EL2_EC_DABT (0x24) -#define ESR_EL2_EC_DABT_HYP (0x25) -#define ESR_EL2_EC_SP_ALIGN (0x26) -#define ESR_EL2_EC_FP_EXC32 (0x28) -#define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERROR (0x2F) -#define ESR_EL2_EC_BREAKPT (0x30) -#define ESR_EL2_EC_BREAKPT_HYP (0x31) -#define ESR_EL2_EC_SOFTSTP (0x32) -#define ESR_EL2_EC_SOFTSTP_HYP (0x33) -#define ESR_EL2_EC_WATCHPT (0x34) -#define ESR_EL2_EC_WATCHPT_HYP (0x35) -#define ESR_EL2_EC_BKPT32 (0x38) -#define ESR_EL2_EC_VECTOR32 (0x3A) -#define ESR_EL2_EC_BRK64 (0x3C) - -#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 - -#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) - #endif /* __ARM64_KVM_ARM_H__ */ From 056bb5f51c357ee00046fde4929a03468ff45e7a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 11:26:18 +0000 Subject: [PATCH 17/47] arm64: kvm: decode ESR_ELx.EC when reporting exceptions To aid the developer when something triggers an unexpected exception, decode the ESR_ELx.EC field when logging an ESR_ELx value using the newly introduced esr_get_class_string. 
This doesn't tell the developer the specifics of the exception encoded in the remaining IL and ISS bits, but it can be helpful to distinguish between exception classes (e.g. SError and a data abort) without having to manually decode the field, which can be tiresome. Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/kvm/handle_exit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bcbc923d3060..29b184a8f3f8 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -91,12 +91,13 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { - u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + u32 hsr = kvm_vcpu_get_hsr(vcpu); + u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT; if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); + kvm_err("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); BUG(); } From d67703a8a69eecc8367bb9f848640b9efd430337 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 15 Jan 2015 12:07:33 +0000 Subject: [PATCH 18/47] arm64: kill off the libgcc dependency The arm64 kernel builds fine without the libgcc. Actually it should not be used at all in the kernel. The following are the reasons indicated by Russell King: Although libgcc is part of the compiler, libgcc is built with the expectation that it will be running in userland - it expects to link to a libc. That's why you can't build libgcc without having the glibc headers around. [...] Meanwhile, having the kernel build the compiler support functions that it needs ensures that (a) we know what compiler support functions are being used, (b) we know the implementation of those support functions are sane for use in the kernel, (c) we can build them with appropriate compiler flags for best performance, and (d) we remove an unnecessary dependency on the build toolchain. Signed-off-by: Kevin Hao Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1c43cec971b5..a20c28348be4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,8 +15,6 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only @@ -50,7 +48,6 @@ core-$(CONFIG_KVM) += arch/arm64/kvm/ core-$(CONFIG_XEN) += arch/arm64/xen/ core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) -libs-y += $(LIBGCC) libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/ # Default target when executing plain make From 7fe5d2b1daf6fd82857a8d0ee47547d7852ebe7b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Jan 2015 12:01:06 +0000 Subject: [PATCH 19/47] arm64/efi: handle potential failure to remap memory map When remapping the UEFI memory map using ioremap_cache(), we have to deal with potential failure. 
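The shape of the fix is the usual NULL check on the remapped address (editorial sketch condensing the hunk below):

	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, mapsize);
	if (!memmap.map) {
		pr_err("Failed to remap EFI memory map\n");
		return -1;
	}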
Note that, even if the common case is for ioremap_cache() to return the existing linear mapping of the memory map, we cannot rely on that to be always the case, e.g., in the presence of a mem= kernel parameter. At the same time, remove a stale comment and move the memmap code together. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Acked-by: Mark Salter Signed-off-by: Catalin Marinas --- arch/arm64/kernel/efi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index a98415b5979c..c9cb0fbe7aa4 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -229,19 +229,21 @@ static int __init arm64_enable_runtime_services(void) return -1; } - mapsize = memmap.map_end - memmap.map; - if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return -1; } pr_info("Remapping and enabling EFI services.\n"); - /* replace early memmap mapping with permanent mapping */ + + mapsize = memmap.map_end - memmap.map; memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, mapsize); + if (!memmap.map) { + pr_err("Failed to remap EFI memory map\n"); + return -1; + } memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; efi.systab = (__force void *)ioremap_cache(efi_system_table, From 6083fe74b7bfffc2c7be8c711596608bda0cda6e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Jan 2015 16:42:14 +0000 Subject: [PATCH 20/47] arm64: respect mem= for EFI When booting with EFI, we acquire the EFI memory map after parsing the early params. This unfortunately renders the mem= option useless, as we call memblock_enforce_memory_limit (which uses memblock_remove_range behind the scenes) before we've added any memblocks. We end up removing nothing, then adding all of memory later when efi_init calls reserve_regions. Instead, we can log the limit and apply this later when we do the rest of the memblock work in arm64_memblock_init, which should work regardless of the presence of EFI. At the same time we may as well move the early parameter into arm64's mm/init.c, close to arm64_memblock_init. Any memory which must be mapped (e.g. for use by EFI runtime services) must be mapped explicitly rather than relying on the linear mapping, which may be truncated as a result of a mem= option passed on the kernel command line. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 19 ------------------- arch/arm64/mm/init.c | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 199d1b7809d7..207413fe08a0 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -322,25 +322,6 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); } -/* - * Limit the memory size that was specified via FDT.
- */ -static int __init early_mem(char *p) -{ - phys_addr_t limit; - - if (!p) - return 1; - - limit = memparse(p, &p) & PAGE_MASK; - pr_notice("Memory limited to %lldMB\n", limit >> 20); - - memblock_enforce_memory_limit(limit); - - return 0; -} -early_param("mem", early_mem); - static void __init request_standard_resources(void) { struct memblock_region *region; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bac492c12fcc..11c7b701b681 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -136,10 +136,29 @@ static void arm64_memory_present(void) } #endif +static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX; + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ + if (!p) + return 1; + + memory_limit = memparse(p, &p) & PAGE_MASK; + pr_notice("Memory limited to %lldMB\n", memory_limit >> 20); + + return 0; +} +early_param("mem", early_mem); + void __init arm64_memblock_init(void) { phys_addr_t dma_phys_limit = 0; + memblock_enforce_memory_limit(memory_limit); + /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. From 2f896d5866107e2926dcdec34a7d40bc56dd2951 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 22 Jan 2015 01:36:05 +0000 Subject: [PATCH 21/47] arm64: use fixmap for text patching When kernel text is marked as read only, it cannot be modified directly. Use a fixmap to modify the text instead in a similar manner to x86 and arm. Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Kees Cook Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fixmap.h | 1 + arch/arm64/kernel/insn.c | 47 ++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 9ef6eca905ca..defa0ff98250 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -49,6 +49,7 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + FIX_TEXT_POKE0, __end_of_fixed_addresses }; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7e9327a0986d..27d4864577e5 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -17,14 +17,19 @@ * along with this program. If not, see . */ #include +#include #include #include +#include #include +#include #include +#include #include #include #include +#include #include #define AARCH64_INSN_SF_BIT BIT(31) @@ -72,6 +77,29 @@ bool __kprobes aarch64_insn_is_nop(u32 insn) } } +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else + page = virt_to_page(addr); + + BUG_ON(!page); + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} /* * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always * little-endian. 
@@ -88,10 +116,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } +static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +{ + void *waddr = addr; + unsigned long flags = 0; + int ret; + + spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + + patch_unmap(FIX_TEXT_POKE0); + spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { insn = cpu_to_le32(insn); - return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); + return __aarch64_insn_write(addr, insn); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) From da141706aea52c1a9fbd28cb8d289b78819f5436 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 21 Jan 2015 17:36:06 -0800 Subject: [PATCH 22/47] arm64: add better page protections to arm64 Add page protections for arm64 similar to those in arm. This is for security reasons to prevent certain classes of exploits. The current method: - Map all memory as either RWX or RW. We round to the nearest section to avoid creating page tables before everything is mapped - Once everything is mapped, if either end of the RWX section should not be X, we split the PMD and remap as necessary - When initmem is to be freed, we change the permissions back to RW (using stop machine if necessary to flush the TLB) - If CONFIG_DEBUG_RODATA is set, the read only sections are set read only. Acked-by: Ard Biesheuvel Tested-by: Kees Cook Tested-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 23 +++ arch/arm64/include/asm/cacheflush.h | 5 + arch/arm64/kernel/vmlinux.lds.S | 17 ++- arch/arm64/mm/init.c | 1 + arch/arm64/mm/mm.h | 2 + arch/arm64/mm/mmu.c | 211 ++++++++++++++++++++++++---- 6 files changed, 233 insertions(+), 26 deletions(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 5fdd6dce8061..4a8741073c90 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -66,4 +66,27 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. + + If in doubt, say Y + +config DEBUG_ALIGN_RODATA + depends on DEBUG_RODATA && !ARM64_64K_PAGES + bool "Align linker sections up to SECTION_SIZE" + help + If this option is enabled, sections that may potentially be marked as + read only or non-executable will be aligned up to the section size of + the kernel. This prevents sections from being split into pages and + avoids a potential TLB penalty. The downside is an increase in + alignment and potentially wasted space. Turn on this option if + performance is more important than memory pressure. 
+ + If in doubt, say N + endmenu diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7ae31a2cc6c0..67d309cc3b6b 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -152,4 +152,9 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + #endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 9965ec87cbec..5d9d2dca530d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #include #include #include +#include #include "image.h" @@ -49,6 +50,14 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif +#ifdef CONFIG_DEBUG_ALIGN_RODATA +#define ALIGN_DEBUG_RO . = ALIGN(1< #include #include +#include #include #include @@ -59,21 +60,43 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); memset(ptr, 0, sz); return ptr; } -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - pgprot_t prot) + pgprot_t prot, + void *(*alloc)(unsigned long size)) { pte_t *pte; - if (pmd_none(*pmd)) { - pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); } - BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -82,9 +105,22 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pmd_t *pmd; unsigned long next; @@ -93,8 +129,16 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. 
*/ if (pud_none(*pud) || pud_bad(*pud)) { - pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } pud_populate(mm, pud, pmd); + flush_tlb_all(); } pmd = pmd_offset(pud, addr); @@ -113,21 +157,34 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot); + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -139,8 +196,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, /* * For 4K granule only, attempt to put down a 1GB block */ - if ((PAGE_SHIFT == 12) && - ((addr | next | phys) & ~PUD_MASK) == 0) { + if (use_1G_block(addr, next, phys)) { pud_t old_pud = *pud; set_pud(pud, __pud(phys | pgprot_val(mk_sect_prot(prot)))); @@ -158,7 +214,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, flush_tlb_all(); } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot); + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -168,9 +224,10 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. 
*/ -static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) { unsigned long addr, length, end, next; @@ -180,13 +237,23 @@ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", @@ -194,16 +261,72 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, PAGE_KERNEL_EXEC); + size, prot, early_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot); + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + early_alloc); } +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); +} + +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } + +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); +} +#endif + static void __init map_mem(void) { struct memblock_region *reg; @@ -248,14 +371,53 @@ static void __init map_mem(void) memblock_set_current_limit(limit); } #endif - - create_mapping(start, __phys_to_virt(start), end - start); + __map_memblock(start, end); } /* Limit no longer required. 
*/ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); + +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. @@ -265,6 +427,7 @@ void __init paging_init(void) void *zero_page; map_mem(); + fixup_executable(); /* * Finally flush the caches and tlb to ensure that we're in a From 60305db9884515ca063474e262b454f6da04e4e2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 22 Jan 2015 10:01:40 +0000 Subject: [PATCH 23/47] arm64/efi: move virtmap init to early initcall Now that the create_mapping() code in mm/mmu.c is able to support setting up kernel page tables at initcall time, we can move the whole virtmap creation to arm64_enable_runtime_services() instead of having a distinct stage during early boot. This also allows us to drop the arm64-specific EFI_VIRTMAP flag. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 18 ++---- arch/arm64/kernel/efi.c | 111 +++++++++++++++++------------------ arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/mmu.c | 2 +- 4 files changed, 59 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 7baf2cc04e1e..ef572206f1c3 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -6,10 +6,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_virtmap_init(void); #else #define efi_init() -#define efi_virtmap_init() #endif #define efi_call_virt(f, ...) \ @@ -53,23 +51,17 @@ extern void efi_virtmap_init(void); #define EFI_ALLOC_ALIGN SZ_64K /* - * On ARM systems, virtually remapped UEFI runtime services are set up in three + * On ARM systems, virtually remapped UEFI runtime services are set up in two * distinct stages: * - The stub retrieves the final version of the memory map from UEFI, populates * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime * service to communicate the new mapping to the firmware (Note that the new * mapping is not live at this time) - * - During early boot, the page tables are allocated and populated based on the - * virt_addr fields in the memory map, but only if all descriptors with the - * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this - * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings - * have been installed successfully. 
- * - During an early initcall(), the UEFI Runtime Services are enabled and the - * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a - * non-early mapping of the UEFI system table, and we need to have the virtmap - * installed. + * - During an early initcall(), the EFI system table is permanently remapped + * and the virtual remapping of the UEFI Runtime Services regions is loaded + * into a private set of page tables. If this all succeeds, the Runtime + * Services are enabled and the EFI_RUNTIME_SERVICES bit set. */ -#define EFI_VIRTMAP EFI_ARCH_1 void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index c9cb0fbe7aa4..b42c7b480e1e 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -38,6 +38,19 @@ struct efi_memory_map memmap; static u64 efi_system_table; +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + INIT_MM_CONTEXT(efi_mm) +}; + static int uefi_debug __initdata; static int __init uefi_debug_setup(char *str) { @@ -213,6 +226,45 @@ void __init efi_init(void) return; reserve_regions(); + early_memunmap(memmap.map, params.mmap_size); +} + +static bool __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (md->virt_addr == 0) + return false; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + pr_info(" EFI remap 0x%016llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. 
+ */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); + } + return true; } /* @@ -254,7 +306,7 @@ static int __init arm64_enable_runtime_services(void) } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - if (!efi_enabled(EFI_VIRTMAP)) { + if (!efi_virtmap_init()) { pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; } @@ -283,19 +335,6 @@ static int __init arm64_dmi_init(void) } core_initcall(arm64_dmi_init); -static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; - -static struct mm_struct efi_mm = { - .mm_rb = RB_ROOT, - .pgd = efi_pgd, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), - INIT_MM_CONTEXT(efi_mm) -}; - static void efi_set_pgd(struct mm_struct *mm) { cpu_switch_mm(mm->pgd, mm); @@ -315,47 +354,3 @@ void efi_virtmap_unload(void) efi_set_pgd(current->active_mm); preempt_enable(); } - -void __init efi_virtmap_init(void) -{ - efi_memory_desc_t *md; - - if (!efi_enabled(EFI_BOOT)) - return; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - pgprot_t prot; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (WARN(md->virt_addr == 0, - "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n", - md->phys_addr)) - return; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - pr_info(" EFI remap 0x%016llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - /* - * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if (!is_normal_ram(md)) - prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) - prot = PAGE_KERNEL_EXEC; - else - prot = PAGE_KERNEL; - - create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); - } - set_bit(EFI_VIRTMAP, &efi.flags); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 207413fe08a0..bb10903887d4 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -382,7 +382,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); request_standard_resources(); - efi_virtmap_init(); early_ioremap_reset(); unflatten_device_tree(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 91d55b6efd8a..155cbb0a74b6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -269,7 +269,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot) { __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - early_alloc); + late_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, From aa03c428e67881795f4190f9ffdb62fc14978608 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jan 2015 18:20:35 +0000 Subject: [PATCH 24/47] arm64: Fix overlapping VA allocations PCI IO space was intended to be 16MiB, at 32MiB below MODULES_VADDR, but commit d1e6dc91b532d3d3 ("arm64: Add architectural support for PCI") extended this to cover the full 32MiB. The final 8KiB of this 32MiB is also allocated for the fixmap, allowing for potential clashes between the two. 
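Editorial sketch of the old arithmetic, assuming 4KiB pages: the I/O window spanned [MODULES_VADDR - SZ_32M, MODULES_VADDR), while FIXADDR_TOP sat at MODULES_VADDR - SZ_2M - PAGE_SIZE, only 2MiB + 4KiB below MODULES_VADDR and therefore inside the window claimed for PCI I/O.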
This change was masked by assumptions in mem_init and the page table dumping code, each of which assumed the I/O space to be 16MiB long through separate hard-coded definitions. This patch changes the definition of the PCI I/O space allocation to live in asm/memory.h, along with the other VA space allocations. As the fixmap allocation depends on the number of fixmap entries, this is moved below the PCI I/O space allocation. Both the fixmap and PCI I/O space are guarded with 2MiB of padding. Sites assuming the I/O space was 16MiB are moved over to use the new PCI_IO_{START,END} definitions, which will stay in sync with the size of the I/O space (now restored to 16MiB). As a useful side effect, the use of the new PCI_IO_{START,END} definitions prevents a build issue in the dumping code due to a missing include of io.h for PCI_IOBASE (an include which is now redundant). Signed-off-by: Mark Rutland Cc: Kees Cook Cc: Laura Abbott Cc: Liviu Dudau Cc: Steve Capper Cc: Will Deacon [catalin.marinas@arm.com: reorder FIXADDR and PCI_IO address_markers_idx enum] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/io.h | 5 +++-- arch/arm64/include/asm/memory.h | 10 +++++++++- arch/arm64/mm/dump.c | 8 ++++---- arch/arm64/mm/init.c | 5 +++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 949c406d4df4..540f7c0aea82 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -26,6 +26,7 @@ #include #include +#include <asm/memory.h> #include #include #include @@ -145,8 +146,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * I/O port access primitives. */ #define arch_has_dev_port() (1) -#define IO_SPACE_LIMIT (SZ_32M - 1) -#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M)) +#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1) +#define PCI_IOBASE ((void __iomem *)PCI_IO_START) /* * String version of I/O memory access operations. diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 6486b2bfd562..f800d45ea226 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -32,6 +32,12 @@ */ #define UL(x) _AC(x, UL) +/* + * Size of the PCI I/O space. This must remain a power of two so that + * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
+ */ +#define PCI_IO_SIZE SZ_16M + /* * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) @@ -45,7 +51,9 @@ #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) -#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE) +#define PCI_IO_END (MODULES_VADDR - SZ_2M) +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) +#define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index cf33f33333cc..77d39a1b6f8d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,10 +36,10 @@ enum address_markers_idx { VMEMMAP_START_NR, VMEMMAP_END_NR, #endif - PCI_START_NR, - PCI_END_NR, FIXADDR_START_NR, FIXADDR_END_NR, + PCI_START_NR, + PCI_END_NR, MODULES_START_NR, MODUELS_END_NR, KERNEL_SPACE_NR, @@ -52,10 +52,10 @@ static struct addr_marker address_markers[] = { { 0, "vmemmap start" }, { 0, "vmemmap end" }, #endif - { (unsigned long) PCI_IOBASE, "PCI I/O start" }, - { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, + { PCI_IO_START, "PCI I/O start" }, + { PCI_IO_END, "PCI I/O end" }, { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, { PAGE_OFFSET, "Kernel Mapping" }, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 43cccb5101c0..a8dfb40eefa8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -296,8 +297,8 @@ void __init mem_init(void) " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif - " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" + " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" @@ -310,8 +311,8 @@ void __init mem_init(void) MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif - MLM((unsigned long)PCI_IOBASE, (unsigned long)PCI_IOBASE + SZ_16M), MLK(FIXADDR_START, FIXADDR_TOP), + MLM(PCI_IO_START, PCI_IO_END), MLM(MODULES_VADDR, MODULES_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), MLK_ROUNDUP(__init_begin, __init_end), From 764011ca8247808e066a182bcfe42a2b14c99a9a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jan 2015 18:20:36 +0000 Subject: [PATCH 25/47] arm64: mm: dump: add missing includes The arm64 dump code is currently relying on some definitions which are pulled in via transitive dependencies. It seems we have implicit dependencies on the following definitions: * MODULES_VADDR (asm/memory.h) * MODULES_END (asm/memory.h) * PAGE_OFFSET (asm/memory.h) * PTE_* (asm/pgtable-hwdef.h) * ENOMEM (linux/errno.h) * device_initcall (linux/init.h) This patch ensures we explicitly include the relevant headers for the above items, fixing the observed build issue and hopefully preventing future issues as headers are refactored. Signed-off-by: Mark Rutland Reported-by: Mark Brown Acked-by: Steve Capper Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 77d39a1b6f8d..bbc5a29ecaaf 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -14,13 +14,17 @@ * of the License. 
*/ #include <linux/debugfs.h> +#include <linux/errno.h> #include <linux/fs.h> +#include <linux/init.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <asm/fixmap.h> +#include <asm/memory.h> #include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> #define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) From da1f2b82054c171166ca0069cd60aa9300127cf1 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 26 Dec 2014 16:52:10 +0000 Subject: [PATCH 26/47] arm64: add ioremap physical address information It is useful to show the physical address of each ioremap in /proc/vmallocinfo. Add physical address information in arm64 ioremap: 0xffffc900047f2000-0xffffc900047f4000 8192 _nv013519rm+0x57/0xa0 [nvidia] phys=f8100000 ioremap 0xffffc900047f4000-0xffffc900047f6000 8192 _nv013519rm+0x57/0xa0 [nvidia] phys=f8008000 ioremap 0xffffc90004800000-0xffffc90004821000 135168 e1000_probe+0x22c/0xb95 [e1000e] phys=f4300000 ioremap 0xffffc900049c0000-0xffffc900049e1000 135168 _nv013521rm+0x4d/0xd0 [nvidia] phys=e0140000 ioremap Signed-off-by: Min-Hua Chen Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/ioremap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index cbb99c8f1e04..01e88c8bcab0 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -62,6 +62,7 @@ static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, if (!area) return NULL; addr = (unsigned long)area->addr; + area->phys_addr = phys_addr; err = ioremap_page_range(addr, addr + size, phys_addr, prot); if (err) { From 9f71ac961be8d44ad9d7742a5d1129577514660d Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 17 Dec 2014 15:50:21 +0000 Subject: [PATCH 27/47] arm64: Fix SCTLR_EL1 initialisation We initialise the SCTLR_EL1 value by read-modify-writeback of the desired bits, leaving the other bits (including reserved bits (RESx)) untouched. However, sometimes the boot monitor could leave garbage values in the RESx bits which could have different implications. This patch makes sure that all the bits, including the RESx bits, are set to the proper state, except for the 'endianness' control bits, EE(25) & E0E(24), which are set early in el2_setup. The state of Bit[6] in the comment is also updated to RES0. Signed-off-by: Suzuki K. Poulose Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 4e778b13291b..b98fc8ce6a16 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -244,14 +244,18 @@ ENTRY(__cpu_setup) ENDPROC(__cpu_setup) /* + * We set the desired value explicitly, including those of the + * reserved bits. The values of bits EE & E0E were set early in + * el2_setup, which are left untouched below. + * * n n T * U E WT T UD US IHBS * CE0 XWHW CZ ME TEEA S * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM - * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved - * .... .1.. .... 01.1 11.1 ..01 0001 1101 < software settings + * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved + * .... .1.. ....
01.1 11.1 ..01 0.01 1101 < software settings */ .type crval, #object crval: - .word 0x000802e2 // clear - .word 0x0405d11d // set + .word 0xfcffffff // clear + .word 0x34d5d91d // set From 9d3bfbb4df58a8025b46b2676da2cf450385b754 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 12 Jan 2015 20:48:53 +0000 Subject: [PATCH 28/47] arm64: Combine coherent and non-coherent swiotlb dma_ops Since dev_archdata now has a dma_coherent state, combine the two coherent and non-coherent operations and remove their declaration, together with set_dma_ops, from the arch dma-mapping.h file. Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/dma-mapping.h | 11 +-- arch/arm64/mm/dma-mapping.c | 116 ++++++++++++--------------- 2 files changed, 54 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 9ce3e680ae1c..6932bb57dba0 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -28,8 +28,6 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; -extern struct dma_map_ops coherent_swiotlb_dma_ops; -extern struct dma_map_ops noncoherent_swiotlb_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { @@ -47,23 +45,18 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) -{ - dev->archdata.dma_ops = ops; -} - static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, struct iommu_ops *iommu, bool coherent) { dev->archdata.dma_coherent = coherent; - if (coherent) - set_dma_ops(dev, &coherent_swiotlb_dma_ops); } #define arch_setup_dma_ops arch_setup_dma_ops /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { + if (!dev) + return false; return dev->archdata.dma_coherent; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d92094203913..0a24b9b8c698 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -134,16 +134,17 @@ static void __dma_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_handle); } -static void *__dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) +static void *__dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { struct page *page; void *ptr, *coherent_ptr; + bool coherent = is_device_dma_coherent(dev); size = PAGE_ALIGN(size); - if (!(flags & __GFP_WAIT)) { + if (!coherent && !(flags & __GFP_WAIT)) { struct page *page = NULL; void *addr = __alloc_from_pool(size, &page); @@ -151,13 +152,16 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, *dma_handle = phys_to_dma(dev, page_to_phys(page)); return addr; - } ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); if (!ptr) goto no_mem; + /* no need for non-cacheable mapping if coherent */ + if (coherent) + return ptr; + /* remove any dirty cache lines on the kernel alias */ __dma_flush_range(ptr, ptr + size); @@ -179,15 +183,17 @@ no_mem: return NULL; } -static void __dma_free_noncoherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) +static void __dma_free(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { 
void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); - if (__free_from_pool(vaddr, size)) - return; - vunmap(vaddr); + if (!is_device_dma_coherent(dev)) { + if (__free_from_pool(vaddr, size)) + return; + vunmap(vaddr); + } __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); } @@ -199,7 +205,8 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, dma_addr_t dev_addr; dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); return dev_addr; } @@ -209,7 +216,8 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); } @@ -221,9 +229,10 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int i, ret; ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); - for_each_sg(sgl, sg, ret, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, ret, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); return ret; } @@ -236,9 +245,10 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev, struct scatterlist *sg; int i; - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); } @@ -246,7 +256,8 @@ static void __swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir) { - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); } @@ -255,7 +266,8 @@ static void __swiotlb_sync_single_for_device(struct device *dev, enum dma_data_direction dir) { swiotlb_sync_single_for_device(dev, dev_addr, size, dir); - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); } static void __swiotlb_sync_sg_for_cpu(struct device *dev, @@ -265,9 +277,10 @@ static void __swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg; int i; - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); } @@ -279,9 +292,10 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, int i; swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); - for_each_sg(sgl, sg, nelems, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, nelems, i) + 
__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); } /* vma->vm_page_prot must be set appropriately before calling this function */ @@ -308,28 +322,20 @@ static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } -static int __swiotlb_mmap_noncoherent(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) +static int __swiotlb_mmap(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) { - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false); + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } -static int __swiotlb_mmap_coherent(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) -{ - /* Just use whatever page_prot attributes were specified */ - return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); -} - -struct dma_map_ops noncoherent_swiotlb_dma_ops = { - .alloc = __dma_alloc_noncoherent, - .free = __dma_free_noncoherent, - .mmap = __swiotlb_mmap_noncoherent, +static struct dma_map_ops swiotlb_dma_ops = { + .alloc = __dma_alloc, + .free = __dma_free, + .mmap = __swiotlb_mmap, .map_page = __swiotlb_map_page, .unmap_page = __swiotlb_unmap_page, .map_sg = __swiotlb_map_sg_attrs, @@ -341,24 +347,6 @@ struct dma_map_ops noncoherent_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; -EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); - -struct dma_map_ops coherent_swiotlb_dma_ops = { - .alloc = __dma_alloc_coherent, - .free = __dma_free_coherent, - .mmap = __swiotlb_mmap_coherent, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .dma_supported = swiotlb_dma_supported, - .mapping_error = swiotlb_dma_mapping_error, -}; -EXPORT_SYMBOL(coherent_swiotlb_dma_ops); extern int swiotlb_late_init_with_default_size(size_t default_size); @@ -427,7 +415,7 @@ static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); - dma_ops = &noncoherent_swiotlb_dma_ops; + dma_ops = &swiotlb_dma_ops; return swiotlb_late_init_with_default_size(swiotlb_size); } From 78d51e0b8b57728099a3da74f4a10b6f8c71b764 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 20:48:54 +0000 Subject: [PATCH 29/47] arm64: implement generic IOMMU configuration Add the necessary call to of_iommu_init. 
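As a sketch of what this hooks up (illustrative only; the driver name and compatible string below are made up), an IOMMU driver registers an early init function via IOMMU_OF_DECLARE(), and of_iommu_init() walks the matching device-tree nodes and calls those functions before the platform devices are populated:

#include <linux/of.h>
#include <linux/of_iommu.h>

/* Hypothetical example: run by of_iommu_init() for each DT node whose
 * compatible string matches, ahead of of_platform_populate(). */
static int __init example_iommu_of_init(struct device_node *np)
{
	/* probe the IOMMU described by 'np' and register its ops here */
	return 0;
}
IOMMU_OF_DECLARE(example_iommu, "vendor,example-iommu",
		 example_iommu_of_init);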
Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bb10903887d4..e8420f635bd4 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -40,6 +40,7 @@ #include #include #include +#include <linux/of_iommu.h> #include #include #include @@ -405,6 +406,7 @@ void __init setup_arch(char **cmdline_p) static int __init arm64_device_init(void) { + of_iommu_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } From 04597a65c5efc207257a736d339c6f2f5b00250f Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 21 Jan 2015 12:43:09 +0000 Subject: [PATCH 30/47] arm64: Track system support for mixed endian EL0 This patch keeps track of the mixed endian EL0 support across the system and provides helper functions to export it. The status is a boolean indicating whether all the CPUs on the system support mixed endian at EL0. Signed-off-by: Suzuki K. Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/include/asm/cputype.h | 14 ++++++++++++++ arch/arm64/kernel/cpuinfo.c | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 07547ccc1f2b..b6c16d5f622f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -52,6 +52,8 @@ static inline void cpus_set_cap(unsigned int num) } void check_local_cpu_errata(void); +bool cpu_supports_mixed_endian_el0(void); +bool system_supports_mixed_endian_el0(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 8adb986a3086..ee6403df9fe4 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -72,6 +72,15 @@ #define APM_CPU_PART_POTENZA 0x000 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT) +#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \ + (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT) +#define ID_AA64MMFR0_BIGEND_SHIFT 8 +#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) +#define ID_AA64MMFR0_BIGEND(mmfr0) \ + (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) + #ifndef __ASSEMBLY__ /* @@ -104,6 +113,11 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) return read_cpuid(CTR_EL0); } +static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) +{ + return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) || + (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1); +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 49782282a027..929855691dae 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -35,6 +35,7 @@ */ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; +static bool mixed_endian_el0 = true; static char *icache_policy_str[] = { [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", @@ -68,6 +69,26 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } +bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); +} + +bool system_supports_mixed_endian_el0(void) +{ + return mixed_endian_el0; +} + +static void
update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) +{ + mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); +} + +static void update_cpu_features(struct cpuinfo_arm64 *info) +{ + update_mixed_endian_el0_support(info); +} + static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) { if ((boot & mask) == (cur & mask)) @@ -215,6 +236,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); + update_cpu_features(info); } void cpuinfo_store_cpu(void) From 736d474f0fafd1486f178570bc47660ee9dfdef8 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 21 Jan 2015 12:43:10 +0000 Subject: [PATCH 31/47] arm64: Consolidate hotplug notifier for instruction emulation As of now each insn_emulation has a cpu hotplug notifier that enables/disables the CPU feature bit for the functionality. This patch re-arranges the code, such that there is only one notifier that runs through the list of registered emulation hooks and runs their corresponding set_hw_mode. We do nothing when a CPU is dying as we will set the appropriate bits as it comes back online based on the state of the hooks. Signed-off-by: Mark Rutland Signed-off-by: Suzuki K. Poulose Cc: Will Deacon Cc: Punit Agrawal [catalin.marinas@arm.com: fix pr_warn compilation error] [catalin.marinas@arm.com: remove unnecessary "insn" check] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 + arch/arm64/kernel/armv8_deprecated.c | 125 +++++++++++++++++---------- 2 files changed, 79 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ee6403df9fe4..68732e9a02fb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,8 @@ #define ID_AA64MMFR0_BIGEND(mmfr0) \ (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) +#define SCTLR_EL1_CP15BEN (0x1 << 5) + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c363671d7509..68b955e1fd99 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -19,6 +19,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace-events-emulation.h" @@ -85,6 +86,57 @@ static void remove_emulation_hooks(struct insn_emulation_ops *ops) pr_notice("Removed %s emulation handler\n", ops->name); } +static void enable_insn_hw_mode(void *data) +{ + struct insn_emulation *insn = (struct insn_emulation *)data; + if (insn->ops->set_hw_mode) + insn->ops->set_hw_mode(true); +} + +static void disable_insn_hw_mode(void *data) +{ + struct insn_emulation *insn = (struct insn_emulation *)data; + if (insn->ops->set_hw_mode) + insn->ops->set_hw_mode(false); +} + +/* Run set_hw_mode(mode) on all active CPUs */ +static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable) +{ + if (!insn->ops->set_hw_mode) + return -EINVAL; + if (enable) + on_each_cpu(enable_insn_hw_mode, (void *)insn, true); + else + on_each_cpu(disable_insn_hw_mode, (void *)insn, true); + return 0; +} + +/* + * Run set_hw_mode for all insns on a starting CPU. + * Returns: + * 0 - If all the hooks ran successfully. + * -EINVAL - At least one hook is not supported by the CPU. 
+ */ +static int run_all_insn_set_hw_mode(unsigned long cpu) +{ + int rc = 0; + unsigned long flags; + struct insn_emulation *insn; + + raw_spin_lock_irqsave(&insn_emulation_lock, flags); + list_for_each_entry(insn, &insn_emulation, node) { + bool enable = (insn->current_mode == INSN_HW); + if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) { + pr_warn("CPU[%ld] cannot support the emulation of %s", + cpu, insn->ops->name); + rc = -EINVAL; + } + } + raw_spin_unlock_irqrestore(&insn_emulation_lock, flags); + return rc; +} + static int update_insn_emulation_mode(struct insn_emulation *insn, enum insn_emulation_mode prev) { @@ -97,10 +149,8 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, remove_emulation_hooks(insn->ops); break; case INSN_HW: - if (insn->ops->set_hw_mode) { - insn->ops->set_hw_mode(false); + if (!run_all_cpu_set_hw_mode(insn, false)) pr_notice("Disabled %s support\n", insn->ops->name); - } break; } @@ -111,10 +161,9 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, register_emulation_hooks(insn->ops); break; case INSN_HW: - if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(true)) + ret = run_all_cpu_set_hw_mode(insn, true); + if (!ret) pr_notice("Enabled %s support\n", insn->ops->name); - else - ret = -EINVAL; break; } @@ -133,6 +182,8 @@ static void register_insn_emulation(struct insn_emulation_ops *ops) switch (ops->status) { case INSN_DEPRECATED: insn->current_mode = INSN_EMULATE; + /* Disable the HW mode if it was turned on at early boot time */ + run_all_cpu_set_hw_mode(insn, false); insn->max = INSN_HW; break; case INSN_OBSOLETE: @@ -453,8 +504,6 @@ ret: return 0; } -#define SCTLR_EL1_CP15BEN (1 << 5) - static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; @@ -465,48 +514,13 @@ static inline void config_sctlr_el1(u32 clear, u32 set) asm volatile("msr sctlr_el1, %0" : : "r" (val)); } -static void enable_cp15_ben(void *info) -{ - config_sctlr_el1(0, SCTLR_EL1_CP15BEN); -} - -static void disable_cp15_ben(void *info) -{ - config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); -} - -static int cpu_hotplug_notify(struct notifier_block *b, - unsigned long action, void *hcpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - enable_cp15_ben(NULL); - return NOTIFY_DONE; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_cp15_ben(NULL); - return NOTIFY_DONE; - } - - return NOTIFY_OK; -} - -static struct notifier_block cpu_hotplug_notifier = { - .notifier_call = cpu_hotplug_notify, -}; - static int cp15_barrier_set_hw_mode(bool enable) { - if (enable) { - register_cpu_notifier(&cpu_hotplug_notifier); - on_each_cpu(enable_cp15_ben, NULL, true); - } else { - unregister_cpu_notifier(&cpu_hotplug_notifier); - on_each_cpu(disable_cp15_ben, NULL, true); - } - - return true; + if (enable) + config_sctlr_el1(0, SCTLR_EL1_CP15BEN); + else + config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); + return 0; } static struct undef_hook cp15_barrier_hooks[] = { @@ -534,6 +548,20 @@ static struct insn_emulation_ops cp15_barrier_ops = { .set_hw_mode = cp15_barrier_set_hw_mode, }; +static int insn_cpu_hotplug_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + int rc = 0; + if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING) + rc = run_all_insn_set_hw_mode((unsigned long)hcpu); + + return notifier_from_errno(rc); +} + +static struct notifier_block insn_cpu_hotplug_notifier = { + .notifier_call = insn_cpu_hotplug_notify, +}; + /* * Invoked as late_initcall, since not needed before init spawned. 
*/ @@ -545,6 +573,7 @@ static int __init armv8_deprecated_init(void) if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION)) register_insn_emulation(&cp15_barrier_ops); + register_cpu_notifier(&insn_cpu_hotplug_notifier); register_insn_emulation_sysctl(ctl_abi); return 0; From 2d888f48e056119495847a269a435d5c3d9df349 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 21 Jan 2015 12:43:11 +0000 Subject: [PATCH 32/47] arm64: Emulate SETEND for AArch32 tasks Emulate the deprecated 'setend' instruction for AArch32 tasks. setend [le/be] - Sets the endianness of EL0 On systems with CPUs which support mixed endian at EL0, the hardware support for the instruction can be enabled by clearing the SCTLR_EL1.SED bit. Like the other emulated instructions it is controlled by an entry in /proc/sys/abi/. For more information see: Documentation/arm64/legacy_instructions.txt The instruction is emulated by setting/clearing the SPSR_EL1.E bit, which will be reflected in the PSTATE.E in AArch32 context. This patch also restores the native endianness for the execution of signal handlers, since the process could have changed the endianness. Note: All CPUs on the system must have mixed endian support at EL0. Once the handler is registered, hotplugging a CPU which doesn't support mixed endian could lead to unexpected results/behavior in applications. Signed-off-by: Suzuki K. Poulose Cc: Will Deacon Cc: Punit Agrawal Signed-off-by: Catalin Marinas --- Documentation/arm64/legacy_instructions.txt | 12 ++++ arch/arm64/Kconfig | 15 ++++ arch/arm64/include/asm/cputype.h | 1 + arch/arm64/include/asm/ptrace.h | 7 ++ arch/arm64/kernel/armv8_deprecated.c | 80 +++++++++++++++++++++ arch/arm64/kernel/signal32.c | 5 +- 6 files changed, 119 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt index a3b3da2ec6ed..01bf3d9fac85 100644 --- a/Documentation/arm64/legacy_instructions.txt +++ b/Documentation/arm64/legacy_instructions.txt @@ -32,6 +32,9 @@ The default mode depends on the status of the instruction in the architecture. Deprecated instructions should default to emulation while obsolete instructions must be undefined by default. +Note: Instruction emulation may not be possible in all cases. See +individual instruction notes for further information. + Supported legacy instructions ----------------------------- * SWP{B} @@ -43,3 +46,12 @@ Default: Undef (0) Node: /proc/sys/abi/cp15_barrier Status: Deprecated Default: Emulate (1) + +* SETEND +Node: /proc/sys/abi/setend +Status: Deprecated +Default: Emulate (1)* +Note: All the cpus on the system must have mixed endian support at EL0 +for this feature to be enabled. If a new CPU - which doesn't support mixed +endian - is hotplugged in after this feature has been enabled, there could +be unexpected results in the application. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1f9a20a3677..21a59bf37145 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -540,6 +540,21 @@ config CP15_BARRIER_EMULATION If unsure, say Y +config SETEND_EMULATION + bool "Emulate SETEND instruction" + help + The SETEND instruction alters the data-endianness of the + AArch32 EL0, and is deprecated in ARMv8. + + Say Y here to enable software emulation of the instruction + for AArch32 userspace code. + + Note: All the cpus on the system must have mixed endian support at EL0 + for this feature to be enabled.
If a new CPU - which doesn't support mixed + endian - is hotplugged in after this feature has been enabled, there could + be unexpected results in the applications. + + If unsure, say Y endif endmenu diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 68732e9a02fb..a84ec605bed8 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -82,6 +82,7 @@ (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) #define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41ed9e13795e..d6dd9fdbc3be 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -58,6 +58,13 @@ #define COMPAT_PSR_Z_BIT 0x40000000 #define COMPAT_PSR_N_BIT 0x80000000 #define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT +#else +#define COMPAT_PSR_ENDSTATE 0 +#endif + /* * These are 'magic' values for PTRACE_PEEKUSR that return info about where a * process is located in memory. diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 68b955e1fd99..7922c2e710ca 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -548,6 +548,79 @@ static struct insn_emulation_ops cp15_barrier_ops = { .set_hw_mode = cp15_barrier_set_hw_mode, }; +static int setend_set_hw_mode(bool enable) +{ + if (!cpu_supports_mixed_endian_el0()) + return -EINVAL; + + if (enable) + config_sctlr_el1(SCTLR_EL1_SED, 0); + else + config_sctlr_el1(0, SCTLR_EL1_SED); + return 0; +} + +static int compat_setend_handler(struct pt_regs *regs, u32 big_endian) +{ + char *insn; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + if (big_endian) { + insn = "setend be"; + regs->pstate |= COMPAT_PSR_E_BIT; + } else { + insn = "setend le"; + regs->pstate &= ~COMPAT_PSR_E_BIT; + } + + trace_instruction_emulation(insn, regs->pc); + pr_warn_ratelimited("\"%s\" (%ld) uses deprecated setend instruction at 0x%llx\n", + current->comm, (unsigned long)current->pid, regs->pc); + + return 0; +} + +static int a32_setend_handler(struct pt_regs *regs, u32 instr) +{ + int rc = compat_setend_handler(regs, (instr >> 9) & 1); + regs->pc += 4; + return rc; +} + +static int t16_setend_handler(struct pt_regs *regs, u32 instr) +{ + int rc = compat_setend_handler(regs, (instr >> 3) & 1); + regs->pc += 2; + return rc; +} + +static struct undef_hook setend_hooks[] = { + { + .instr_mask = 0xfffffdff, + .instr_val = 0xf1010000, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = a32_setend_handler, + }, + { + /* Thumb mode */ + .instr_mask = 0x0000fff7, + .instr_val = 0x0000b650, + .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), + .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), + .fn = t16_setend_handler, + }, + {} +}; + +static struct insn_emulation_ops setend_ops = { + .name = "setend", + .status = INSN_DEPRECATED, + .hooks = setend_hooks, + .set_hw_mode = setend_set_hw_mode, +}; + static int insn_cpu_hotplug_notify(struct notifier_block *b, unsigned long action, void *hcpu) { @@ -573,6 +646,13 @@ static int __init armv8_deprecated_init(void) if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION)) register_insn_emulation(&cp15_barrier_ops); + if (IS_ENABLED(CONFIG_SETEND_EMULATION)) { + if(system_supports_mixed_endian_el0()) + 
register_insn_emulation(&setend_ops); + else + pr_info("setend instruction emulation is not supported on the system"); + } + register_cpu_notifier(&insn_cpu_hotplug_notifier); register_insn_emulation_sysctl(ctl_abi); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 192d900c058f..e299de396e9b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -440,7 +440,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, { compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); compat_ulong_t retcode; - compat_ulong_t spsr = regs->pstate & ~PSR_f; + compat_ulong_t spsr = regs->pstate & ~(PSR_f | COMPAT_PSR_E_BIT); int thumb; /* Check if the handler is written for ARM or Thumb */ @@ -454,6 +454,9 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* The IT state must be cleared for both ARM and Thumb-2 */ spsr &= ~COMPAT_PSR_IT_MASK; + /* Restore the original endianness */ + spsr |= COMPAT_PSR_ENDSTATE; + if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); } else { From 0aaf0dae81b586134faeb52e28b7ad567629dd68 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 23 Jan 2015 05:36:42 +0000 Subject: [PATCH 33/47] smp, ARM64: Kill SMP single function call interrupt Commit 9a46ad6d6df3b54 "smp: make smp_call_function_many() use logic similar to smp_call_function_single()" has unified the way to handle single and multiple cross-CPU function calls. Now only one interrupt is needed for architecture specific code to support generic SMP function call interfaces, so kill the redundant single function call interrupt. Signed-off-by: Jiang Liu Acked-by: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/kernel/smp.c | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index e8a3268a891c..6aae421f4d73 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 6 +#define NR_IPI 5 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 7ae6ee085261..328b8ce4b007 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -65,7 +65,6 @@ struct secondary_data secondary_data; enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_TIMER, IPI_IRQ_WORK, @@ -483,7 +482,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { #define S(x,s) [x] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), @@ -527,7 +525,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } #ifdef CONFIG_IRQ_WORK @@ -585,12 +583,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; - case IPI_CALL_FUNC_SINGLE: - irq_enter(); - generic_smp_call_function_single_interrupt(); - irq_exit(); - break; - case IPI_CPU_STOP: irq_enter(); ipi_cpu_stop(cpu); From 33b36543df336d9158e1a763fe97251885f52c5c Mon Sep 17 
00:00:00 2001 From: Will Deacon Date: Fri, 16 Jan 2015 13:52:14 +0000 Subject: [PATCH 34/47] arm64: uapi: expose our struct ucontext to the uapi headers arm64 defines its own ucontext structure which is incompatible with the struct defined (and exposed to userspace) by the asm-generic headers. glibc carries its own struct definition that is compatible with the arm64 definition, but we should expose our format in the uapi headers in case other libraries want to make use of the ucontext pushed as part of an arm64 sigframe. This patch moves the arm64 asm/ucontext.h to the uapi headers, along with the necessary #include of linux/types.h. Cc: Arnd Bergmann Cc: Marcus Shawcroft Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/Kbuild | 1 + arch/arm64/include/{ => uapi}/asm/ucontext.h | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) rename arch/arm64/include/{ => uapi}/asm/ucontext.h (88%) diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index 942376d37d22..825b0fe51c2b 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -18,4 +18,5 @@ header-y += siginfo.h header-y += signal.h header-y += stat.h header-y += statfs.h +header-y += ucontext.h header-y += unistd.h diff --git a/arch/arm64/include/asm/ucontext.h b/arch/arm64/include/uapi/asm/ucontext.h similarity index 88% rename from arch/arm64/include/asm/ucontext.h rename to arch/arm64/include/uapi/asm/ucontext.h index 42e04c877428..791de8e89e35 100644 --- a/arch/arm64/include/asm/ucontext.h +++ b/arch/arm64/include/uapi/asm/ucontext.h @@ -13,8 +13,10 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __ASM_UCONTEXT_H -#define __ASM_UCONTEXT_H +#ifndef _UAPI__ASM_UCONTEXT_H +#define _UAPI__ASM_UCONTEXT_H + +#include <linux/types.h> struct ucontext { unsigned long uc_flags; @@ -27,4 +29,4 @@ struct ucontext { struct sigcontext uc_mcontext; }; -#endif /* __ASM_UCONTEXT_H */ +#endif /* _UAPI__ASM_UCONTEXT_H */ From e9fb8b7e4f6af17e3aa4835281e06fdc920341f9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:48:36 +0000 Subject: [PATCH 35/47] compat: Declare compat_sys_sigpending and compat_sys_sigprocmask prototypes __ARCH_WANT_SYS_SIGPENDING or __ARCH_WANT_SYS_SIGPROCMASK may be defined for compat support but the corresponding prototypes are missing from linux/compat.h.
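The missing declarations matter later in this series: once the compat syscall table is generated in C (see "arm64: Implement the compat_sys_call_table in C" below), each symbol pulled in from asm/unistd32.h must have a prototype in scope. A rough sketch of the failure mode, assuming the usual table-generation idiom:

#define __SYSCALL(nr, sym)	[nr] = sym,
/*
 * Sketch only: a table entry such as
 *	[__NR_sigpending] = compat_sys_sigpending,
 * compiles cleanly only if compat_sys_sigpending has a visible
 * prototype, hence the declarations added to linux/compat.h below.
 */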
Signed-off-by: Catalin Marinas Acked-by: Andrew Morton Cc: Arnd Bergmann --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/compat.h b/include/linux/compat.h index 7450ca2ac1fc..ab25814690bc 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -689,6 +689,15 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); +#ifdef __ARCH_WANT_SYS_SIGPENDING +asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); +#endif + +#ifdef __ARCH_WANT_SYS_SIGPROCMASK +asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *nset, + compat_old_sigset_t __user *oset); +#endif + int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); #define compat_save_altstack_ex(uss, sp) do { \ From 54e45c169dbce43cf46d00eb1521b655b6e4f9e9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:52:38 +0000 Subject: [PATCH 36/47] syscalls: Declare sys_*stat64 prototypes if __ARCH_WANT_(COMPAT_)STAT64 Currently, the sys_stat64, sys_fstat64 and sys_lstat64 prototypes are only declared if BITS_PER_LONG == 32. Following commit 0753f70f07fb (fs: Build sys_stat64() and friends if __ARCH_WANT_COMPAT_STAT64), the implementation of these functions is allowed on 64-bit systems for compat support. The patch changes the condition on the prototype declaration from BITS_PER_LONG == 32 to defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64). In addition, it moves the sys_fstatat64 prototype under the same #if block. Signed-off-by: Catalin Marinas Acked-by: Andrew Morton Cc: Arnd Bergmann --- include/linux/syscalls.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 85893d744901..76d1e38aabe1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -410,12 +410,16 @@ asmlinkage long sys_newlstat(const char __user *filename, struct stat __user *statbuf); asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); -#if BITS_PER_LONG == 32 +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) asmlinkage long sys_stat64(const char __user *filename, struct stat64 __user *statbuf); asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); asmlinkage long sys_lstat64(const char __user *filename, struct stat64 __user *statbuf); +asmlinkage long sys_fstatat64(int dfd, const char __user *filename, + struct stat64 __user *statbuf, int flag); +#endif +#if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif @@ -771,8 +775,6 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_newfstatat(int dfd, const char __user *filename, struct stat __user *statbuf, int flag); -asmlinkage long sys_fstatat64(int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag); asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_utimensat(int dfd, const char __user *filename, From 0156411b1828771c09c92f034e7b81f702b84f07 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:42:32
+0000 Subject: [PATCH 37/47] arm64: Implement the compat_sys_call_table in C Unlike the sys_call_table[], the compat one was implemented in sys32.S making it impossible to notice discrepancies between the number of compat syscalls and the __NR_compat_syscalls macro, the latter having to be defined in asm/unistd.h as including asm/unistd32.h would cause conflicts on __NR_* definitions. With this patch, incorrect __NR_compat_syscalls values will result in a build-time error. Signed-off-by: Catalin Marinas Suggested-by: Mark Rutland Acked-by: Mark Rutland --- arch/arm64/include/asm/unistd.h | 3 ++ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/{sys32.S => entry32.S} | 34 ++++++---------- arch/arm64/kernel/sys32.c | 51 ++++++++++++++++++++++++ 5 files changed, 68 insertions(+), 24 deletions(-) rename arch/arm64/kernel/{sys32.S => entry32.S} (80%) create mode 100644 arch/arm64/kernel/sys32.c diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index b780c6c76eec..159b44fff258 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -48,6 +48,9 @@ #endif #define __ARCH_WANT_SYS_CLONE + +#ifndef __COMPAT_SYSCALL_NR #include +#endif #define NR_syscalls (__NR_syscalls) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 77d3d9568333..3b3b1cd3e7f0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -19,7 +19,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ cpuinfo.o cpu_errata.o alternative.o cacheinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o \ + sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 02e6af117762..cf21bb3bf752 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -433,7 +433,7 @@ el0_svc_compat: /* * AArch32 syscall handling */ - adr stbl, compat_sys_call_table // load compat syscall table pointer + adrp stbl, compat_sys_call_table // load compat syscall table pointer uxtw scno, w7 // syscall number in w7 (r7) mov sc_nr, #__NR_compat_syscalls b el0_svc_naked diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/entry32.S similarity index 80% rename from arch/arm64/kernel/sys32.S rename to arch/arm64/kernel/entry32.S index 423a5b3fc2be..9a8f6ae2530e 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/entry32.S @@ -27,26 +27,26 @@ * System call wrappers for the AArch32 compatibility layer. */ -compat_sys_sigreturn_wrapper: +ENTRY(compat_sys_sigreturn_wrapper) mov x0, sp mov x27, #0 // prevent syscall restart handling (why) b compat_sys_sigreturn ENDPROC(compat_sys_sigreturn_wrapper) -compat_sys_rt_sigreturn_wrapper: +ENTRY(compat_sys_rt_sigreturn_wrapper) mov x0, sp mov x27, #0 // prevent syscall restart handling (why) b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) -compat_sys_statfs64_wrapper: +ENTRY(compat_sys_statfs64_wrapper) mov w3, #84 cmp w1, #88 csel w1, w3, w1, eq b compat_sys_statfs64 ENDPROC(compat_sys_statfs64_wrapper) -compat_sys_fstatfs64_wrapper: +ENTRY(compat_sys_fstatfs64_wrapper) mov w3, #84 cmp w1, #88 csel w1, w3, w1, eq @@ -58,33 +58,33 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * in registers or that take 32-bit parameters which require sign * extension. 
*/ -compat_sys_pread64_wrapper: +ENTRY(compat_sys_pread64_wrapper) regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) -compat_sys_pwrite64_wrapper: +ENTRY(compat_sys_pwrite64_wrapper) regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) -compat_sys_truncate64_wrapper: +ENTRY(compat_sys_truncate64_wrapper) regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) -compat_sys_ftruncate64_wrapper: +ENTRY(compat_sys_ftruncate64_wrapper) regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) -compat_sys_readahead_wrapper: +ENTRY(compat_sys_readahead_wrapper) regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) -compat_sys_fadvise64_64_wrapper: +ENTRY(compat_sys_fadvise64_64_wrapper) mov w6, w1 regs_to_64 x1, x2, x3 regs_to_64 x2, x4, x5 @@ -92,24 +92,14 @@ compat_sys_fadvise64_64_wrapper: b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) -compat_sys_sync_file_range2_wrapper: +ENTRY(compat_sys_sync_file_range2_wrapper) regs_to_64 x2, x2, x3 regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) -compat_sys_fallocate_wrapper: +ENTRY(compat_sys_fallocate_wrapper) regs_to_64 x2, x2, x3 regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) - -#undef __SYSCALL -#define __SYSCALL(x, y) .quad y // x - -/* - * The system calls table must be 4KB aligned. - */ - .align 12 -ENTRY(compat_sys_call_table) -#include diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c new file mode 100644 index 000000000000..2d5ab3c90b82 --- /dev/null +++ b/arch/arm64/kernel/sys32.c @@ -0,0 +1,51 @@ +/* + * arch/arm64/kernel/sys32.c + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software(void); you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and + * asm/unistd32.h. + */ +#define __COMPAT_SYSCALL_NR + +#include +#include + +asmlinkage long compat_sys_sigreturn_wrapper(void); +asmlinkage long compat_sys_rt_sigreturn_wrapper(void); +asmlinkage long compat_sys_statfs64_wrapper(void); +asmlinkage long compat_sys_fstatfs64_wrapper(void); +asmlinkage long compat_sys_pread64_wrapper(void); +asmlinkage long compat_sys_pwrite64_wrapper(void); +asmlinkage long compat_sys_truncate64_wrapper(void); +asmlinkage long compat_sys_ftruncate64_wrapper(void); +asmlinkage long compat_sys_readahead_wrapper(void); +asmlinkage long compat_sys_fadvise64_64_wrapper(void); +asmlinkage long compat_sys_sync_file_range2_wrapper(void); +asmlinkage long compat_sys_fallocate_wrapper(void); + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = sym, + +/* + * The sys_call_table array must be 4K aligned to be accessible from + * kernel/entry.S. + */ +void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { + [0 ... 
__NR_compat_syscalls - 1] = sys_ni_syscall, +#include +}; From 9648606946b3b0c608846dddb30482b48a6f5c68 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 17:01:56 +0000 Subject: [PATCH 38/47] arm64: Remove asm/syscalls.h This patch moves the sys_rt_sigreturn_wrapper prototype to arch/arm64/kernel/sys.c and removes the asm/syscalls.h header. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscalls.h | 30 ------------------------------ arch/arm64/kernel/sys.c | 3 +-- 2 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 arch/arm64/include/asm/syscalls.h diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h deleted file mode 100644 index 48fe7c600e98..000000000000 --- a/arch/arm64/include/asm/syscalls.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_SYSCALLS_H -#define __ASM_SYSCALLS_H - -#include -#include -#include - -/* - * System call wrappers implemented in kernel/entry.S. - */ -asmlinkage long sys_rt_sigreturn_wrapper(void); - -#include - -#endif /* __ASM_SYSCALLS_H */ diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 3fa98ff14f0e..dec351a9f3d6 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -39,10 +39,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, /* * Wrappers to pass the pt_regs argument. */ +asmlinkage long sys_rt_sigreturn_wrapper(void); #define sys_rt_sigreturn sys_rt_sigreturn_wrapper -#include - #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, From c623b33b4e9599c6ac5076f7db7369eb9869aa04 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Jan 2015 11:42:59 +0000 Subject: [PATCH 39/47] arm64: make sys_call_table const As with x86, mark the sys_call_table const such that it will be placed in the .rodata section. This will cause attempts to modify the table (accidental or deliberate) to fail when strict page permissions are in place. In the absence of strict page permissions, there should be no functional change. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index dec351a9f3d6..75151aaf1a52 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -49,7 +49,7 @@ asmlinkage long sys_rt_sigreturn_wrapper(void); * The sys_call_table array must be 4K aligned to be accessible from * kernel/entry.S. */ -void *sys_call_table[__NR_syscalls] __aligned(4096) = { +void * const sys_call_table[__NR_syscalls] __aligned(4096) = { [0 ... 
__NR_syscalls - 1] = sys_ni_syscall, #include }; From af3cfdbf56b91785650f54e7c9a899d814b4b9fb Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 26 Jan 2015 18:33:44 +0000 Subject: [PATCH 40/47] arm64: kernel: remove ARM64_CPU_SUSPEND config option ARM64_CPU_SUSPEND config option was introduced to make code providing context save/restore selectable only on platforms requiring power management capabilities. Currently ARM64_CPU_SUSPEND depends on the PM_SLEEP config option which in turn is set by the SUSPEND config option. The introduction of CPU_IDLE for arm64 requires that code configured by ARM64_CPU_SUSPEND (context save/restore) should be compiled in in order to enable the CPU idle driver to rely on CPU operations carrying out context save/restore. The ARM64_CPUIDLE config option (ARM64 generic idle driver) is therefore forced to select ARM64_CPU_SUSPEND, even if there may be (ie PM_SLEEP) failed dependencies, which is not a clean way of handling the kernel configuration option. For these reasons, this patch removes the ARM64_CPU_SUSPEND config option and makes the context save/restore dependent on CPU_PM, which is selected whenever either SUSPEND or CPU_IDLE are configured, cleaning up dependencies in the process. This way, code previously configured through ARM64_CPU_SUSPEND is compiled in whenever a power management subsystem requires it to be present in the kernel (SUSPEND || CPU_IDLE), which is the behaviour expected on ARM64 kernels. The cpu_suspend and cpu_init_idle CPU operations are added only if CPU_IDLE is selected, since they are CPU_IDLE specific methods and should be grouped and defined accordingly. PSCI CPU operations are updated to reflect the introduced changes. Signed-off-by: Lorenzo Pieralisi Cc: Arnd Bergmann Cc: Will Deacon Cc: Krzysztof Kozlowski Cc: Daniel Lezcano Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 3 --- arch/arm64/include/asm/cpu_ops.h | 8 ++++---- arch/arm64/include/asm/cpuidle.h | 6 ++++++ arch/arm64/include/asm/suspend.h | 2 -- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/cpuidle.c | 20 ++++++++++++++++++++ arch/arm64/kernel/hw_breakpoint.c | 2 +- arch/arm64/kernel/psci.c | 2 -- arch/arm64/kernel/suspend.c | 21 --------------------- arch/arm64/mm/proc.S | 2 +- drivers/cpuidle/Kconfig.arm64 | 1 - drivers/cpuidle/cpuidle-arm64.c | 1 - 13 files changed, 34 insertions(+), 38 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 21a59bf37145..d3f7e4941231 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -642,9 +642,6 @@ source "kernel/power/Kconfig" config ARCH_SUSPEND_POSSIBLE def_bool y -config ARM64_CPU_SUSPEND - def_bool PM_SLEEP - endmenu menu "CPU Power Management" diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 6f8e2ef9094a..da301ee7395c 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -28,8 +28,6 @@ struct device_node; * enable-method property. * @cpu_init: Reads any data necessary for a specific enable-method from the * devicetree, for a given cpu node and proposed logical id. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from - * devicetree, for a given cpu node and proposed logical id. * @cpu_prepare: Early one-time preparation step for a cpu. If there is a * mechanism for doing so, tests whether it is possible to boot * the given CPU. @@ -42,6 +40,8 @@ struct device_node; * @cpu_die: Makes a cpu leave the kernel. Must not fail. 
 *		cpu being killed.
 * @cpu_kill:	Ensures a cpu has left the kernel. Called from another cpu.
+ * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from
+ *		devicetree, for a given cpu node and proposed logical id.
 * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing
 *		to wrong parameters or error conditions. Called from the
 *		CPU being suspended. Must be called with IRQs disabled.
@@ -49,7 +49,6 @@ struct cpu_operations {
 	const char *name;
 	int		(*cpu_init)(struct device_node *, unsigned int);
-	int		(*cpu_init_idle)(struct device_node *, unsigned int);
 	int		(*cpu_prepare)(unsigned int);
 	int		(*cpu_boot)(unsigned int);
 	void		(*cpu_postboot)(void);
@@ -58,7 +57,8 @@ struct cpu_operations {
 	void		(*cpu_die)(unsigned int cpu);
 	int		(*cpu_kill)(unsigned int cpu);
 #endif
-#ifdef CONFIG_ARM64_CPU_SUSPEND
+#ifdef CONFIG_CPU_IDLE
+	int		(*cpu_init_idle)(struct device_node *, unsigned int);
 	int		(*cpu_suspend)(unsigned long);
 #endif
 };

diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
index b52a9932e2b1..0710654631e7 100644
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -3,11 +3,17 @@
 
 #ifdef CONFIG_CPU_IDLE
 extern int cpu_init_idle(unsigned int cpu);
+extern int cpu_suspend(unsigned long arg);
 #else
 static inline int cpu_init_idle(unsigned int cpu)
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int cpu_suspend(unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 456d67c1f0fa..003802f58963 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -23,6 +23,4 @@ struct sleep_save_sp {
 extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
 extern void cpu_resume(void);
 
-extern int cpu_suspend(unsigned long);
-
 #endif

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3b3b1cd3e7f0..bef04afd6031 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -27,7 +27,7 @@ arm64-obj-$(CONFIG_SMP)	+= smp.o smp_spin_table.o topology.o
 arm64-obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
-arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
+arm64-obj-$(CONFIG_CPU_PM)		+= sleep.o suspend.o
 arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9a9fce090d58..a2ae19403abb 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -152,7 +152,7 @@ int main(void)
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
 #endif
-#ifdef CONFIG_ARM64_CPU_SUSPEND
+#ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
   DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
   DEFINE(MPIDR_HASH_MASK,	offsetof(struct mpidr_hash, mask));

diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 19d17f51db37..5c0896647fd1 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -29,3 +29,23 @@ int cpu_init_idle(unsigned int cpu)
 	of_node_put(cpu_node);
 	return ret;
 }
+
+/**
+ * cpu_suspend() - function to enter a low-power idle state
+ * @arg: argument to pass to CPU suspend operations
+ *
+ * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
+ * operations back-end error code otherwise.
+ */
+int cpu_suspend(unsigned long arg)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * If cpu_ops have not been registered or suspend
+	 * has not been initialized, cpu_suspend call fails early.
+	 */
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
+		return -EOPNOTSUPP;
+	return cpu_ops[cpu]->cpu_suspend(arg);
+}

diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index df1cf15377b4..98bbe06e469c 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -894,7 +894,7 @@ static struct notifier_block hw_breakpoint_reset_nb = {
 	.notifier_call = hw_breakpoint_reset_notify,
 };
 
-#ifdef CONFIG_ARM64_CPU_SUSPEND
+#ifdef CONFIG_CPU_PM
 extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *));
 #else
 static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))

diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index f1dbca7d5c96..3425f311c49e 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -540,8 +540,6 @@ const struct cpu_operations cpu_psci_ops = {
 	.name = "psci",
 #ifdef CONFIG_CPU_IDLE
 	.cpu_init_idle = cpu_psci_cpu_init_idle,
-#endif
-#ifdef CONFIG_ARM64_CPU_SUSPEND
 	.cpu_suspend = cpu_psci_cpu_suspend,
 #endif
 #ifdef CONFIG_SMP

diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 2d6b6065fe7f..d7daf45ae7a2 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -1,7 +1,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -51,26 +50,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
 	hw_breakpoint_restore = hw_bp_restore;
 }
 
-/**
- * cpu_suspend() - function to enter a low-power state
- * @arg: argument to pass to CPU suspend operations
- *
- * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
- * operations back-end error code otherwise.
- */
-int cpu_suspend(unsigned long arg)
-{
-	int cpu = smp_processor_id();
-
-	/*
-	 * If cpu_ops have not been registered or suspend
-	 * has not been initialized, cpu_suspend call fails early.
-	 */
-	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
-		return -EOPNOTSUPP;
-	return cpu_ops[cpu]->cpu_suspend(arg);
-}
-
 /*
  * __cpu_suspend
 *

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index b98fc8ce6a16..28eebfb6af76 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -102,7 +102,7 @@ ENTRY(cpu_do_idle)
 	ret
 ENDPROC(cpu_do_idle)
 
-#ifdef CONFIG_ARM64_CPU_SUSPEND
+#ifdef CONFIG_CPU_PM
 /**
  * cpu_do_suspend - save CPU registers context
  *

diff --git a/drivers/cpuidle/Kconfig.arm64 b/drivers/cpuidle/Kconfig.arm64
index d0a08ed1b2ee..6effb3656735 100644
--- a/drivers/cpuidle/Kconfig.arm64
+++ b/drivers/cpuidle/Kconfig.arm64
@@ -4,7 +4,6 @@
 
 config ARM64_CPUIDLE
 	bool "Generic ARM64 CPU idle Driver"
-	select ARM64_CPU_SUSPEND
 	select DT_IDLE_STATES
 	help
 	  Select this to enable generic cpuidle driver for ARM64.
diff --git a/drivers/cpuidle/cpuidle-arm64.c b/drivers/cpuidle/cpuidle-arm64.c
index 80704b931ba4..39a2c62716c3 100644
--- a/drivers/cpuidle/cpuidle-arm64.c
+++ b/drivers/cpuidle/cpuidle-arm64.c
@@ -19,7 +19,6 @@
 #include
 #include
-#include
 
 #include "dt_idle_states.h"
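As an illustration of the call path this patch wires up -- a sketch
only, not code from the series; the callback shape comes from the
generic cpuidle API, and the function name and index handling are
assumptions:

/*
 * Illustrative sketch: how an idle driver's ->enter() callback would
 * reach the cpu_ops back-end (e.g. PSCI) via the cpu_suspend()
 * wrapper moved into cpuidle.c above.
 */
static int sketch_enter_idle_state(struct cpuidle_device *dev,
				   struct cpuidle_driver *drv, int idx)
{
	if (!idx) {
		cpu_do_idle();		/* shallowest state: plain WFI */
		return idx;
	}

	/*
	 * cpu_suspend() fails early with -EOPNOTSUPP if no cpu_ops
	 * suspend back-end has been registered for this CPU.
	 */
	return cpu_suspend(idx) ? -1 : idx;
}

With cpu_suspend() declared in asm/cpuidle.h (and stubbed to return
-EOPNOTSUPP when CPU_IDLE is off), such a caller needs no #ifdefs.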
From 62fa5e20bd73385253d2b8d3a51f80d1e6d2e1c6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Tue, 27 Jan 2015 18:25:25 +0000
Subject: [PATCH 41/47] arm64: Enable CPU_IDLE in defconfig

This patch enables CPU_IDLE and the generic arm64 cpuidle driver
(ARM64_CPUIDLE).

Signed-off-by: Catalin Marinas
---
 arch/arm64/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 5376d908eabe..66b6cacc3251 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -45,6 +45,8 @@ CONFIG_CMA=y
 CONFIG_CMDLINE="console=ttyAMA0"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_COMPAT=y
+CONFIG_CPU_IDLE=y
+CONFIG_ARM64_CPUIDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

From 523d6e9fae9333a0e2a7baf4d11c8bcca544790e Mon Sep 17 00:00:00 2001
From: "zhichang.yuan"
Date: Tue, 9 Dec 2014 07:26:47 +0000
Subject: [PATCH 42/47] arm64:mm: free the useless initial page table

On a 64K page system, after a PMD section is mapped, the corresponding
initial page table is no longer needed, so that page can be freed.

Signed-off-by: Zhichang Yuan
[catalin.marinas@arm.com: added BUG_ON() to catch late memblock freeing]
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/pgtable.h |  3 +++
 arch/arm64/mm/mmu.c              | 15 ++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 59079248529d..67f6ede39474 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -342,9 +342,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 #ifdef CONFIG_ARM64_64K_PAGES
 #define pud_sect(pud)		(0)
+#define pud_table(pud)		(1)
 #else
 #define pud_sect(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
 				 PUD_TYPE_SECT)
+#define pud_table(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
+				 PUD_TYPE_TABLE)
 #endif
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 155cbb0a74b6..eb293febfb56 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -153,8 +153,14 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
 			 */
-			if (!pmd_none(old_pmd))
+			if (!pmd_none(old_pmd)) {
 				flush_tlb_all();
+				if (pmd_table(old_pmd)) {
+					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+					BUG_ON(alloc != early_alloc);
+					memblock_free(table, PAGE_SIZE);
+				}
+			}
 		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
 				       prot, alloc);
@@ -209,9 +215,12 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 		 * Look up the old pmd table and free it.
 		 */
 		if (!pud_none(old_pud)) {
-			phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
-			memblock_free(table, PAGE_SIZE);
 			flush_tlb_all();
+			if (pud_table(old_pud)) {
+				phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+				BUG_ON(alloc != early_alloc);
+				memblock_free(table, PAGE_SIZE);
+			}
 		}
 	} else {
 		alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
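As background for the new pmd_table()/pud_table() tests: in the ARMv8
descriptor format, bits [1:0] of an entry encode its type (0b01 is a
block/section mapping, 0b11 points to a next-level table), and only a
table entry owns a page that becomes useless once a section mapping
replaces it. A minimal sketch of the predicate (the helper name is
invented; the macros are the real arm64 ones):

/* Sketch only: what pmd_table(old_pmd) boils down to. */
static bool sketch_pmd_is_table(pmd_t old_pmd)
{
	/* PMD_TYPE_MASK selects descriptor bits [1:0] */
	return (pmd_val(old_pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE;
}

Freeing on a section (0b01) entry would hand memblock a physical page
that is still mapping memory, hence the explicit type check before
memblock_free().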
From a3bba370c227b6cbc480aa8959bb32cf966d79a6 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 27 Jan 2015 16:52:50 +0000
Subject: [PATCH 43/47] arm64: drop unnecessary cache+tlb maintenance

In paging_init, we call flush_cache_all, but this is backed by Set/Way
operations which may not achieve anything in the presence of cache line
migration and/or system caches. If the caches are already in an
inconsistent state at this point, there is nothing we can do (short of
flushing the entire physical address space by VA) to empty architected
and system caches. As such, flush_cache_all only serves to mask other
potential bugs. Hence, this patch removes the boot-time call to
flush_cache_all.

Immediately after the cache maintenance we flush the TLBs, but this is
also unnecessary. Before enabling the MMU, the TLBs are invalidated,
and thus are initially clean. When changing the contents of active
tables (e.g. in fixup_executable() for DEBUG_RODATA) we perform the
required TLB maintenance following the update, and therefore no
additional maintenance is required to ensure the new table entries are
in effect. Since activating the MMU we will not have modified system
register fields permitted to be cached in a TLB, and therefore do not
need maintenance for any cached system register fields. Hence, the TLB
flush is unnecessary.

Shortly after the unnecessary TLB flush, we update TTBR0 to point to an
empty zero page rather than the idmap, and flush the TLBs. This
maintenance is necessary to remove the global idmap entries from the
TLBs (as they would conflict with userspace mappings), and is retained.

Signed-off-by: Mark Rutland
Acked-by: Marc Zyngier
Acked-by: Steve Capper
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/mmu.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index eb293febfb56..a421f535d351 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -438,13 +438,6 @@ void __init paging_init(void)
 	map_mem();
 	fixup_executable();
 
-	/*
-	 * Finally flush the caches and tlb to ensure that we're in a
-	 * consistent state.
-	 */
-	flush_cache_all();
-	flush_tlb_all();
-
 	/* allocate the zero page. */
 	zero_page = early_alloc(PAGE_SIZE);
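The maintenance that is retained sits shortly after the deleted hunk;
roughly paraphrased from the remaining paging_init() code (comment
condensed, not a verbatim quote of the file):

	/*
	 * TTBR0 is only used for the identity mapping at this stage.
	 * Point it at the reserved zero page and invalidate the TLBs,
	 * so the global idmap entries cannot later conflict with
	 * userspace mappings.
	 */
	cpu_set_reserved_ttbr0();
	flush_tlb_all();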
From a1c76574f345342d23836b520ce44674d23bc267 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 27 Jan 2015 16:36:30 +0000
Subject: [PATCH 44/47] arm64: mm: use *_sect to check for section maps

The {pgd,pud,pmd}_bad family of macros have slightly fuzzy
cross-architecture semantics, and seem to imply a populated entry that
is not a next-level table, rather than a particular type of entry
(e.g. a section map).

In arm64 code, for those cases where we care about whether an entry is
a section mapping, we can instead use the {pud,pmd}_sect macros to
explicitly check for this case. This helps to document precisely what
we care about, making the code easier to read, and allows for future
relaxation of the *_bad macros to check for other "bad" entries.

To that end, this patch updates the table dumping and initial table
setup to check for section mappings with {pud,pmd}_sect, and
adds/restores BUG_ON(*_bad(*p)) checks after we've handled the *_sect
and *_none cases so as to catch remaining "bad" cases.

In the fault handling code, show_pte is left with *_bad checks as it
only cares about whether it can walk the next level table, and this
path is used for both kernel and userspace fault handling. The former
case will be followed by a die() where we'll report the address that
triggered the fault, which can be useful context for debugging.

Signed-off-by: Mark Rutland
Acked-by: Steve Capper
Cc: Ard Biesheuvel
Cc: Kees Cook
Cc: Laura Abbott
Cc: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/dump.c | 18 ++++++++++++------
 arch/arm64/mm/mmu.c  |  6 ++++--
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index bbc5a29ecaaf..612965375473 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -249,10 +249,12 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 		addr = start + i * PMD_SIZE;
-		if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd))
+		if (pmd_none(*pmd) || pmd_sect(*pmd)) {
 			note_page(st, addr, 3, pmd_val(*pmd));
-		else
+		} else {
+			BUG_ON(pmd_bad(*pmd));
 			walk_pte(st, pmd, addr);
+		}
 	}
 }
 
@@ -264,10 +266,12 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 		addr = start + i * PUD_SIZE;
-		if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud))
+		if (pud_none(*pud) || pud_sect(*pud)) {
 			note_page(st, addr, 2, pud_val(*pud));
-		else
+		} else {
+			BUG_ON(pud_bad(*pud));
 			walk_pmd(st, pud, addr);
+		}
 	}
 }
 
@@ -279,10 +283,12 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st
 	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 		addr = start + i * PGDIR_SIZE;
-		if (pgd_none(*pgd) || pgd_bad(*pgd))
+		if (pgd_none(*pgd)) {
 			note_page(st, addr, 1, pgd_val(*pgd));
-		else
+		} else {
+			BUG_ON(pgd_bad(*pgd));
 			walk_pud(st, pgd, addr);
+		}
 	}
 }

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a421f535d351..2eeac10aa0cf 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -90,13 +90,14 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 {
 	pte_t *pte;
 
-	if (pmd_none(*pmd) || pmd_bad(*pmd)) {
+	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
 		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
 		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
 		flush_tlb_all();
 	}
+	BUG_ON(pmd_bad(*pmd));
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
@@ -128,7 +129,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 	/*
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
-	if (pud_none(*pud) || pud_bad(*pud)) {
+	if (pud_none(*pud) || pud_sect(*pud)) {
 		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
 		if (pud_sect(*pud)) {
 			/*
@@ -140,6 +141,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 		pud_populate(mm, pud, pmd);
 		flush_tlb_all();
 	}
+	BUG_ON(pud_bad(*pud));
 
 	pmd = pmd_offset(pud, addr);
 	do {
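Condensed, the walker idiom this patch establishes looks like this (an
illustrative distillation of the hunks above, not new kernel code):

	if (pud_none(*pud) || pud_sect(*pud)) {
		/* empty slot or section (leaf) mapping: record and stop */
		note_page(st, addr, 2, pud_val(*pud));
	} else {
		/* anything else must be a sane next-level table */
		BUG_ON(pud_bad(*pud));
		walk_pmd(st, pud, addr);
	}

The leaf and empty cases are handled explicitly first, so the *_bad
check is reduced to an assertion rather than part of the dispatch.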
From 6917c857e3ab5bc5e15d2b1ff34dc2443ccf5b0d Mon Sep 17 00:00:00 2001
From: Dave P Martin
Date: Thu, 29 Jan 2015 16:24:43 +0000
Subject: [PATCH 45/47] arm64: Avoid breakage caused by .altmacro in fpsimd save/restore macros

Alternate macro mode is not a property of a macro definition, but a gas
runtime state that alters the way macros are expanded for ever after
(until .noaltmacro is seen).

This means that subsequent assembly code that calls other macros can
break if fpsimdmacros.h is included.

Since these instruction sequences are simple (if dull -- but in a good
way), this patch solves the problem by simply expanding the .irp loops.
The pre-existing fpsimd_{save,restore} macros weren't rolled with .irp
anyway and the sequences affected are short, so this change restores
consistency at little cost.

Signed-off-by: Dave Martin
Acked-by: Marc Zyngier
Acked-by: Ard Biesheuvel
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/fpsimdmacros.h | 43 ++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 007618b8188c..a2daf1293028 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -76,7 +76,6 @@
 	fpsimd_restore_fpcr x\tmpnr, \state
 .endm
 
-.altmacro
 .macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
 	mrs	x\tmpnr1, fpsr
 	str	w\numnr, [\state, #8]
@@ -86,11 +85,22 @@
 	add	\state, \state, x\numnr, lsl #4
 	sub	x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
 	br	x\tmpnr1
-	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
-	.irp	qb, %(qa + 1)
-	stp	q\qa, q\qb, [\state, # -16 * \qa - 16]
-	.endr
-	.endr
+	stp	q30, q31, [\state, #-16 * 30 - 16]
+	stp	q28, q29, [\state, #-16 * 28 - 16]
+	stp	q26, q27, [\state, #-16 * 26 - 16]
+	stp	q24, q25, [\state, #-16 * 24 - 16]
+	stp	q22, q23, [\state, #-16 * 22 - 16]
+	stp	q20, q21, [\state, #-16 * 20 - 16]
+	stp	q18, q19, [\state, #-16 * 18 - 16]
+	stp	q16, q17, [\state, #-16 * 16 - 16]
+	stp	q14, q15, [\state, #-16 * 14 - 16]
+	stp	q12, q13, [\state, #-16 * 12 - 16]
+	stp	q10, q11, [\state, #-16 * 10 - 16]
+	stp	q8, q9, [\state, #-16 * 8 - 16]
+	stp	q6, q7, [\state, #-16 * 6 - 16]
+	stp	q4, q5, [\state, #-16 * 4 - 16]
+	stp	q2, q3, [\state, #-16 * 2 - 16]
+	stp	q0, q1, [\state, #-16 * 0 - 16]
 0:
 .endm
@@ -103,10 +113,21 @@
 	add	\state, \state, x\tmpnr2, lsl #4
 	sub	x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
 	br	x\tmpnr1
-	.irp	qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
-	.irp	qb, %(qa + 1)
-	ldp	q\qa, q\qb, [\state, # -16 * \qa - 16]
-	.endr
-	.endr
+	ldp	q30, q31, [\state, #-16 * 30 - 16]
+	ldp	q28, q29, [\state, #-16 * 28 - 16]
+	ldp	q26, q27, [\state, #-16 * 26 - 16]
+	ldp	q24, q25, [\state, #-16 * 24 - 16]
+	ldp	q22, q23, [\state, #-16 * 22 - 16]
+	ldp	q20, q21, [\state, #-16 * 20 - 16]
+	ldp	q18, q19, [\state, #-16 * 18 - 16]
+	ldp	q16, q17, [\state, #-16 * 16 - 16]
+	ldp	q14, q15, [\state, #-16 * 14 - 16]
+	ldp	q12, q13, [\state, #-16 * 12 - 16]
+	ldp	q10, q11, [\state, #-16 * 10 - 16]
+	ldp	q8, q9, [\state, #-16 * 8 - 16]
+	ldp	q6, q7, [\state, #-16 * 6 - 16]
+	ldp	q4, q5, [\state, #-16 * 4 - 16]
+	ldp	q2, q3, [\state, #-16 * 2 - 16]
+	ldp	q0, q1, [\state, #-16 * 0 - 16]
 0:
 .endm

From 41089357e1874559458f672b9591436ffd3a12e9 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Thu, 29 Jan 2015 17:33:35 +0000
Subject: [PATCH 46/47] arm64: Fix section mismatch on alloc_init_p[mu]d()

Commit 523d6e9fae93 (arm64:mm: free the useless initial page table)
introduced a BUG_ON checking for the allocation type but it was
referring to the early_alloc() function in the __init section. This
patch changes the check to slab_is_available() and also relaxes the
BUG to a WARN_ON_ONCE.

Reported-by: Will Deacon
Acked-by: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/mmu.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2eeac10aa0cf..c6daaf6c6f97 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -26,6 +26,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -159,8 +160,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			flush_tlb_all();
 			if (pmd_table(old_pmd)) {
 				phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
-				BUG_ON(alloc != early_alloc);
-				memblock_free(table, PAGE_SIZE);
+				if (!WARN_ON_ONCE(slab_is_available()))
+					memblock_free(table, PAGE_SIZE);
 			}
 		}
 	} else {
@@ -220,8 +221,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 			flush_tlb_all();
 			if (pud_table(old_pud)) {
 				phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
-				BUG_ON(alloc != early_alloc);
-				memblock_free(table, PAGE_SIZE);
+				if (!WARN_ON_ONCE(slab_is_available()))
+					memblock_free(table, PAGE_SIZE);
 			}
 		}
 	} else {

From d476d94f180af3f0fca77394651d4a98f4df1c54 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Mon, 2 Feb 2015 16:44:39 +0000
Subject: [PATCH 47/47] arm64: compat: Remove incorrect comment in compat_siginfo

The comment was right originally but the _pad array size was wrong. It
was fixed in the meantime but the comment was not updated.

Reported-by: Peter Maydell
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/compat.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 3fb053fa6e98..7fbed6919b54 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -161,7 +161,6 @@ typedef struct compat_siginfo {
 	int si_code;
 
 	union {
-		/* The padding is the same size as AArch64. */
 		int _pad[128/sizeof(int) - 3];
 
 		/* kill() */