From 61d2d1808b20da4a45a90b4bd61ae92f729bab78 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 20 Sep 2022 14:47:31 +0100 Subject: [PATCH 1/4] arm64: mm: don't acquire mutex when rewriting swapper Since commit: 47546a1912fc4a03 ("arm64: mm: install KPTI nG mappings with MMU enabled)" ... when building with CONFIG_DEBUG_ATOMIC_SLEEP=y and booting under QEMU TCG with '-cpu max', there's a boot-time splat: | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 15, name: migration/0 | preempt_count: 1, expected: 0 | RCU nest depth: 0, expected: 0 | no locks held by migration/0/15. | irq event stamp: 28 | hardirqs last enabled at (27): [] _raw_spin_unlock_irq+0x3c/0x7c | hardirqs last disabled at (28): [] multi_cpu_stop+0x150/0x18c | softirqs last enabled at (0): [] copy_process+0x594/0x1964 | softirqs last disabled at (0): [<0000000000000000>] 0x0 | CPU: 0 PID: 15 Comm: migration/0 Not tainted 6.0.0-rc3-00002-g419b42ff7eef #3 | Hardware name: linux,dummy-virt (DT) | Stopper: multi_cpu_stop+0x0/0x18c <- stop_cpus.constprop.0+0xa0/0xfc | Call trace: | dump_backtrace.part.0+0xd0/0xe0 | show_stack+0x1c/0x5c | dump_stack_lvl+0x88/0xb4 | dump_stack+0x1c/0x38 | __might_resched+0x180/0x230 | __might_sleep+0x4c/0xa0 | __mutex_lock+0x5c/0x450 | mutex_lock_nested+0x30/0x40 | create_kpti_ng_temp_pgd+0x4fc/0x6d0 | kpti_install_ng_mappings+0x2b8/0x3b0 | cpu_enable_non_boot_scope_capabilities+0x7c/0xd0 | multi_cpu_stop+0xa0/0x18c | cpu_stopper_thread+0x88/0x11c | smpboot_thread_fn+0x1ec/0x290 | kthread+0x118/0x120 | ret_from_fork+0x10/0x20 Since commit: ee017ee353506fce ("arm64/mm: avoid fixmap race condition when create pud mapping") ... once the kernel leave the SYSTEM_BOOTING state, the fixmap pagetable entries are protected by the fixmap_lock mutex. The new KPTI rewrite code uses __create_pgd_mapping() to create a temporary pagetable. This happens in atomic context, after secondary CPUs are brought up and the kernel has left the SYSTEM_BOOTING state. Hence we try to acquire a mutex in atomic context, which is generally unsound (though benign in this case as the mutex should be free and all other CPUs are quiescent). This patch avoids the issue by pulling the mutex out of alloc_init_pud() and calling it at a higher level in the pagetable manipulation code. This allows it to be used without locking where one CPU is known to be in exclusive control of the machine, even after having left the SYSTEM_BOOTING state. Fixes: 47546a1912fc ("arm64: mm: install KPTI nG mappings with MMU enabled") Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220920134731.1625740-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e7ad44585f40..eb489302c28a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -331,12 +331,6 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); - /* - * No need for locking during early boot. And it doesn't work as - * expected with KASLR enabled. - */ - if (system_state != SYSTEM_BOOTING) - mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -368,15 +362,13 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); - if (system_state != SYSTEM_BOOTING) - mutex_unlock(&fixmap_lock); } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); @@ -400,8 +392,20 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) +{ + mutex_lock(&fixmap_lock); + __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, + pgtable_alloc, flags); + mutex_unlock(&fixmap_lock); +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -extern __alias(__create_pgd_mapping) +extern __alias(__create_pgd_mapping_locked) void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), int flags); From d4955c0ad77dbc684fc716387070ac24801b8bca Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Fri, 16 Sep 2022 23:17:07 +0300 Subject: [PATCH 2/4] arm64: topology: fix possible overflow in amu_fie_setup() cpufreq_get_hw_max_freq() returns max frequency in kHz as *unsigned int*, while freq_inv_set_max_ratio() gets passed this frequency in Hz as 'u64'. Multiplying max frequency by 1000 can potentially result in overflow -- multiplying by 1000ULL instead should avoid that... Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: cd0ed03a8903 ("arm64: use activity monitors for frequency invariance") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/01493d64-2bce-d968-86dc-11a122a9c07d@omp.ru Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index ad2bfc794257..44ebf5b2fc4b 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -237,7 +237,7 @@ static void amu_fie_setup(const struct cpumask *cpus) for_each_cpu(cpu, cpus) { if (!freq_counters_valid(cpu) || freq_inv_set_max_ratio(cpu, - cpufreq_get_hw_max_freq(cpu) * 1000, + cpufreq_get_hw_max_freq(cpu) * 1000ULL, arch_timer_get_rate())) return; } From 05d6f6d346fea2fa4580a0c2b6be207456bebb08 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Mon, 8 Aug 2022 12:54:55 -0700 Subject: [PATCH 3/4] perf/arm-cmn: Add more bits to child node address offset field CMN-600 uses bits [27:0] for child node address offset while bits [30:28] are required to be zero. For CMN-650, the child node address offset field has been increased to include bits [29:0] while leaving only bit 30 set to zero. Let's include the missing two bits and assume older implementations comply with the spec and set bits [29:28] to 0. Signed-off-by: Ilkka Koskinen Fixes: 60d1504070c2 ("perf/arm-cmn: Support new IP features") Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20220808195455.79277-1-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 80d8309652a4..b80a9b74662b 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -36,7 +36,7 @@ #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) -#define CMN_CHILD_NODE_ADDR GENMASK(27, 0) +#define CMN_CHILD_NODE_ADDR GENMASK(29, 0) #define CMN_CHILD_NODE_EXTERNAL BIT(31) #define CMN_MAX_DIMENSION 12 From 13b0566962914e167cb3238fbe29ced618f07a27 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 22 Sep 2022 22:57:15 +0100 Subject: [PATCH 4/4] vmlinux.lds.h: CFI: Reduce alignment of jump-table to function alignment Due to undocumented, hysterical raisins on x86, the CFI jump-table sections in .text are needlessly aligned to PMD_SIZE in the vmlinux linker script. When compiling a CFI-enabled arm64 kernel with a 64KiB page-size, a PMD maps 512MiB of virtual memory and so the .text section increases to a whopping 940MiB and blows the final Image up to 960MiB. Others report a link failure. Since the CFI jump-table requires only instruction alignment, reduce the alignment directives to function alignment for parity with other parts of the .text section. This reduces the size of the .text section for the aforementioned 64KiB page size arm64 kernel to 19MiB for a much more reasonable total Image size of 39MiB. Cc: Sami Tolvanen Cc: Mark Rutland Cc: "Mohan Rao .vanimina" Cc: Kees Cook Cc: Nathan Chancellor Cc: Link: https://lore.kernel.org/all/CAL_GTzigiNOMYkOPX1KDnagPhJtFNqSK=1USNbS0wUL4PW6-Uw@mail.gmail.com/ Fixes: cf68fffb66d6 ("add support for Clang CFI") Reviewed-by: Mark Rutland Tested-by: Mark Rutland Reviewed-by: Sami Tolvanen Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20220922215715.13345-1-will@kernel.org Signed-off-by: Will Deacon --- include/asm-generic/vmlinux.lds.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7515a465ec03..7c90b1ab3e00 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -543,10 +543,9 @@ */ #ifdef CONFIG_CFI_CLANG #define TEXT_CFI_JT \ - . = ALIGN(PMD_SIZE); \ + ALIGN_FUNCTION(); \ __cfi_jt_start = .; \ *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) \ - . = ALIGN(PMD_SIZE); \ __cfi_jt_end = .; #else #define TEXT_CFI_JT