From d132443a73d7a131775df46f33000f67ed92de1e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:43 +0000 Subject: [PATCH 01/43] powerpc/kasan: Fix error detection on memory allocation In case (k_start & PAGE_MASK) doesn't equal (k_start), 'va' will never be NULL although 'block' is NULL. Check the return of memblock_alloc() directly instead of the resulting address in the loop. Fixes: 509cd3f2b473 ("powerpc/32: Simplify KASAN init") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7cb8ca82042bfc45a5cfe726c921cd7e7eeb12a3.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/kasan_init_32.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index cbcad369fcb2..8b15fe09b967 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -76,15 +76,14 @@ static int __init kasan_init_region(void *start, size_t size) return ret; block = memblock_alloc(k_end - k_start, PAGE_SIZE); + if (!block) + return -ENOMEM; for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); - if (!va) - return -ENOMEM; - __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); From 3a66a24f6060e6775f8c02ac52329ea0152d7e58 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:44 +0000 Subject: [PATCH 02/43] powerpc/kasan: Fix issues by lowering KASAN_SHADOW_END At the time being, KASAN_SHADOW_END is 0x100000000, which is 0 in 32-bit representation. This leads to a couple of issues: - kasan_remap_early_shadow_ro() does nothing because the comparison k_cur < k_end is always false. - In ptdump, the address comparison for marker display fails and the marker's name is printed at the start of the KASAN area instead of being printed at the end. However, there is no need to shadow the KASAN shadow area itself, so the KASAN shadow area can stop shadowing memory at the start of itself. With PAGE_OFFSET set to 0xc0000000, the KASAN shadow area then goes from 0xf8000000 to 0xff000000.
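[editor's note: evaluating the new macro with the values quoted above, assuming the generic KASAN_SHADOW_SCALE_SHIFT of 3 and 32-bit arithmetic:

    KASAN_SHADOW_START               = 0xf8000000
    -KASAN_SHADOW_START              = 0x08000000
    0x08000000 >> 3                  = 0x01000000
    KASAN_SHADOW_END = -0x01000000   = 0xff000000

i.e. the shadow area now ends exactly where the shadow of the shadow area itself would begin.]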
Fixes: cbd18991e24f ("powerpc/mm: Fix an Oops in kasan_mmu_init()") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ae1a3c0d19a37410c209c3fc453634cfcc0ee318.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index fbff9ff9032e..fc900937f653 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -23,9 +23,7 @@ #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) -#define KASAN_SHADOW_END 0UL - -#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) +#define KASAN_SHADOW_END (-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT)) #ifdef CONFIG_KASAN void kasan_early_init(void); From d2a91cef9bbdeb87b7449fdab1a6be6000930210 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:45 +0000 Subject: [PATCH 03/43] powerpc/kasan: Fix shadow pages allocation failure Doing KASAN page allocation in MMU_init is too early: the kernel doesn't yet have access to the entire memory space, and memblock_alloc() fails when the kernel is a bit big. Do it from kasan_init() instead. Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c24163ee5d5f8cdf52fefa45055ceb35435b8f15.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 2 -- arch/powerpc/mm/init_32.c | 2 -- arch/powerpc/mm/kasan/kasan_init_32.c | 4 +++- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index fc900937f653..4769bbf7173a 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -27,12 +27,10 @@ #ifdef CONFIG_KASAN void kasan_early_init(void); -void kasan_mmu_init(void); void kasan_init(void); void kasan_late_init(void); #else static inline void kasan_init(void) { } -static inline void kasan_mmu_init(void) { } static inline void kasan_late_init(void) { } #endif diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 872df48ae41b..a6991ef8727d 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -170,8 +170,6 @@ void __init MMU_init(void) btext_unmap(); #endif - kasan_mmu_init(); - setup_kup(); /* Shortly after that, the entire linear mapping will be available */ diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 8b15fe09b967..b7c287adfd59 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -131,7 +131,7 @@ static void __init kasan_unmap_early_shadow_vmalloc(void) flush_tlb_kernel_range(k_start, k_end); } -void __init kasan_mmu_init(void) +static void __init kasan_mmu_init(void) { int ret; struct memblock_region *reg; @@ -159,6 +159,8 @@ void __init kasan_mmu_init(void) void __init kasan_init(void) { + kasan_mmu_init(); + kasan_remap_early_shadow_ro(); clear_page(kasan_early_shadow_page); From 7c31c05e00fc5ff2067332c5f80e525573e7269c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:46 +0000 Subject: [PATCH 04/43] powerpc/kasan: Remove unnecessary page table locking Commit 45ff3c559585 ("powerpc/kasan: Fix parallel loading of modules.") added spinlocks to manage parallel module loading.
Since then commit 47febbeeec44 ("powerpc/32: Force KASAN_VMALLOC for modules") converted the module loading to KASAN_VMALLOC. The spinlocking has then become unneeded and can be removed to simplify kasan_init_shadow_page_tables(). Also remove the inclusion of linux/moduleloader.h and linux/vmalloc.h, which are not needed anymore since the removal of module management. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/81a4d3aee8b82bc1355595935c8f4ad9d3b22a83.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/kasan_init_32.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index b7c287adfd59..91e2ade75192 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -34,31 +32,22 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned { pmd_t *pmd; unsigned long k_cur, k_next; - pte_t *new = NULL; pmd = pmd_ptr_k(k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { + pte_t *new; + k_next = pgd_addr_end(k_cur, k_end); if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) continue; - if (!new) - new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); + new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); if (!new) return -ENOMEM; kasan_populate_pte(new, PAGE_KERNEL); - - smp_wmb(); /* See comment in __pte_alloc */ - - spin_lock(&init_mm.page_table_lock); - /* Has another populated it ? */ - if (likely((void *)pmd_page_vaddr(*pmd) == kasan_early_shadow_pte)) { - pmd_populate_kernel(&init_mm, pmd, new); - new = NULL; - } - spin_unlock(&init_mm.page_table_lock); + pmd_populate_kernel(&init_mm, pmd, new); } return 0; } From 7dec42ab57f2f59feba82abf0353164479bfde4c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:47 +0000 Subject: [PATCH 05/43] powerpc/kasan: Refactor update of early shadow mappings kasan_remap_early_shadow_ro() and kasan_unmap_early_shadow_vmalloc() are both updating the early shadow mapping: the first one sets the mapping read-only while the other clears the mapping.
Refactor and create kasan_update_early_region(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8c496c0828de2608c7c940c45525d177e91b6f1b.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/kasan_init_32.c | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 91e2ade75192..10481d904fea 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -79,45 +79,42 @@ static int __init kasan_init_region(void *start, size_t size) return 0; } -static void __init kasan_remap_early_shadow_ro(void) +static void __init +kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { - pgprot_t prot = kasan_prot_ro(); - unsigned long k_start = KASAN_SHADOW_START; - unsigned long k_end = KASAN_SHADOW_END; unsigned long k_cur; phys_addr_t pa = __pa(kasan_early_shadow_page); - kasan_populate_pte(kasan_early_shadow_pte, prot); - - for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_ptr_k(k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) continue; - __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + __set_pte_at(&init_mm, k_cur, ptep, pte, 0); } - flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); + + flush_tlb_kernel_range(k_start, k_end); +} + +static void __init kasan_remap_early_shadow_ro(void) +{ + pgprot_t prot = kasan_prot_ro(); + phys_addr_t pa = __pa(kasan_early_shadow_page); + + kasan_populate_pte(kasan_early_shadow_pte, prot); + + kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END, + pfn_pte(PHYS_PFN(pa), prot)); } static void __init kasan_unmap_early_shadow_vmalloc(void) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START); unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); - unsigned long k_cur; - phys_addr_t pa = __pa(kasan_early_shadow_page); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); - pte_t *ptep = pte_offset_kernel(pmd, k_cur); - - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) - continue; - - __set_pte_at(&init_mm, k_cur, ptep, __pte(0), 0); - } - flush_tlb_kernel_range(k_start, k_end); + kasan_update_early_region(k_start, k_end, __pte(0)); } static void __init kasan_mmu_init(void) From ec97d022f621c6c850aec46d8818b49c6aae95ad Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:48 +0000 Subject: [PATCH 06/43] powerpc/kasan: Declare kasan_init_region() weak In order to allow sub-arches to allocate KASAN regions using optimised methods (huge pages on 8xx, BATs on BOOK3S, ...), declare kasan_init_region() weak. Also make kasan_init_shadow_page_tables() accessible from outside, so that it can be called from the specific kasan_init_region() functions if needed. And populate the remaining KASAN address space only once the region mapping is done, to allow the 8xx to allocate hugepd instead of standard page tables for mapping via 8M hugepages.
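[editor's note: a hypothetical sketch, not part of this series, of the shape a sub-arch override can now take; map_shadow_with_blocks() is a made-up name standing for the optimised platform mapping:

    int __init kasan_init_region(void *start, size_t size)
    {
        unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
        unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);

        /* Map the shadow of [start, start + size) with blocks (BATs, 8M pages). */
        if (!map_shadow_with_blocks(k_start, k_end))    /* hypothetical helper */
            return 0;

        /* Otherwise build standard page tables with the now-exported helper. */
        return kasan_init_shadow_page_tables(k_start, k_end);
    }
]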
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/3c1ce419fa1b5a4171b92d7fb16455ca17e1b96d.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 3 +++ arch/powerpc/mm/kasan/kasan_init_32.c | 21 +++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 4769bbf7173a..107a24c3f7b3 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -34,5 +34,8 @@ static inline void kasan_init(void) { } static inline void kasan_late_init(void) { } #endif +int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end); +int kasan_init_region(void *start, size_t size); + #endif /* __ASSEMBLY */ #endif diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 10481d904fea..76d418af4ce8 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -28,7 +28,7 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); } -static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) { pmd_t *pmd; unsigned long k_cur, k_next; @@ -52,7 +52,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned return 0; } -static int __init kasan_init_region(void *start, size_t size) +int __init __weak kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); @@ -122,14 +122,6 @@ static void __init kasan_mmu_init(void) int ret; struct memblock_region *reg; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || - IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); - - if (ret) - panic("kasan: kasan_init_shadow_page_tables() failed"); - } - for_each_memblock(memory, reg) { phys_addr_t base = reg->base; phys_addr_t top = min(base + reg->size, total_lowmem); @@ -141,6 +133,15 @@ static void __init kasan_mmu_init(void) if (ret) panic("kasan: kasan_init_region() failed"); } + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || + IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + } void __init kasan_init(void) From 3af4786eb429b2df76cbd7ce3bae21467ac3e4fb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:51 +0000 Subject: [PATCH 07/43] powerpc/ptdump: Add _PAGE_COHERENT flag For platforms using shared.c (4xx, Book3e, Book3s/32), also handle the _PAGE_COHERENT flag which corresponds to the M bit of the WIMG flags. 
Signed-off-by: Christophe Leroy [mpe: Make it more verbose, use "coherent" rather than "m"] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/324c3d860717e8e91fca3bb6c0f8b23e1644a404.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/shared.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f7ed2f187cb0..784f8df17f73 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -30,6 +30,11 @@ static const struct flag_info flag_array[] = { .val = _PAGE_PRESENT, .set = "present", .clear = " ", + }, { + .mask = _PAGE_COHERENT, + .val = _PAGE_COHERENT, + .set = "coherent", + .clear = " ", }, { .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, From 6b30830e2003d9d77696084ebe2fc19dbe7d6f70 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:52 +0000 Subject: [PATCH 08/43] powerpc/ptdump: Display size of BATs Display the size of areas mapped with BATs. For that, the size display for pages is refactored. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/acf764eee231f0358e66ca9e819f052804055acc.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/bats.c | 4 ++++ arch/powerpc/mm/ptdump/ptdump.c | 23 ++++++++++++++--------- arch/powerpc/mm/ptdump/ptdump.h | 3 +++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index d3a5d6b318d1..d6c660f63d71 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -10,6 +10,8 @@ #include #include +#include "ptdump.h" + static char *pp_601(int k, int pp) { if (pp == 0) @@ -42,6 +44,7 @@ static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) #else seq_printf(m, "0x%08x ", pbn); #endif + pt_dump_size(m, size); seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); @@ -88,6 +91,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool #else seq_printf(m, "0x%08x ", brpn); #endif + pt_dump_size(m, size); if (k == 1) seq_puts(m, "User "); diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index d92bb8ea229c..1f97668853e3 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -112,6 +112,19 @@ static struct addr_marker address_markers[] = { seq_putc(m, c); \ }) +void pt_dump_size(struct seq_file *m, unsigned long size) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + + /* Work out what appropriate unit to use */ + while (!(size & 1023) && unit[1]) { + size >>= 10; + unit++; + } + pt_dump_seq_printf(m, "%9lu%c ", size, *unit); +} + static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -146,8 +159,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info static void dump_addr(struct pg_state *st, unsigned long addr) { - static const char units[] = "KMGTPE"; - const char *unit = units; unsigned long delta; #ifdef CONFIG_PPC64 @@ -164,13 +175,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr) pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; } - /* Work out what appropriate unit to use */ - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); - + pt_dump_size(st->seq, delta); } static void note_prot_wx(struct
pg_state *st, unsigned long addr) diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h index 5d513636de73..154efae96ae0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.h +++ b/arch/powerpc/mm/ptdump/ptdump.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include struct flag_info { u64 mask; @@ -17,3 +18,5 @@ struct pgtable_level { }; extern struct pgtable_level pg_level[5]; + +void pt_dump_size(struct seq_file *m, unsigned long delta); From 8961a2a5353cca5451f648f4838cd848a3b2354c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:53 +0000 Subject: [PATCH 09/43] powerpc/ptdump: Standardise display of BAT flags Display BAT flags the same way as page flags: rwx and wimg Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a07585f353c167b8db9597d83f992a5cb4fbf4c4.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/bats.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index d6c660f63d71..cebb58c7e289 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -15,12 +15,12 @@ static char *pp_601(int k, int pp) { if (pp == 0) - return k ? "NA" : "RWX"; + return k ? " " : "rwx"; if (pp == 1) - return k ? "ROX" : "RWX"; + return k ? "r x" : "rwx"; if (pp == 2) - return k ? "RWX" : "RWX"; - return k ? "ROX" : "ROX"; + return "rwx"; + return "r x"; } static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) @@ -48,12 +48,9 @@ static void bat_show_601(struct seq_file *m, int idx, u32 lower, u32 upper) seq_printf(m, "Kernel %s User %s", pp_601(k & 2, pp), pp_601(k & 1, pp)); - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); seq_puts(m, "\n"); } @@ -101,20 +98,16 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool seq_puts(m, "Kernel/User "); if (lower & BPP_RX) - seq_puts(m, is_d ? "RO " : "EXEC "); + seq_puts(m, is_d ? "r " : " x "); else if (lower & BPP_RW) - seq_puts(m, is_d ? "RW " : "EXEC "); + seq_puts(m, is_d ? "rw " : " x "); else - seq_puts(m, is_d ? "NA " : "NX "); + seq_puts(m, is_d ? " " : " "); - if (lower & _PAGE_WRITETHRU) - seq_puts(m, "write through "); - if (lower & _PAGE_NO_CACHE) - seq_puts(m, "no cache "); - if (lower & _PAGE_COHERENT) - seq_puts(m, "coherent "); - if (lower & _PAGE_GUARDED) - seq_puts(m, "guarded "); + seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " "); + seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " "); + seq_puts(m, lower & _PAGE_COHERENT ? "m " : " "); + seq_puts(m, lower & _PAGE_GUARDED ? "g " : " "); seq_puts(m, "\n"); } From b00ff6d8c1c3898b0f768cbb38ef722d25bd2f39 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:54 +0000 Subject: [PATCH 10/43] powerpc/ptdump: Properly handle non standard page size In order to properly display information regardless of the page size, it is necessary to take into account real page size. 
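[editor's note: a worked example with hypothetical addresses: two consecutive 8M huge page entries map pa 0x00000000 and pa 0x00800000. The old contiguity test in note_page(),

    pa != st->last_pa + PAGE_SIZE

never sees them as contiguous (0x00000000 + 4K != 0x00800000), so the dump was needlessly split; comparing against the recorded st->page_size (8M here), as done below, merges them again.]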
Fixes: cabe8138b23c ("powerpc: dump as a single line areas mapping a single physical page.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a53b2a0ffd042a8d85464bf90d55bc5b970e00a1.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/ptdump.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 1f97668853e3..98d82dcf6f0b 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -60,6 +60,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -168,9 +169,9 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; @@ -195,7 +196,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -207,6 +208,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -218,7 +220,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -246,6 +248,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; st->current_flags = flag; st->level = level; } else { @@ -261,7 +264,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } @@ -278,7 +281,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } @@ -294,7 +297,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -313,7 +316,7 @@ static void walk_pagetables(struct pg_state *st) /* pgd exists */ walk_pud(st, pgd, addr); else - note_page(st, addr, 1, pgd_val(*pgd)); + note_page(st, addr, 1, pgd_val(*pgd), PGDIR_SIZE); } } @@ -368,7 +371,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse 
kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } From 6b789a26d7da2e0256d199da980369ef8fb49ec6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:55 +0000 Subject: [PATCH 11/43] powerpc/ptdump: Handle hugepd at PGD level The 8xx is about to map kernel linear space and IMMR using huge pages. In order to display those pages properly, ptdump needs to handle hugepd tables at PGD level. For the time being do it only at PGD level. Further patches may add handling of hugepd tables at lower level for other platforms when needed in the future. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/630728289158dcfeb06b14d40ed7c4c4e7148cf1.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/ptdump.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 98d82dcf6f0b..5fc880e30175 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -269,6 +270,26 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) } } +static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start, + int pdshift, int level) +{ +#ifdef CONFIG_ARCH_HAS_HUGEPD + unsigned int i; + int shift = hugepd_shift(*phpd); + int ptrs_per_hpd = pdshift - shift > 0 ? 1 << (pdshift - shift) : 1; + + if (start & ((1 << shift) - 1)) + return; + + for (i = 0; i < ptrs_per_hpd; i++) { + unsigned long addr = start + (i << shift); + pte_t *pte = hugepte_offset(*phpd, addr, pdshift); + + note_page(st, addr, level + 1, pte_val(*pte), 1 << shift); + } +#endif +} + static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { pmd_t *pmd = pmd_offset(pud, 0); @@ -312,11 +333,13 @@ static void walk_pagetables(struct pg_state *st) * the hash pagetable. */ for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { - if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) + if (pgd_none(*pgd) || pgd_is_leaf(*pgd)) + note_page(st, addr, 1, pgd_val(*pgd), PGDIR_SIZE); + else if (is_hugepd(__hugepd(pgd_val(*pgd)))) + walk_hugepd(st, (hugepd_t *)pgd, addr, PGDIR_SHIFT, 1); + else /* pgd exists */ walk_pud(st, pgd, addr); - else - note_page(st, addr, 1, pgd_val(*pgd), PGDIR_SIZE); } } From 4b19f96a81bceaf0bcf44d79c0855c61158065ec Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:56 +0000 Subject: [PATCH 12/43] powerpc/32s: Don't warn when mapping RO data ROX. Mapping RO data as ROX is not an issue since that data cannot be modified to introduce an exploit. PPC64 accepts to have RO data mapped ROX, as a trade off between kernel size and strictness of protection. On PPC32, kernel size is even more critical as amount of memory is usually small. Depending on the number of available IBATs, the last IBATs might overflow the end of text. Only warn if it crosses the end of RO data. 
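[editor's note: making the new condition concrete with a hypothetical layout: take a last IBAT starting at base = 4M and doubled to size = 4M, so it maps executable up to 8M. If __init_begin (the border) is at 8M, only RO data is mapped X and the warning stays silent; if the border were at 6M, base + size > border and the warning fires, because actual RW data would become executable.]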
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6499f8eeb2a36330e5c9fc1cee9a79374875bd54.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 39ba53ca5bb5..a9b2cbc74797 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -187,6 +187,7 @@ void mmu_mark_initmem_nx(void) int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) @@ -201,9 +202,10 @@ void mmu_mark_initmem_nx(void) size = block_size(base, top); size = max(size, 128UL << 10); if ((top - base) > size) { - if (strict_kernel_rwx_enabled()) - pr_warn("Kernel _etext not properly aligned\n"); size <<= 1; + if (strict_kernel_rwx_enabled() && base + size > border) + pr_warn("Some RW data is getting mapped X. " + "Adjust CONFIG_DATA_SHIFT to avoid that.\n"); } setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; From 925ac141d106b55acbe112a9272f970631a3c082 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:58 +0000 Subject: [PATCH 13/43] powerpc/mm: Allocate static page tables for fixmap Allocate static page tables for the fixmap area. This allows setting mappings through page tables before memblock is ready. That's needed to use early_ioremap() early and to use standard page mappings with fixmap. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4f4b1412d34de6801b8e925cb88fc69d056ff536.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/fixmap.h | 4 ++++ arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/mm/pgtable_32.c | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 2ef155a3c821..ccbe2e83c950 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -86,6 +86,10 @@ enum fixed_addresses { #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define FIXMAP_ALIGNED_SIZE (ALIGN(FIXADDR_TOP, PGDIR_SIZE) - \ + ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE)) +#define FIXMAP_PTE_SIZE (FIXMAP_ALIGNED_SIZE / PGDIR_SIZE * PTE_TABLE_SIZE) + #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG #define FIXMAP_PAGE_IO PAGE_KERNEL_NCG diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 15f0a7c84944..d642e42eabb1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -80,7 +80,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Configure static keys first, now that we're relocated. 
*/ setup_feature_keys(); - early_ioremap_setup(); + early_ioremap_init(); /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f62de06e3d07..9934659cb871 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -29,11 +29,27 @@ #include #include #include +#include #include extern char etext[], _stext[], _sinittext[], _einittext[]; +static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data; + +notrace void __init early_ioremap_init(void) +{ + unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE); + pte_t *ptep = (pte_t *)early_fixmap_pagetable; + pmd_t *pmdp = pmd_ptr_k(addr); + + for (; (s32)(FIXADDR_TOP - addr) > 0; + addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++) + pmd_populate_kernel(&init_mm, pmdp, ptep); + + early_ioremap_setup(); +} + static void __init *early_alloc_pgtable(unsigned long size) { void *ptr = memblock_alloc(size, size); From 4e3319c23a66dabfd6c35f4d2633d64d99b68096 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:48:59 +0000 Subject: [PATCH 14/43] powerpc/mm: Fix conditions to perform MMU specific management by blocks on PPC32. Setting init mem to NX shall depend on sinittext being mapped by block, not on stext being mapped by block. Setting text and rodata to RO shall depend on stext being mapped by block, not on sinittext being mapped by block. Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7d565fb8f51b18a3d98445a830b2f6548cb2da2a.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/pgtable_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 9934659cb871..bd0cb6e3573e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -185,7 +185,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext + 1)) + if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); @@ -197,7 +197,7 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; - if (v_block_mapped((unsigned long)_sinittext)) { + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); ptdump_check_wx(); return; From fadaac67c9007cad9fc485e36dcc54460d6d5886 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:00 +0000 Subject: [PATCH 15/43] powerpc/mm: PTE_ATOMIC_UPDATES is only for 40x Only 40x still uses PTE_ATOMIC_UPDATES. 40x cannot select CONFIG_PTE_64BIT.
Drop handling of PTE_ATOMIC_UPDATES: - In nohash/64 - In nohash/32 for CONFIG_PTE_64BIT Keep PTE_ATOMIC_UPDATES only for nohash/32 for !CONFIG_PTE_64BIT Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d6f8e1f46583f1842de24581a68b0496feb15516.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 17 ------------ arch/powerpc/include/asm/nohash/64/pgtable.h | 28 +------------------- 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 4315d40906a0..7e908a176e9e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -262,25 +262,8 @@ static inline unsigned long long pte_update(pte_t *p, unsigned long clr, unsigned long set) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long long old; - unsigned long tmp; - - __asm__ __volatile__("\ -1: lwarx %L0,0,%4\n\ - lwzx %0,0,%3\n\ - andc %1,%L0,%5\n\ - or %1,%1,%6\n" - PPC405_ERR77(0,%3) -" stwcx. %1,0,%4\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* PTE_ATOMIC_UPDATES */ unsigned long long old = pte_val(*p); *p = __pte((old & ~(unsigned long long)clr) | set); -#endif /* !PTE_ATOMIC_UPDATES */ #ifdef CONFIG_44x if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9a33b8bd842d..9c703b140d64 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -211,22 +211,9 @@ static inline unsigned long pte_update(struct mm_struct *mm, unsigned long set, int huge) { -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__( - "1: ldarx %0,0,%3 # pte_update\n\ - andc %1,%0,%4 \n\ - or %1,%1,%6\n\ - stdcx. %1,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*ptep) - : "r" (ptep), "r" (clr), "m" (*ptep), "r" (set) - : "cc" ); -#else unsigned long old = pte_val(*ptep); *ptep = __pte((old & ~clr) | set); -#endif + /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); @@ -310,21 +297,8 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); -#ifdef PTE_ATOMIC_UPDATES - unsigned long old, tmp; - - __asm__ __volatile__( - "1: ldarx %0,0,%4\n\ - or %0,%3,%0\n\ - stdcx. %0,0,%4\n\ - bne- 1b" - :"=&r" (old), "=&r" (tmp), "=m" (*ptep) - :"r" (bits), "r" (ptep), "m" (*ptep) - :"cc"); -#else unsigned long old = pte_val(*ptep); *ptep = __pte(old | bits); -#endif flush_tlb_page(vma, address); } From 2db99aeb63dd6e8808dc054d181c4d0e8645bbe0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:01 +0000 Subject: [PATCH 16/43] powerpc/mm: Refactor pte_update() on nohash/32 When CONFIG_PTE_64BIT is set, pte_update() operates on 'unsigned long long' When CONFIG_PTE_64BIT is not set, pte_update() operates on 'unsigned long' In asm/page.h, we have pte_basic_t which is 'unsigned long long' when CONFIG_PTE_64BIT is set and 'unsigned long' otherwise. Refactor pte_update() using pte_basic_t. 
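[editor's note: for reference, a simplified sketch of the asm/page.h definition described above:

    #ifdef CONFIG_PTE_64BIT
    typedef unsigned long long pte_basic_t;
    #else
    typedef unsigned long pte_basic_t;
    #endif
]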
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/590d67994a2847cd9fe088f7d974499e3a18b6ac.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 26 +++----------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 7e908a176e9e..db17f50d6ac3 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -222,12 +222,9 @@ static inline void pmd_clear(pmd_t *pmdp) * to properly flush the virtually tagged instruction cache of * those implementations. */ -#ifndef CONFIG_PTE_64BIT -static inline unsigned long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long set) { -#ifdef PTE_ATOMIC_UPDATES +#if defined(PTE_ATOMIC_UPDATES) && !defined(CONFIG_PTE_64BIT) unsigned long old, tmp; __asm__ __volatile__("\ @@ -241,8 +238,8 @@ static inline unsigned long pte_update(pte_t *p, : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); #else /* PTE_ATOMIC_UPDATES */ - unsigned long old = pte_val(*p); - unsigned long new = (old & ~clr) | set; + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) p->pte = p->pte1 = p->pte2 = p->pte3 = new; @@ -257,21 +254,6 @@ static inline unsigned long pte_update(pte_t *p, #endif return old; } -#else /* CONFIG_PTE_64BIT */ -static inline unsigned long long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) -{ - unsigned long long old = pte_val(*p); - *p = __pte((old & ~(unsigned long long)clr) | set); - -#ifdef CONFIG_44x - if ((old & _PAGE_USER) && (old & _PAGE_EXEC)) - icache_44x_need_flush = 1; -#endif - return old; -} -#endif /* CONFIG_PTE_64BIT */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) From 1c1bf294882bd12669e39ccd7680c4ce34b7c15c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:02 +0000 Subject: [PATCH 17/43] powerpc/mm: Refactor pte_update() on book3s/32 When CONFIG_PTE_64BIT is set, pte_update() operates on 'unsigned long long' When CONFIG_PTE_64BIT is not set, pte_update() operates on 'unsigned long' In asm/page.h, we have pte_basic_t which is 'unsigned long long' when CONFIG_PTE_64BIT is set and 'unsigned long' otherwise. Refactor pte_update() using pte_basic_t. While we are at it, drop the comment on 44x which is not applicable to book3s version of pte_update(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c78912bc8613fb249c3d80aeb1062796b5c49400.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 58 +++++++------------- 1 file changed, 20 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 0d4bccb4b9f2..d2fc324cdf07 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -253,53 +253,35 @@ extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, * and the PTE may be either 32 or 64 bit wide. In the later case, * when using atomic updates, only the low part of the PTE is * accessed atomically. 
- * - * In addition, on 44x, we also maintain a global flag indicating - * that an executable user mapping was modified, which is needed - * to properly flush the virtually tagged instruction cache of - * those implementations. */ -#ifndef CONFIG_PTE_64BIT -static inline unsigned long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) +static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long set) { - unsigned long old, tmp; - - __asm__ __volatile__("\ -1: lwarx %0,0,%3\n\ - andc %1,%0,%4\n\ - or %1,%1,%5\n" -" stwcx. %1,0,%3\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); - - return old; -} -#else /* CONFIG_PTE_64BIT */ -static inline unsigned long long pte_update(pte_t *p, - unsigned long clr, - unsigned long set) -{ - unsigned long long old; + pte_basic_t old; unsigned long tmp; - __asm__ __volatile__("\ -1: lwarx %L0,0,%4\n\ - lwzx %0,0,%3\n\ - andc %1,%L0,%5\n\ - or %1,%1,%6\n" -" stwcx. %1,0,%4\n\ - bne- 1b" + __asm__ __volatile__( +#ifndef CONFIG_PTE_64BIT +"1: lwarx %0, 0, %3\n" +" andc %1, %0, %4\n" +#else +"1: lwarx %L0, 0, %3\n" +" lwz %0, -4(%3)\n" +" andc %1, %L0, %4\n" +#endif +" or %1, %1, %5\n" +" stwcx. %1, 0, %3\n" +" bne- 1b" : "=&r" (old), "=&r" (tmp), "=m" (*p) - : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p) +#ifndef CONFIG_PTE_64BIT + : "r" (p), +#else + : "b" ((unsigned long)(p) + 4), +#endif + "r" (clr), "r" (set), "m" (*p) : "cc" ); return old; } -#endif /* CONFIG_PTE_64BIT */ /* * 2.6 calls this without flushing the TLB entry; this is wrong From c7fa77016eb6093df38fdabdb7a89bb9617e7185 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:03 +0000 Subject: [PATCH 18/43] powerpc/mm: Standardise __ptep_test_and_clear_young() params between PPC32 and PPC64 On PPC32, __ptep_test_and_clear_young() takes the mm->context.id. In preparation for standardising pte_update() params between PPC32 and PPC64, __ptep_test_and_clear_young() needs mm instead of mm->context.id. Replace the context param by mm. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0a65470e50a14373b7c2291184514aa982462255.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 7 ++++--- arch/powerpc/include/asm/nohash/32/pgtable.h | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index d2fc324cdf07..25c59511fcab 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -288,18 +288,19 @@ static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long * for our hash-based implementation, we fix that up here.
*/ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; old = pte_update(ptep, _PAGE_ACCESSED, 0); if (old & _PAGE_HASHPTE) { unsigned long ptephys = __pa(ptep) & PAGE_MASK; - flush_hash_pages(context, addr, ptephys, 1); + flush_hash_pages(mm->context.id, addr, ptephys, 1); } return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index db17f50d6ac3..e963e6880d7c 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -256,14 +256,15 @@ static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long } #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; old = pte_update(ptep, _PAGE_ACCESSED, 0); return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ - __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep) + __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, From 06f52524870122fb43b214d27e8f4546da36f8ba Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:04 +0000 Subject: [PATCH 19/43] powerpc/mm: Standardise pte_update() prototype between PPC32 and PPC64 PPC64 takes 3 additional parameters compared to PPC32: - mm - address - huge These 3 parameters will be needed in order to perform different actions depending on the page size on the 8xx. Make the pte_update() prototype identical for PPC32 and PPC64. This allows dropping an #ifdef in huge_ptep_get_and_clear().
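[editor's note: summarised, the prototype goes from the PPC32-only form to the common form visible in the hunks below:

    /* before (PPC32): */
    static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long set);

    /* after (PPC32 and PPC64 alike): */
    static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
                                         unsigned long clr, unsigned long set, int huge);
]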
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/38111acf6841047a8addde37c63e92d611ee38c2.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 15 ++++++++------- arch/powerpc/include/asm/hugetlb.h | 4 ---- arch/powerpc/include/asm/nohash/32/pgtable.h | 13 +++++++------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 25c59511fcab..8a091d125f2d 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -218,7 +218,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define pte_clear(mm, addr, ptep) \ - do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0) + do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) @@ -254,7 +254,8 @@ extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, * when using atomic updates, only the low part of the PTE is * accessed atomically. */ -static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long set) +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { pte_basic_t old; unsigned long tmp; @@ -292,7 +293,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED, 0); + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); if (old & _PAGE_HASHPTE) { unsigned long ptephys = __pa(ptep) & PAGE_MASK; flush_hash_pages(mm->context.id, addr, ptephys, 1); @@ -306,14 +307,14 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0)); + return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(ptep, _PAGE_RW, 0); + pte_update(mm, addr, ptep, _PAGE_RW, 0, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -324,7 +325,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - pte_update(ptep, 0, set); + pte_update(vma->vm_mm, address, ptep, 0, set, 0); flush_tlb_page(vma, address); } @@ -522,7 +523,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) | (pte_val(pte) & ~_PAGE_HASHPTE)); else - pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); + pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0); #elif defined(CONFIG_PTE_64BIT) /* Second case is 32-bit with 64-bit PTE. 
In this case, we diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index bd6504c28c2f..e4276af034e9 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -40,11 +40,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PPC64 return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1)); -#else - return __pte(pte_update(ptep, ~0UL, 0)); -#endif } #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index e963e6880d7c..474dd1db065f 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -166,7 +166,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #ifndef __ASSEMBLY__ #define pte_clear(mm, addr, ptep) \ - do { pte_update(ptep, ~0, 0); } while (0) + do { pte_update(mm, addr, ptep, ~0, 0, 0); } while (0) #ifndef pte_mkwrite static inline pte_t pte_mkwrite(pte_t pte) @@ -222,7 +222,8 @@ static inline void pmd_clear(pmd_t *pmdp) * to properly flush the virtually tagged instruction cache of * those implementations. */ -static inline pte_basic_t pte_update(pte_t *p, unsigned long clr, unsigned long set) +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) { #if defined(PTE_ATOMIC_UPDATES) && !defined(CONFIG_PTE_64BIT) unsigned long old, tmp; @@ -260,7 +261,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED, 0); + old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; } #define ptep_test_and_clear_young(__vma, __addr, __ptep) \ @@ -270,7 +271,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return __pte(pte_update(ptep, ~0, 0)); + return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT @@ -280,7 +281,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); unsigned long set = pte_val(pte_wrprotect(__pte(0))); - pte_update(ptep, clr, set); + pte_update(mm, addr, ptep, clr, set, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -293,7 +294,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long set = pte_val(entry) & pte_val(pte_set); unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr); - pte_update(ptep, clr, set); + pte_update(vma->vm_mm, address, ptep, clr, set, 0); flush_tlb_page(vma, address); } From 6ad41bfbc907be0cd414f09fa5382d2133376595 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:05 +0000 Subject: [PATCH 20/43] powerpc/mm: Create a dedicated pte_update() for 8xx pte_update() is a bit special for the 8xx. At the time being, that's an #ifdef inside the nohash/32 pte_update(). As we are going to make it even more special in the coming patches, create a dedicated version for pte_update() for 8xx. 
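[editor's note: the 8xx-specific rule introduced below boils down to, assuming 16k kernel pages backed by 4k hardware entries:

    num = huge ? 1 : PAGE_SIZE / SZ_4K;    /* 16k / 4k = 4 identical slots */

i.e. a regular PTE write is replicated into four consecutive pte_basic_t slots, while a huge page writes a single slot.]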
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a103be0099ac2360f8c44f4a1a63cc03713a1360.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 29 +++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 474dd1db065f..5fb3f6798e22 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -221,7 +221,31 @@ static inline void pmd_clear(pmd_t *pmdp) * that an executable user mapping was modified, which is needed * to properly flush the virtually tagged instruction cache of * those implementations. + * + * On the 8xx, the page tables are a bit special. For 16k pages, we have + * 4 identical entries. For other page sizes, we have a single entry in the + * table. */ +#ifdef CONFIG_PPC_8xx +static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, + unsigned long clr, unsigned long set, int huge) +{ + pte_basic_t *entry = &p->pte; + pte_basic_t old = pte_val(*p); + pte_basic_t new = (old & ~(pte_basic_t)clr) | set; + int num, i; + + if (!huge) + num = PAGE_SIZE / SZ_4K; + else + num = 1; + + for (i = 0; i < num; i++, entry++) + *entry = new; + + return old; +} +#else static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { @@ -242,11 +266,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) - p->pte = p->pte1 = p->pte2 = p->pte3 = new; -#else *p = __pte(new); -#endif #endif /* !PTE_ATOMIC_UPDATES */ #ifdef CONFIG_44x @@ -255,6 +275,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p #endif return old; } +#endif #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int __ptep_test_and_clear_young(struct mm_struct *mm, From b12c07a4bb064c0a8db7554557b89d40f57c936f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:06 +0000 Subject: [PATCH 21/43] powerpc/mm: Reduce hugepd size for 8M hugepages on 8xx Commit 55c8fc3f4930 ("powerpc/8xx: reintroduce 16K pages with HW assistance") redefined pte_t as a struct of 4 pte_basic_t, because in 16K pages mode there are four identical entries in the page table. But hugepd entries for 8M pages require only one entry of size pte_basic_t. So there is no point in creating a cache for 4-entry page tables. Calculate PTE_T_ORDER using the size of pte_basic_t instead of pte_t. Define specific huge_pte helpers (set_huge_pte_at(), huge_pte_clear(), huge_ptep_set_wrprotect()) to write the pte in a single entry instead of using set_pte_at() which writes 4 identical entries in 16k pages mode. Also make sure that __ptep_set_access_flags() properly handles the huge_pte case. Define set_pte_filter() inline, otherwise GCC doesn't inline it anymore because it is now used twice, and that gives pretty suboptimal code because pte_t is a struct of 4 entries. Those functions are also used for 512k pages, which only require one entry as well, although replicating it four times was harmless, as 512k page entries are spread every 128 bytes in the table.
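[editor's note: a worked example, assuming 32-bit pointers and a 32-bit pte_basic_t as on the 8xx with 16k pages:

    sizeof(pte_t) = 4 * sizeof(pte_basic_t) = 16
    old: PTE_T_ORDER = __builtin_ffs(16) - __builtin_ffs(4) = 5 - 3 = 2
    new: PTE_T_ORDER = __builtin_ffs(4) - __builtin_ffs(4) = 3 - 3 = 0

so the hugepd cache no longer allocates four times the single pte_basic_t entry an 8M hugepd actually needs.]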
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/43050d1a0c2d6e1541cab9c1126fc80bc7015ebd.1589866984.git.christophe.leroy@csgroup.eu --- .../include/asm/nohash/32/hugetlb-8xx.h | 20 ++++++++++++++ arch/powerpc/include/asm/nohash/32/pgtable.h | 3 ++- arch/powerpc/mm/hugetlbpage.c | 3 ++- arch/powerpc/mm/pgtable.c | 26 ++++++++++++++++--- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index a46616937d20..785437323576 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -41,4 +41,24 @@ static inline int check_and_get_huge_psize(int shift) return shift_to_mmu_psize(shift); } +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + +#define __HAVE_ARCH_HUGE_PTE_CLEAR +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) +{ + pte_update(mm, addr, ptep, ~0UL, 0, 1); +} + +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); + unsigned long set = pte_val(pte_wrprotect(__pte(0))); + + pte_update(mm, addr, ptep, clr, set, 1); +} + #endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 5fb3f6798e22..ff78bf25f832 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -314,8 +314,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0))))); unsigned long set = pte_val(entry) & pte_val(pte_set); unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr); + int huge = psize > mmu_virtual_psize ? 1 : 0; - pte_update(vma->vm_mm, address, ptep, clr, set, 0); + pte_update(vma->vm_mm, address, ptep, clr, set, huge); flush_tlb_page(vma, address); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d06efb946c7d..521929a371af 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -30,7 +30,8 @@ bool hugetlb_disabled = false; #define hugepd_none(hpd) (hpd_val(hpd) == 0) -#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) +#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \ + __builtin_ffs(sizeof(void *))) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index e3759b69f81b..214a5f4beb6c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -100,7 +100,7 @@ static pte_t set_pte_filter_hash(pte_t pte) { return pte; } * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so * instead we "filter out" the exec permission for non clean pages. */ -static pte_t set_pte_filter(pte_t pte) +static inline pte_t set_pte_filter(pte_t pte) { struct page *pg; @@ -249,16 +249,34 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, #else /* - * Not used on non book3s64 platforms. But 8xx - * can possibly use tsize derived from hstate. + * Not used on non book3s64 platforms. 
+ * 8xx compares it with mmu_virtual_psize to + * know if it is a huge page or not. */ - psize = 0; + psize = MMU_PAGE_COUNT; #endif __ptep_set_access_flags(vma, ptep, pte, addr, psize); } return changed; #endif } + +#if defined(CONFIG_PPC_8xx) +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + /* + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. + */ + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); + + pte = pte_mkpte(pte); + + pte = set_pte_filter(pte); + + ptep->pte = pte_val(pte); +} +#endif #endif /* CONFIG_HUGETLB_PAGE */ #ifdef CONFIG_DEBUG_VM From d3efcd38c0b99162d889e36a30425345a18edb33 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:07 +0000 Subject: [PATCH 22/43] powerpc/8xx: Drop CONFIG_8xx_COPYBACK option CONFIG_8xx_COPYBACK was there to help disable copyback cache mode when debugging hardware. But nobody will design new boards with 8xx now. All 8xx platforms select it, so make it the default and remove the option. Also remove the Mx_RESETVAL values which are pretty useless and hide the real value while reading code. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bcc968cda075516eb76e2f25e09821f582c566b4.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/configs/adder875_defconfig | 1 - arch/powerpc/configs/ep88xc_defconfig | 1 - arch/powerpc/configs/mpc866_ads_defconfig | 1 - arch/powerpc/configs/mpc885_ads_defconfig | 1 - arch/powerpc/configs/tqm8xx_defconfig | 1 - arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 2 -- arch/powerpc/kernel/head_8xx.S | 15 +-------------- arch/powerpc/platforms/8xx/Kconfig | 9 --------- 8 files changed, 1 insertion(+), 30 deletions(-) diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index f55e23cb176c..5326bc739279 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -10,7 +10,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_ADDER875=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index 0e2e5e81a359..f5c3e72da719 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -12,7 +12,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_EP88XC=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 5320735395e7..5c56d36cdfc5 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -12,7 +12,6 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MPC86XADS=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_1000=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 82a008c04eae..949ff9ccda5e 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -11,7 +11,6 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y -CONFIG_8xx_COPYBACK=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set diff --git 
a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index eda8bfb2d0a3..77857d513022 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -15,7 +15,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_TQM8XX=y -CONFIG_8xx_COPYBACK=y # CONFIG_8xx_CPU15 is not set CONFIG_GEN_RTC=y CONFIG_HZ_100=y diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 76af5b0cb16e..26b7cee34dfe 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -19,7 +19,6 @@ #define MI_RSV4I 0x08000000 /* Reserve 4 TLB entries */ #define MI_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ #define MI_IDXMASK 0x00001f00 /* TLB index to be loaded */ -#define MI_RESETVAL 0x00000000 /* Value of register at reset */ /* These are the Ks and Kp from the PowerPC books. For proper operation, * Ks = 0, Kp = 1. @@ -95,7 +94,6 @@ #define MD_TWAM 0x04000000 /* Use 4K page hardware assist */ #define MD_PPCS 0x02000000 /* Use MI_RPN prob/priv state */ #define MD_IDXMASK 0x00001f00 /* TLB index to be loaded */ -#define MD_RESETVAL 0x04000000 /* Value of register at reset */ #define SPRN_M_CASID 793 /* Address space ID (context) to match */ #define MC_ASIDMASK 0x0000000f /* Bits used for ASID value */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 073a651787df..905205c79a25 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -779,10 +779,7 @@ start_here: initial_mmu: li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ - lis r10, MD_RESETVAL@h -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif + lis r10, MD_TWAM@h mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ @@ -857,17 +854,7 @@ initial_mmu: mtspr SPRN_DC_CST, r8 lis r8, IDC_ENABLE@h mtspr SPRN_IC_CST, r8 -#ifdef CONFIG_8xx_COPYBACK mtspr SPRN_DC_CST, r8 -#else - /* For a debug option, I left this here to easily enable - * the write through cache mode - */ - lis r8, DC_SFWT@h - mtspr SPRN_DC_CST, r8 - lis r8, IDC_ENABLE@h - mtspr SPRN_DC_CST, r8 -#endif /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER #ifdef CONFIG_PERF_EVENTS diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index e0fe670f06f6..b37de62d7e7f 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -98,15 +98,6 @@ menu "MPC8xx CPM Options" # 8xx specific questions. comment "Generic MPC8xx Options" -config 8xx_COPYBACK - bool "Copy-Back Data Cache (else Writethrough)" - help - Saying Y here will cause the cache on an MPC8xx processor to be used - in Copy-Back mode. If you say N here, it is used in Writethrough - mode. - - If in doubt, say Y here. - config 8xx_GPIO bool "GPIO API Support" select GPIOLIB From a891c43b97d315ee5f9fe8e797d3d48fc351e053 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:08 +0000 Subject: [PATCH 23/43] powerpc/8xx: Prepare handlers for _PAGE_HUGE for 512k pages. Prepare the ITLB handler to handle _PAGE_HUGE when CONFIG_HUGETLBFS is enabled. This means that the L1 entry has to be kept in r11 until the L2 entry is read, in order to insert _PAGE_HUGE into it. Also move pgd_offset helpers before pte_update() as they will be needed there in the next patch.
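For context, a minimal sketch of the walk pte_update() will perform once the next patch lands, using the helpers moved here (illustrative only, mirroring the code that follows in the series):

    /* locate the PMD covering addr with the moved helpers, then test the
     * page-size field the way the next patch will */
    pmd_t *pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
    if ((pmd_val(*pmd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M)
            ;       /* single-entry 8M hugepd */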
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/21fd1de8fba781bededa9474a5a9374aefb1f849.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 13 +++++------ arch/powerpc/kernel/head_8xx.S | 23 +++++++++++--------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ff78bf25f832..9a287a95acad 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -206,6 +206,12 @@ static inline void pmd_clear(pmd_t *pmdp) } +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* to find an entry in a page-table-directory */ +#define pgd_index(address) ((address) >> PGDIR_SHIFT) +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) /* * PTE updates. This function is called whenever an existing @@ -348,13 +354,6 @@ static inline int pte_young(pte_t pte) pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT)) #endif -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, address) - -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* Find an entry in the third-level page table.. */ #define pte_index(address) \ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 905205c79a25..adad8baadcf5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -196,7 +196,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 #endif @@ -235,16 +235,19 @@ InstructionTLBMiss: rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: -#endif - lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ - mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - - mtspr SPRN_MD_TWC, r10 - mfspr r10, SPRN_MD_TWC - lwz r10, 0(r10) /* Get the pte */ -#ifdef ITLB_MISS_KERNEL mtcr r11 #endif +#ifdef CONFIG_HUGETLBFS + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + mtspr SPRN_MD_TWC, r11 +#else + lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ + mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ + mtspr SPRN_MD_TWC, r10 +#endif + mfspr r10, SPRN_MD_TWC + lwz r10, 0(r10) /* Get the pte */ #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 @@ -263,7 +266,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 #endif rfi From b250c8c08c79d1eb5354c7eaa84b7505f5f2d921 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:09 +0000 Subject: [PATCH 24/43] powerpc/8xx: Manage 512k huge pages as standard pages. At the time being, 512k huge pages are handled through hugepd page tables. The PMD entry is flagged as a hugepd pointer and it means that only 512k hugepages can be managed in that 4M block. 
However, the hugepd table has the same size as a normal page table, and 512k entries can therefore be nested with normal pages. On the 8xx, TLB loading is performed by software and although the page tables are organised to match the L1 and L2 levels defined by the HW, all TLB entries have both L1 and L2 independent entries. It means that even if two TLB entries are associated with the same PMD entry, they can be loaded with different values in the L1 part. The L1 entry contains the page size (PS field): - 00 for 4k and 16k pages - 01 for 512k pages - 11 for 8M pages By adding a flag for hugepages in the PTE (_PAGE_HUGE) and copying it into the lower bit of PS, we can then manage 512k pages with normal page tables: - PMD entry has PS=11 for 8M pages - PMD entry has PS=00 for other pages. As a PMD entry covers 4M areas, a PMD will either point to a hugepd table having a single entry to an 8M page, or the PMD will point to a standard page table which will have entries for either 4k, 16k or 512k pages. For 512k pages, as the L1 entry will not know it is a 512k page before the PTE is read, there will be 128 entries in the PTE as if it was 4k pages. But when loading the TLB, it will be flagged as a 512k page. Note that we can't use pmd_ptr() in asm/nohash/32/pgtable.h because it is not defined yet. In ITLB miss, we keep the possibility to opt it out: when kernel text is pinned and no user hugepages are used, we can save several instructions by not using r11. In DTLB miss, that's just one instruction, so it's not worth bothering with it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/002819e8e166bf81d24b24782d98de7c40905d8f.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++--- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 4 +++- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/powerpc/kernel/head_8xx.S | 12 +++++------ arch/powerpc/mm/hugetlbpage.c | 22 +++++++++++++++++--- arch/powerpc/mm/pgtable.c | 10 ++++++++- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 9a287a95acad..717f995d21b8 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -229,8 +229,9 @@ static inline void pmd_clear(pmd_t *pmdp) * those implementations. * * On the 8xx, the page tables are a bit special. For 16k pages, we have - 4 identical entries. For other page sizes, we have a single entry in the - table. + 4 identical entries. For 512k pages, we have 128 entries as if it was + 4k pages, but they are flagged as 512k pages for the hardware. + For other page sizes, we have a single entry in the table.
*/ #ifdef CONFIG_PPC_8xx static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, @@ -240,13 +241,16 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); if (!huge) num = PAGE_SIZE / SZ_4K; + else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) != _PMD_PAGE_8M) + num = SZ_512K / SZ_4K; else num = 1; - for (i = 0; i < num; i++, entry++) + for (i = 0; i < num; i++, entry++, new += SZ_4K) *entry = new; return old; diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index c9e4b2d90f65..66f403a7da44 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -46,6 +46,8 @@ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ +#define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */ + /* cache related flags non existing on 8xx */ #define _PAGE_COHERENT 0 #define _PAGE_WRITETHRU 0 @@ -128,7 +130,7 @@ static inline pte_t pte_mkuser(pte_t pte) static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPS); + return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE); } #define pte_mkhuge pte_mkhuge diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 7fed9dc0f147..f27c967d9269 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -267,7 +267,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, static inline int hugepd_ok(hugepd_t hpd) { #ifdef CONFIG_PPC_8xx - return ((hpd_val(hpd) & 0x4) != 0); + return ((hpd_val(hpd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M); #else /* We clear the top bit to indicate hugepd */ return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index adad8baadcf5..423465b10c82 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -239,7 +239,6 @@ InstructionTLBMiss: #endif #ifdef CONFIG_HUGETLBFS lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ - mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ mtspr SPRN_MD_TWC, r11 #else lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ @@ -248,6 +247,10 @@ InstructionTLBMiss: #endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ +#ifdef CONFIG_HUGETLBFS + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K + mtspr SPRN_MI_TWC, r11 +#endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 @@ -353,6 +356,7 @@ DataStoreTLBMiss: * above. */ rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. @@ -584,7 +588,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ bt 28,200f /* bit 28 = Large page (8M) */ - bt 29,202f /* bit 29 = Large page (8M or 512K) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 201: lwz r11,0(r11) @@ -611,11 +614,6 @@ FixupDAR:/* Entry point for dcbx workaround. 
*/ rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 b 201b -202: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 521929a371af..38bad839e608 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -189,6 +189,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz if (!hpdp) return NULL; + if (IS_ENABLED(CONFIG_PPC_8xx) && sz == SZ_512K) + return pte_alloc_map(mm, (pmd_t *)hpdp, addr); + BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, @@ -331,13 +334,20 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); - else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); } +static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) +{ + pgtable_t token = pmd_pgtable(*pmd); + + pmd_clear(pmd); + pte_free_tlb(tlb, token, addr); + mm_dec_nr_ptes(tlb->mm); +} + static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) @@ -353,11 +363,17 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, addr); next = pmd_addr_end(addr, end); if (!is_hugepd(__hugepd(pmd_val(*pmd)))) { + if (pmd_none_or_clear_bad(pmd)) + continue; + /* * if it is not hugepd pointer, we should already find * it cleared. */ - WARN_ON(!pmd_none_or_clear_bad(pmd)); + WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx)); + + hugetlb_free_pte_range(tlb, pmd, addr); + continue; } /* diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 214a5f4beb6c..60c4b8ff046c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -264,6 +264,12 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, #if defined(CONFIG_PPC_8xx) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { + pmd_t *pmd = pmd_ptr(mm, addr); + pte_basic_t val; + pte_basic_t *entry = &ptep->pte; + int num = is_hugepd(*((hugepd_t *)pmd)) ? 1 : SZ_512K / SZ_4K; + int i; + /* * Make sure hardware valid bit is not set. We don't do * tlb flush for this update. @@ -274,7 +280,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_ pte = set_pte_filter(pte); - ptep->pte = pte_val(pte); + val = pte_val(pte); + for (i = 0; i < num; i++, entry++, val += SZ_4K) + *entry = val; } #endif #endif /* CONFIG_HUGETLB_PAGE */ From d4870b89acd7c362ded08f9295e8d143cf7e0024 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:10 +0000 Subject: [PATCH 25/43] powerpc/8xx: Only 8M pages are hugepte pages now 512k pages are now standard pages, so only 8M pages are hugepte. No more handling of normal page tables through hugepd allocation and freeing, and hugepte helpers can also be simplified. 
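As an aside, this is what lets hugepd_shift() collapse to a constant; a hedged before/after sketch (the values follow from the 8xx _PMD_PAGE_* definitions):

    /* before: decode the PS field of the hugepd entry:
     *   ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17
     *   -> 19 (512k) for _PMD_PAGE_512K, 23 (8M) for _PMD_PAGE_8M
     * after: only 8M hugepd entries remain, so the shift is constant */
    static inline unsigned int hugepd_shift(hugepd_t hpd)
    {
            return PAGE_SHIFT_8M;   /* 23, i.e. 8M */
    }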
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2c6135d57fb76eebf70673fbac3dc9e740767879.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 7 +++---- arch/powerpc/mm/hugetlbpage.c | 16 +++------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index 785437323576..1c7d4693a78e 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -13,13 +13,13 @@ static inline pte_t *hugepd_page(hugepd_t hpd) static inline unsigned int hugepd_shift(hugepd_t hpd) { - return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; + return PAGE_SHIFT_8M; } static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned int pdshift) { - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; + unsigned long idx = (addr & (SZ_4M - 1)) >> PAGE_SHIFT; return hugepd_page(hpd) + idx; } @@ -32,8 +32,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) { - *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | - (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K)); + *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M); } static inline int check_and_get_huge_psize(int shift) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 38bad839e608..cfacd364c7aa 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -54,24 +54,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); - new = NULL; - } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = NULL; - num_hugepd = 1; - new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; - new = NULL; } - if (!cachep && !new) { + if (!cachep) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - if (cachep) - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -102,10 +95,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - if (cachep) - kmem_cache_free(cachep, new); - else - pte_free(mm, new); + kmem_cache_free(cachep, new); } else { kmemleak_ignore(new); } From 555904d07eef3a2e5fc458419edf6174362c4ddd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:11 +0000 Subject: [PATCH 26/43] powerpc/8xx: MM_SLICE is not needed anymore As the 8xx now manages 512k pages in standard page tables, it doesn't need CONFIG_PPC_MM_SLICES anymore. Don't select it anymore and remove all related code. 
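For quick reference, the 8xx context structure that is left behind by this removal (as visible in the hunk below), with a hedged note on the fallback path:

    typedef struct {
            unsigned int id;        /* MMU context id */
            unsigned int active;
            unsigned long vdso_base;
            void *pte_frag;         /* PTE fragment allocator state */
    } mm_context_t;

    /* with PPC_MM_SLICES gone, 8xx hugetlb mappings presumably go through
     * the generic get_unmapped_area() path instead of the slice allocator */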
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/98e8ccd424476ea73cced2b89ba38eb2ed8144fb.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 64 -------------------- arch/powerpc/include/asm/nohash/32/slice.h | 20 ------ arch/powerpc/include/asm/slice.h | 2 - arch/powerpc/platforms/Kconfig.cputype | 1 - 4 files changed, 87 deletions(-) delete mode 100644 arch/powerpc/include/asm/nohash/32/slice.h diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 26b7cee34dfe..a092e6434bda 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -176,12 +176,6 @@ */ #define SPRN_M_TW 799 -#ifdef CONFIG_PPC_MM_SLICES -#include -#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) -#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE -#endif - #if defined(CONFIG_PPC_4K_PAGES) #define mmu_virtual_psize MMU_PAGE_4K #elif defined(CONFIG_PPC_16K_PAGES) @@ -199,71 +193,13 @@ #include -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, 0); -}; - typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; -#ifdef CONFIG_PPC_MM_SLICES - u16 user_psize; /* page size index */ - unsigned char low_slices_psize[SLICE_ARRAY_SIZE]; - unsigned char high_slices_psize[0]; - unsigned long slb_addr_limit; - struct slice_mask mask_base_psize; /* 4k or 16k */ - struct slice_mask mask_512k; - struct slice_mask mask_8m; -#endif void *pte_frag; } mm_context_t; -#ifdef CONFIG_PPC_MM_SLICES -static inline u16 mm_ctx_user_psize(mm_context_t *ctx) -{ - return ctx->user_psize; -} - -static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) -{ - ctx->user_psize = user_psize; -} - -static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) -{ - return ctx->low_slices_psize; -} - -static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) -{ - return ctx->high_slices_psize; -} - -static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) -{ - return ctx->slb_addr_limit; -} - -static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) -{ - ctx->slb_addr_limit = limit; -} - -static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) -{ - if (psize == MMU_PAGE_512K) - return &ctx->mask_512k; - if (psize == MMU_PAGE_8M) - return &ctx->mask_8m; - - BUG_ON(psize != mmu_virtual_psize); - - return &ctx->mask_base_psize; -} -#endif /* CONFIG_PPC_MM_SLICE */ - #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h deleted file mode 100644 index 39eb0154ae2d..000000000000 --- a/arch/powerpc/include/asm/nohash/32/slice.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H -#define _ASM_POWERPC_NOHASH_32_SLICE_H - -#ifdef CONFIG_PPC_MM_SLICES - -#define SLICE_LOW_SHIFT 26 /* 64 slices */ -#define SLICE_LOW_TOP (0x100000000ull) -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) -#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) - -#define SLICE_HIGH_SHIFT 0 -#define SLICE_NUM_HIGH 0ul -#define GET_HIGH_SLICE_INDEX(addr) (addr & 0) - -#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW - -#endif /* CONFIG_PPC_MM_SLICES */ - -#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git 
a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index c6f466f4c241..0bdd9c62eca0 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,8 +4,6 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include -#elif defined(CONFIG_PPC_MMU_NOHASH_32) -#include #endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 0c3c1902135c..b0587b833517 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -55,7 +55,6 @@ config PPC_8xx select SYS_SUPPORTS_HUGETLBFS select PPC_HAVE_KUEP select PPC_HAVE_KUAP - select PPC_MM_SLICES if HUGETLB_PAGE select HAVE_ARCH_VMAP_STACK config 40x From 5d4656696c30cef56b2ab506b203533c818af04d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:12 +0000 Subject: [PATCH 27/43] powerpc/8xx: Move PPC_PIN_TLB options into 8xx Kconfig PPC_PIN_TLB options are dedicated to the 8xx, move them into the 8xx Kconfig. While we are at it, add some text to explain what it does. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1ece39fac6312e1d14e6a67b3f9d9f9f91990a7b.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 20 --------------- arch/powerpc/platforms/8xx/Kconfig | 41 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 30e2111ca15d..1d4ef4f27dec 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1227,26 +1227,6 @@ config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx default "0xc0000000" - -config PIN_TLB - bool "Pinned Kernel TLBs (860 ONLY)" - depends on ADVANCED_OPTIONS && PPC_8xx && \ - !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX - -config PIN_TLB_DATA - bool "Pinned TLB for DATA" - depends on PIN_TLB - default y - -config PIN_TLB_IMMR - bool "Pinned TLB for IMMR" - depends on PIN_TLB || PPC_EARLY_DEBUG_CPM - default y - -config PIN_TLB_TEXT - bool "Pinned TLB for TEXT" - depends on PIN_TLB - default y endmenu if PPC64 diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index b37de62d7e7f..0d036cd868ef 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -162,4 +162,45 @@ config UCODE_PATCH default y depends on !NO_UCODE_PATCH +menu "8xx advanced setup" + depends on PPC_8xx + +config PIN_TLB + bool "Pinned Kernel TLBs" + depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX + help + On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each + table 4 TLBs can be pinned. + + It reduces the amount of usable TLBs to 28 (ie by 12%). That's the + reason why we make it selectable. + + This option does nothing, it just activate the selection of what + to pin. + +config PIN_TLB_DATA + bool "Pinned TLB for DATA" + depends on PIN_TLB + default y + help + This pins the first 32 Mbytes of memory with 8M pages. + +config PIN_TLB_IMMR + bool "Pinned TLB for IMMR" + depends on PIN_TLB || PPC_EARLY_DEBUG_CPM + default y + help + This pins the IMMR area with a 512kbytes page. In case + CONFIG_PIN_TLB_DATA is also selected, it will reduce + CONFIG_PIN_TLB_DATA to 24 Mbytes. + +config PIN_TLB_TEXT + bool "Pinned TLB for TEXT" + depends on PIN_TLB + default y + help + This pins kernel text with 8M pages. 
+ +endmenu + endmenu From f76c8f6d257cefda60221c83af7f97d9f74cb3ce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:13 +0000 Subject: [PATCH 28/43] powerpc/8xx: Add function to set pinned TLBs Pinned TLBs cannot be modified when the MMU is enabled. Create a function to rewrite the pinned TLB entries with the MMU off. To set pinned TLB entries, we have to turn off the MMU, disable pinning, do a TLB flush (either with tlbie or tlbia), then reprogram the TLB entries, enable pinning and turn the MMU back on. If using tlbie, entries are cleared in both the instruction and data TLBs regardless of whether pinning is disabled or not. If using tlbia, it clears all entries of the TLB which has pinning disabled. To make it easy, just clear all entries in both TLBs and reprogram them. The function takes two arguments, the top of the memory to consider and whether data is RO under _sinittext. When DEBUG_PAGEALLOC is set, the top is the end of kernel rodata. Otherwise, that's the top of physical RAM. Everything below _sinittext is set RX, over _sinittext that's RW. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c17806014bb1c06513ad1e1d510faea31984b177.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 2 + arch/powerpc/kernel/head_8xx.S | 103 +++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index a092e6434bda..4d3ef3841b00 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -193,6 +193,8 @@ #include +void mmu_pin_tlb(unsigned long top, bool readonly); + typedef struct { unsigned int id; unsigned int active; diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 423465b10c82..c9e3d54e6a6f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -866,6 +867,108 @@ initial_mmu: mtspr SPRN_DER, r8 blr +#ifdef CONFIG_PIN_TLB +_GLOBAL(mmu_pin_tlb) + lis r9, (1f - PAGE_OFFSET)@h + ori r9, r9, (1f - PAGE_OFFSET)@l + mfmsr r10 + mflr r11 + li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI) + rlwinm r0, r10, 0, ~MSR_RI + rlwinm r0, r0, 0, ~MSR_EE + mtmsr r0 + isync + .align 4 + mtspr SPRN_SRR0, r9 + mtspr SPRN_SRR1, r12 + rfi +1: + li r5, 0 + lis r6, MD_TWAM@h + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MD_CTR, r6 + tlbia + +#ifdef CONFIG_PIN_TLB_TEXT + LOAD_REG_IMMEDIATE(r5, 28 << 8) + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + li r0, 4 + mtctr r0 + +2: ori r0, r6, MI_EVALID + mtspr SPRN_MI_CTR, r5 + mtspr SPRN_MI_EPN, r0 + mtspr SPRN_MI_TWC, r7 + mtspr SPRN_MI_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + lis r0, MI_RSV4I@h + mtspr SPRN_MI_CTR, r0 +#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) +#ifdef CONFIG_PIN_TLB_DATA + LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) +#ifdef CONFIG_PIN_TLB_IMMR + li r0, 3 +#else + li r0, 4 +#endif + mtctr r0 + cmpwi r4, 0 + beq 4f + LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) + LOAD_REG_ADDR(r9, _sinittext) + +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr 
SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r9 + bdnzt lt, 2b + +4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +2: ori r0, r6, MD_EVALID + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 + addi r5, r5, 0x100 + addis r6, r6, SZ_8M@h + addis r8, r8, SZ_8M@h + cmplw r6, r3 + bdnzt lt, 2b +#endif +#ifdef CONFIG_PIN_TLB_IMMR + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + mfspr r8, SPRN_IMMR + rlwinm r8, r8, 0, 0xfff80000 + ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_CTR, r5 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r7 + mtspr SPRN_MD_RPN, r8 +#endif +#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) + lis r0, (MD_RSV4I | MD_TWAM)@h + mtspr SPRN_MI_CTR, r0 +#endif + mtspr SPRN_SRR1, r10 + mtspr SPRN_SRR0, r11 + rfi +#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. From 136a9a0f74d2e0d9de5515190fe80344b86b45cf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:14 +0000 Subject: [PATCH 29/43] powerpc/8xx: Don't set IMMR map anymore at boot Only early debug requires IMMR to be mapped early. No need to set it up and pin it in assembly. Map it through page tables at udbg init when necessary. If CONFIG_PIN_TLB_IMMR is selected, pin it once we don't need the 32 Mb pinned RAM anymore. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/13c1e8539fdf363d3146f4884e5c3c76c6c308b5.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 39 +++++++++++++----------------- arch/powerpc/mm/mmu_decl.h | 4 +++ arch/powerpc/mm/nohash/8xx.c | 15 +++++++++--- arch/powerpc/platforms/8xx/Kconfig | 2 +- arch/powerpc/sysdev/cpm_common.c | 2 ++ 5 files changed, 35 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c9e3d54e6a6f..d607f4b53e0f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -749,6 +749,23 @@ start_here: rfi /* Load up the kernel context */ 2: +#ifdef CONFIG_PIN_TLB_IMMR + lis r0, MD_TWAM@h + oris r0, r0, 0x1f00 + mtspr SPRN_MD_CTR, r0 + LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) + tlbie r0 + mtspr SPRN_MD_EPN, r0 + LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED) + mtspr SPRN_MD_TWC, r0 + mfspr r0, SPRN_IMMR + rlwinm r0, r0, 0, 0xfff80000 + ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ + _PAGE_NO_CACHE | _PAGE_PRESENT + mtspr SPRN_MD_RPN, r0 + lis r0, (MD_TWAM | MD_RSV4I)@h + mtspr SPRN_MD_CTR, r0 +#endif tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ @@ -797,28 +814,6 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map a 512k page for the IMMR to get the processor - * internal registers (among other things). - */ -#ifdef CONFIG_PIN_TLB_IMMR - oris r10, r10, MD_RSV4I@h - ori r10, r10, 0x1c00 - mtspr SPRN_MD_CTR, r10 - - mfspr r9, 638 /* Get current IMMR */ - andis. 
r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - - lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ - ori r8, r8, MD_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ - ori r8, r8, MD_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 - mr r8, r9 /* Create paddr for TLB */ - ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ - mtspr SPRN_MD_RPN, r8 -#endif - /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */ #ifdef CONFIG_PIN_TLB_TEXT lis r8, MI_RSV4I@h diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7097e07a209a..1b6d39e9baed 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -182,6 +182,10 @@ static inline void mmu_mark_initmem_nx(void) { } static inline void mmu_mark_rodata_ro(void) { } #endif +#ifdef CONFIG_PPC_8xx +void __init mmu_mapin_immr(void); +#endif + #ifdef CONFIG_PPC_DEBUG_WX void ptdump_check_wx(void); #else diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index b27017109a36..44cbde7612cb 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -66,7 +66,7 @@ void __init MMU_init_hw(void) if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; - int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28; + int i = 28; unsigned long addr = 0; unsigned long mem = total_lowmem; @@ -81,12 +81,19 @@ void __init MMU_init_hw(void) } } -static void __init mmu_mapin_immr(void) +static bool immr_is_mapped __initdata; + +void __init mmu_mapin_immr(void) { unsigned long p = PHYS_IMMR_BASE; unsigned long v = VIRT_IMMR_BASE; int offset; + if (immr_is_mapped) + return; + + immr_is_mapped = true; + for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); } @@ -122,9 +129,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long mapped; + mmu_mapin_immr(); + if (__map_without_ltlbs) { mapped = 0; - mmu_mapin_immr(); if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) patch_instruction_site(&patch__dtlbmiss_immr_jmp, ppc_inst(PPC_INST_NOP)); if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) @@ -143,7 +151,6 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) */ mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); - mmu_mapin_immr(); } mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 0d036cd868ef..04ea1a8a0bdc 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -187,7 +187,7 @@ config PIN_TLB_DATA config PIN_TLB_IMMR bool "Pinned TLB for IMMR" - depends on PIN_TLB || PPC_EARLY_DEBUG_CPM + depends on PIN_TLB default y help This pins the IMMR area with a 512kbytes page. 
In case diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 71660bacb264..7dc1960f8bdb 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -68,6 +68,8 @@ static void udbg_putc_cpm(char c) void __init udbg_init_cpm(void) { #ifdef CONFIG_PPC_8xx + mmu_mapin_immr(); + cpm_udbg_txdesc = (u32 __iomem __force *) (CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE + VIRT_IMMR_BASE); From 684c1664e0de63398aceb748343541b48d398710 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:15 +0000 Subject: [PATCH 30/43] powerpc/8xx: Always pin TLBs at startup. At startup, map 32 Mbytes of memory through 4 pages of 8M, and pin them unconditionally. They need to be pinned because KASAN is using page tables early and the TLBs might be dynamically replaced otherwise. Remove the RSV4I flag after installing mappings unless CONFIG_PIN_TLB_XXXX is selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b27c5767d18053b59f7eefddc189fcc3acf7b9c2.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 31 +++++++++++++++++-------------- arch/powerpc/mm/nohash/8xx.c | 19 +------------------ 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d607f4b53e0f..b0cceee6405c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -765,6 +765,14 @@ start_here: mtspr SPRN_MD_RPN, r0 lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 +#endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif +#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) + lis r0, MD_TWAM@h + mtspr SPRN_MD_CTR, r0 #endif tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ @@ -802,10 +810,6 @@ initial_mmu: mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ -#ifdef CONFIG_PIN_TLB_DATA - oris r10, r10, MD_RSV4I@h - mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ -#endif lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 lis r8, MD_APG_INIT@h @@ -814,33 +818,32 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Now map the lower RAM (up to 32 Mbytes) into the ITLB. */ -#ifdef CONFIG_PIN_TLB_TEXT + /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */ lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 -#endif + oris r12, r10, MD_RSV4I@h + ori r12, r12, 0x1c00 li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ li r11, MI_BOOTINIT /* Create RPN for address 0 */ - lis r12, _einittext@h - ori r12, r12, _einittext@l 1: -#ifdef CONFIG_PIN_TLB_TEXT mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ addi r8, r8, 0x100 -#endif - ori r0, r9, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r0 mtspr SPRN_MI_TWC, r10 mtspr SPRN_MI_RPN, r11 /* Store TLB entry */ + mtspr SPRN_MD_CTR, r12 + addi r12, r12, 0x100 + mtspr SPRN_MD_EPN, r0 + mtspr SPRN_MD_TWC, r10 + mtspr SPRN_MD_RPN, r11 addis r9, r9, 0x80 addis r11, r11, 0x80 - cmpl cr0, r9, r12 - bdnzf gt, 1b + bdnz 1b /* Since the cache is enabled according to the information we * just loaded into the TLB, invalidate and enable the caches here. 
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 44cbde7612cb..96e7a58ca5ee 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -62,23 +62,6 @@ unsigned long p_block_mapped(phys_addr_t pa) */ void __init MMU_init_hw(void) { - /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ - if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) { - unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; - int i = 28; - unsigned long addr = 0; - unsigned long mem = total_lowmem; - - for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { - mtspr(SPRN_MD_CTR, ctr | (i << 8)); - mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); - mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); - mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); - addr += LARGE_PAGE_SIZE_8M; - mem -= LARGE_PAGE_SIZE_8M; - } - } } static bool immr_is_mapped __initdata; @@ -223,7 +206,7 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, BUG_ON(first_memblock_base != 0); /* 8xx can only access 32MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000)); + memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } /* From 400dc0f86102d2ad11d3601f1948fbb02e926431 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:16 +0000 Subject: [PATCH 31/43] powerpc/8xx: Drop special handling of Linear and IMMR mappings in I/D TLB handlers Up to now, linear and IMMR mappings are managed via huge TLB entries through specific code directly in TLB miss handlers. This implies some patching of the TLB miss handlers at startup, and a lot of dedicated code. Remove all this specific dedicated code. For now we are back to normal handling via standard 4k pages. In the next patches, linear memory mapping and IMMR mapping will be managed through huge pages. 
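A hedged C model of the simplified miss path (conceptual only, the real implementation is the assembly in the hunks that follow; current_user_pgdir is an illustrative name for the level 1 table fetched via SPRN_M_TWB):

    /* conceptual model of the I/D TLB miss path after this patch */
    if (addr >= PAGE_OFFSET)
            pgd = swapper_pg_dir;          /* kernel page tables */
    else
            pgd = current_user_pgdir;      /* user page tables */
    /* then a plain L1/L2 walk loads the TLB; the DTLBMissIMMR and
     * DTLBMissLinear side exits and their patched limits are gone */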
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/221b7e3ead80a5969629938c023f8cfe45fdd2fb.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 29 +--------- arch/powerpc/mm/nohash/8xx.c | 103 +-------------------------------- 2 files changed, 3 insertions(+), 129 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b0cceee6405c..d1546f379757 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -207,31 +207,21 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10) mtspr SPRN_MD_EPN, r10 - /* Only modules will cause ITLB Misses as we always - * pin the first 8MB of kernel memory */ #ifdef ITLB_MISS_KERNEL mfcr r11 -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) +#if defined(SIMPLE_KERNEL_ADDRESS) cmpi cr0, r10, 0 /* Address >= 0x80000000 */ #else rlwinm r10, r10, 16, 0xfff8 cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_TEXT - /* It is assumed that kernel code fits into the first 32M */ -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__itlbmiss_linmem_top -#endif #endif #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ #ifdef ITLB_MISS_KERNEL -#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) +#if defined(SIMPLE_KERNEL_ADDRESS) bge+ 3f #else blt+ 3f -#endif -#ifndef CONFIG_PIN_TLB_TEXT - blt cr7, ITLBMissLinear #endif rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -327,19 +317,9 @@ DataStoreTLBMiss: mfspr r10, SPRN_MD_EPN rlwinm r10, r10, 16, 0xfff8 cmpli cr0, r10, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB_IMMR - cmpli cr6, r10, VIRT_IMMR_BASE@h -#endif -0: cmpli cr7, r10, (PAGE_OFFSET + 0x2000000)@h - patch_site 0b, patch__dtlbmiss_linmem_top mfspr r10, SPRN_M_TWB /* Get level 1 table */ blt+ 3f -#ifndef CONFIG_PIN_TLB_IMMR -0: beq- cr6, DTLBMissIMMR - patch_site 0b, patch__dtlbmiss_immr_jmp -#endif - blt cr7, DTLBMissLinear rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: @@ -571,14 +551,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ cmpli cr1, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f - rlwinm r11, r10, 16, 0xfff8 - -0: cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h - patch_site 0b, patch__fixupdar_linmem_top /* create physical page address from effective address */ tophys(r11, r10) - blt- cr7, 201f mfspr r11, SPRN_M_TWB /* Get level 1 table */ rlwinm r11, r11, 0, 20, 31 oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 96e7a58ca5ee..b735482e1529 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -55,8 +55,6 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -#define LARGE_PAGE_SIZE_8M (1<<23) - /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. 
*/ @@ -81,119 +79,20 @@ void __init mmu_mapin_immr(void) map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); } -static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped) -{ - modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16); -} - -static void mmu_patch_addis(s32 *site, long simm) -{ - unsigned int instr = *(unsigned int *)patch_site_addr(site); - - instr &= 0xffff0000; - instr |= ((unsigned long)simm) >> 16; - patch_instruction_site(site, ppc_inst(instr)); -} - -static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot) -{ - unsigned long s = offset; - unsigned long v = PAGE_OFFSET + s; - phys_addr_t p = memstart_addr + s; - - for (; s < top; s += PAGE_SIZE) { - map_kernel_page(v, p, prot); - v += PAGE_SIZE; - p += PAGE_SIZE; - } -} - unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { - unsigned long mapped; - mmu_mapin_immr(); - if (__map_without_ltlbs) { - mapped = 0; - if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR)) - patch_instruction_site(&patch__dtlbmiss_immr_jmp, ppc_inst(PPC_INST_NOP)); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0); - } else { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - - mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8); - - /* - * Populate page tables to: - * - have them appear in /sys/kernel/debug/kernel_page_tables - * - allow the BDI to find the pages when they are not PINNED - */ - mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X); - mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL); - } - - mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped); - mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped); - - /* If the size of RAM is not an exact power of two, we may not - * have covered RAM in its entirety with 8 MiB - * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" - * coverage with normal-sized pages (or other reasons) do not - * attempt to allocate outside the allowed range. 
- */ - if (mapped) - memblock_set_current_limit(mapped); - - block_mapped_ram = mapped; - - return mapped; + return 0; } void mmu_mark_initmem_nx(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23) - mmu_patch_addis(&patch__itlbmiss_linmem_top8, - -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1))); - if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) { - unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); - unsigned long etext = __pa(_etext); - - mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext)); - - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, einittext8, __pgprot(0)); - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL); - } else { - mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT); - mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL); - } - } } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { - unsigned long sinittext = __pa(_sinittext); - unsigned long etext = __pa(_etext); - - if (CONFIG_DATA_SHIFT < 23) - mmu_patch_addis(&patch__dtlbmiss_romem_top8, - -__pa(((unsigned long)_sinittext) & - ~(LARGE_PAGE_SIZE_8M - 1))); - mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext)); - - /* Update page tables for PTDUMP and BDI */ - mmu_mapin_ram_chunk(0, sinittext, __pgprot(0)); - mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX); - mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO); } #endif From 1251288e64ba44969e1c4d59e5ee88a6e873447b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:17 +0000 Subject: [PATCH 32/43] powerpc/8xx: Remove now unused TLB miss functions The code to setup linear and IMMR mapping via huge TLB entries is not called anymore. Remove it. Also remove the handling of removed code exits in the perf driver. 
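With a single exit left per handler, the perf hook reduces to one patch per handler; a minimal sketch of the pattern as used in 8xx-pmu.c (see the hunks below):

    /* enable TLB-miss counting: branch the handler exit to the perf stub */
    patch_branch_site(&patch__dtlbmiss_exit_1,
                      patch_site_addr(&patch__dtlbmiss_perf), 0);
    /* disable: write the original exit instruction back over the branch */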
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/75750d25849cb8e73ca519866bb892d7eb9649c0.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/mmu-8xx.h | 8 +- arch/powerpc/kernel/head_8xx.S | 83 -------------------- arch/powerpc/perf/8xx-pmu.c | 10 --- 3 files changed, 1 insertion(+), 100 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 4d3ef3841b00..e82368838416 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -240,13 +240,7 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) } /* patch sites */ -extern s32 patch__itlbmiss_linmem_top, patch__itlbmiss_linmem_top8; -extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp; -extern s32 patch__fixupdar_linmem_top; -extern s32 patch__dtlbmiss_romem_top, patch__dtlbmiss_romem_top8; - -extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2; -extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3; +extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1; extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d1546f379757..fb5d17187772 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -278,33 +278,6 @@ InstructionTLBMiss: rfi #endif -#ifndef CONFIG_PIN_TLB_TEXT -ITLBMissLinear: - mtcr r11 -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23 - patch_site 0f, patch__itlbmiss_linmem_top8 - - mfspr r10, SPRN_SRR0 -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MI_PS8MEG | MI_SVALID - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#endif - mtspr SPRN_MI_TWC, r11 - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 - rfi - patch_site 0b, patch__itlbmiss_exit_2 -#endif - . 
= 0x1200 DataStoreTLBMiss: mtspr SPRN_DAR, r10 @@ -371,62 +344,6 @@ DataStoreTLBMiss: rfi patch_site 0b, patch__dtlbmiss_exit_1 -DTLBMissIMMR: - mtcr r11 - /* Set 512k byte guarded page and mark it valid */ - li r10, MD_PS512K | MD_GUARDED | MD_SVALID - mtspr SPRN_MD_TWC, r10 - mfspr r10, SPRN_IMMR /* Get current IMMR */ - rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT | _PAGE_NO_CACHE - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi - patch_site 0b, patch__dtlbmiss_exit_2 - -DTLBMissLinear: - mtcr r11 - rlwinm r10, r10, 20, 0x0f800000 /* 8xx supports max 256Mb RAM */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && CONFIG_DATA_SHIFT < 23 - patch_site 0f, patch__dtlbmiss_romem_top8 - -0: subis r11, r10, (PAGE_OFFSET - 0x80000000)@ha - rlwinm r11, r11, 0, 0xff800000 - neg r10, r11 - or r11, r11, r10 - rlwinm r11, r11, 4, MI_PS8MEG ^ MI_PS512K - ori r11, r11, MI_PS512K | MI_SVALID - mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 0, 0x0ff80000 /* 8xx supports max 256Mb RAM */ -#else - /* Set 8M byte page and mark it valid */ - li r11, MD_PS8MEG | MD_SVALID -#endif - mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_STRICT_KERNEL_RWX - patch_site 0f, patch__dtlbmiss_romem_top - -0: subis r11, r10, 0 - rlwimi r10, r11, 11, _PAGE_RO -#endif - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ - _PAGE_PRESENT - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - li r11, RPN_PATTERN - -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi - patch_site 0b, patch__dtlbmiss_exit_3 - /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. 
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index acc27fc63eb7..e53c3c161257 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -100,9 +100,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) unsigned long target = patch_site_addr(&patch__itlbmiss_perf); patch_branch_site(&patch__itlbmiss_exit_1, target, 0); -#ifndef CONFIG_PIN_TLB_TEXT - patch_branch_site(&patch__itlbmiss_exit_2, target, 0); -#endif } val = itlb_miss_counter; break; @@ -111,8 +108,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags) unsigned long target = patch_site_addr(&patch__dtlbmiss_perf); patch_branch_site(&patch__dtlbmiss_exit_1, target, 0); - patch_branch_site(&patch__dtlbmiss_exit_2, target, 0); - patch_branch_site(&patch__dtlbmiss_exit_3, target, 0); } val = dtlb_miss_counter; break; @@ -175,9 +170,6 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) __PPC_SPR(SPRN_SPRG_SCRATCH0)); patch_instruction_site(&patch__itlbmiss_exit_1, insn); -#ifndef CONFIG_PIN_TLB_TEXT - patch_instruction_site(&patch__itlbmiss_exit_2, insn); -#endif } break; case PERF_8xx_ID_DTLB_LOAD_MISS: @@ -187,8 +179,6 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags) __PPC_SPR(SPRN_DAR)); patch_instruction_site(&patch__dtlbmiss_exit_1, insn); - patch_instruction_site(&patch__dtlbmiss_exit_2, insn); - patch_instruction_site(&patch__dtlbmiss_exit_3, insn); } break; } From 0c8c2c9c201b44eed6c10d7c5c8d25fe5aab87ce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:18 +0000 Subject: [PATCH 33/43] powerpc/8xx: Move DTLB perf handling closer. Now that space has been freed next to the DTLB miss handler, its associated DTLB perf handling can be brought back in the same place. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/97f48cc1a2ea6b895bfac0752cbe59deaf2eecda.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fb5d17187772..9f3f7f3d03a7 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -344,6 +344,17 @@ DataStoreTLBMiss: rfi patch_site 0b, patch__dtlbmiss_exit_1 +#ifdef CONFIG_PERF_EVENTS + patch_site 0f, patch__dtlbmiss_perf +0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + addi r10, r10, 1 + stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) + mfspr r10, SPRN_DAR + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_M_TW + rfi +#endif + /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. 
@@ -390,18 +401,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) -/* Called from DataStoreTLBMiss when perf TLB misses events are activated */ -#ifdef CONFIG_PERF_EVENTS - patch_site 0f, patch__dtlbmiss_perf -0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - addi r10, r10, 1 - stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_M_TW - rfi -#endif - stack_overflow: vmap_stack_overflow_exception From a0591b60eef965f7f5255ad4696bbba9af4b43d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:19 +0000 Subject: [PATCH 34/43] powerpc/mm: Don't be too strict with _etext alignment on PPC32 Similarly to PPC64, accept mapping RO data as ROX as a trade-off between security and memory usage. Having RO data executable is not a high risk, as RO data can't be modified to forge an exploit. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/8c4a0d89d944eed984dd941e509614031a5ace2b.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 26 -------------------------- arch/powerpc/kernel/vmlinux.lds.S | 3 +-- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1d4ef4f27dec..d147d379b1b9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -778,32 +778,6 @@ config THREAD_SHIFT Used to define the stack size. The default is almost always what you want. Only change this if you know what you are doing. -config ETEXT_SHIFT_BOOL - bool "Set custom etext alignment" if STRICT_KERNEL_RWX && \ - (PPC_BOOK3S_32 || PPC_8xx) - depends on ADVANCED_OPTIONS - help - This option allows you to set the kernel end of text alignment. When - RAM is mapped by blocks, the alignment needs to fit the size and - number of possible blocks. The default should be OK for most configs. - - Say N here unless you know what you are doing. - -config ETEXT_SHIFT - int "_etext shift" if ETEXT_SHIFT_BOOL - range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - range 19 23 if STRICT_KERNEL_RWX && PPC_8xx - default 17 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 19 if STRICT_KERNEL_RWX && PPC_8xx - default PPC_PAGE_SHIFT - help - On Book3S 32 (603+), IBATs are used to map kernel text. - Smaller is the alignment, greater is the number of necessary IBATs. - - On 8xx, large pages (512kb or 8M) are used to map kernel linear - memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. - config DATA_SHIFT_BOOL bool "Set custom data alignment" if STRICT_KERNEL_RWX && \ (PPC_BOOK3S_32 || PPC_8xx) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..54f23205c2b9 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -15,7 +15,6 @@ #include #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) -#define ETEXT_ALIGN_SIZE (1 << CONFIG_ETEXT_SHIFT) ENTRY(_stext) @@ -116,7 +115,7 @@ SECTIONS } :text - . = ALIGN(ETEXT_ALIGN_SIZE); + .
= ALIGN(PAGE_SIZE); _etext = .; PROVIDE32 (etext = .); From c8bef10a9f17b2b9549e37878b2bcd48039c136b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:20 +0000 Subject: [PATCH 35/43] powerpc/8xx: Refactor kernel address boundary comparison Now that linear and IMMR dedicated TLB handling is gone, kernel boundary address comparison is similar in the ITLB miss handler and in the DTLB miss handler. Create a macro named compare_to_kernel_boundary. When TASK_SIZE is strictly below 0x80000000 and PAGE_OFFSET is above 0x80000000, it is enough to compare to 0x80000000, and this can be done with a single instruction. Using the not. instruction, we get to use the 'blt' conditional branch as when doing a regular comparison: 0x00000000 <= addr <= 0x7fffffff ==> 0xffffffff >= NOT(addr) >= 0x80000000 The above test corresponds to a 'blt'. Otherwise, do a regular comparison using two instructions. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6312575d06a8813105e6564a3b12e1d373aa1b2f.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9f3f7f3d03a7..9a117b9f0998 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -32,10 +32,15 @@ #include "head_32.h" +.macro compare_to_kernel_boundary scratch, addr #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ -#define SIMPLE_KERNEL_ADDRESS 1 + not. \scratch, \addr +#else + rlwinm \scratch, \addr, 16, 0xfff8 + cmpli cr0, \scratch, PAGE_OFFSET@h #endif +.endm /* * We need an ITLB miss handler for kernel addresses if: @@ -209,20 +214,11 @@ InstructionTLBMiss: mtspr SPRN_MD_EPN, r10 #ifdef ITLB_MISS_KERNEL mfcr r11 -#if defined(SIMPLE_KERNEL_ADDRESS) - cmpi cr0, r10, 0 /* Address >= 0x80000000 */ -#else - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h -#endif + compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ #ifdef ITLB_MISS_KERNEL -#if defined(SIMPLE_KERNEL_ADDRESS) - bge+ 3f -#else blt+ 3f -#endif rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha 3: @@ -288,9 +284,7 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h - + compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ blt+ 3f rlwinm r10, r10, 0, 20, 31 From 34536d78068318def0a370462cbc3319e1ca9014 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:22 +0000 Subject: [PATCH 36/43] powerpc/8xx: Add a function to early map kernel via huge pages Add a function to early map kernel memory using huge pages. For 512k pages, just use the standard page table and map with 512k pages. For 8M pages, create a hugepd table and populate the two PGD entries with it. This function can only be used to create page tables at startup. Once the regular SLAB allocation functions replace memblock functions, this function cannot allocate new pages anymore. However, it can still update existing mappings with new protections. The hugepd_none() macro is moved into asm/hugetlb.h to be usable outside of mm/hugetlbpage.c. early_pte_alloc_kernel() is made visible. The _PAGE_HUGE flag is now displayed by ptdump.
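For context, a hedged sketch (not part of the patch) of why the 8M case populates two PGD entries: assuming the default 4k page size, one PGD entry covers 4M of address space on the 8xx, so a single 8M huge page spans two consecutive entries, and both must point at the same hugepd, exactly as early_hugepd_alloc_kernel() does in the diff below.

/* Illustrative sketch only: the two 4M PGD slots covering one 8M page
 * are populated with the same huge PTE table. */
static void __init sketch_populate_8M(hugepd_t *hpdp, pte_t *ptep)
{
	hugepd_populate_kernel(hpdp, ptep, PAGE_SHIFT_8M);
	hugepd_populate_kernel(hpdp + 1, ptep, PAGE_SHIFT_8M);
}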
Signed-off-by: Christophe Leroy [mpe: Change ptdump display to use "huge"] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/68325bcd3b6f93127f7810418a2352c3519066d6.1589866984.git.christophe.leroy@csgroup.eu --- .../include/asm/nohash/32/hugetlb-8xx.h | 5 ++ arch/powerpc/include/asm/pgtable.h | 2 + arch/powerpc/mm/nohash/8xx.c | 52 +++++++++++++++++++ arch/powerpc/mm/pgtable_32.c | 2 +- arch/powerpc/mm/ptdump/8xx.c | 5 ++ arch/powerpc/platforms/Kconfig.cputype | 1 + 6 files changed, 66 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h index 1c7d4693a78e..e752a5807a59 100644 --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -35,6 +35,11 @@ static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshi *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | _PMD_PAGE_8M); } +static inline void hugepd_populate_kernel(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | _PMD_PRESENT | _PMD_PAGE_8M); +} + static inline int check_and_get_huge_psize(int shift) { return shift_to_mmu_psize(shift); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index b1f1d5339735..961895be932a 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -107,6 +107,8 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned int shift); +pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va); + #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); #else diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index b735482e1529..72fb75f2a5f1 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -9,9 +9,11 @@ #include #include +#include #include #include #include +#include #include @@ -55,6 +57,56 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } +static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va) +{ + if (hpd_val(*pmdp) == 0) { + pte_t *ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + + if (!ptep) + return NULL; + + hugepd_populate_kernel((hugepd_t *)pmdp, ptep, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmdp + 1, ptep, PAGE_SHIFT_8M); + } + return hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); +} + +static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, + pgprot_t prot, int psize, bool new) +{ + pmd_t *pmdp = pmd_ptr_k(va); + pte_t *ptep; + + if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M)) + return -EINVAL; + + if (new) { + if (WARN_ON(slab_is_available())) + return -EINVAL; + + if (psize == MMU_PAGE_512K) + ptep = early_pte_alloc_kernel(pmdp, va); + else + ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va); + } else { + if (psize == MMU_PAGE_512K) + ptep = pte_offset_kernel(pmdp, va); + else + ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT); + } + + if (WARN_ON(!ptep)) + return -ENOMEM; + + /* The PTE should never be already present */ + if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot))) + return -EINVAL; + + set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot))); + + return 0; +} + /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. 
*/ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bd0cb6e3573e..05902bbff8d6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -61,7 +61,7 @@ static void __init *early_alloc_pgtable(unsigned long size) return ptr; } -static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) +pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) { if (pmd_none(*pmdp)) { pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE); diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 9e2d8e847d6e..4bc350736c1d 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -11,6 +11,11 @@ static const struct flag_info flag_array[] = { { + .mask = _PAGE_HUGE, + .val = _PAGE_HUGE, + .set = "huge", + .clear = " ", + }, { .mask = _PAGE_SH, .val = 0, .set = "user", diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index b0587b833517..404f26917da7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -56,6 +56,7 @@ config PPC_8xx select PPC_HAVE_KUEP select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK + select HUGETLBFS config 40x bool "AMCC 40x" From a623bb5861dc442dc8de9edc9b3116f8b7c235c4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:23 +0000 Subject: [PATCH 37/43] powerpc/8xx: Map IMMR with a huge page Map the IMMR area with a single 512k huge page. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9495dba06669da40e133f24607758fa6dcc65f66.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/8xx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 72fb75f2a5f1..f8fff1fa72e3 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -118,17 +118,13 @@ static bool immr_is_mapped __initdata; void __init mmu_mapin_immr(void) { - unsigned long p = PHYS_IMMR_BASE; - unsigned long v = VIRT_IMMR_BASE; - int offset; - if (immr_is_mapped) return; immr_is_mapped = true; - for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); + __early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE, + PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) From cf209951fa7f2e7a8ec92f45f27ea11bc024bbfc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:24 +0000 Subject: [PATCH 38/43] powerpc/8xx: Map linear memory with huge pages Map linear memory space with 512k and 8M pages whenever possible. Three mappings are performed: - One for kernel text - One for RO data - One for the rest Separating the mappings is done to be able to update the protection later when using STRICT_KERNEL_RWX. The ITLB miss handler now also needs to handle huge TLBs, unless kernel text is pinned.
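To make the boundary arithmetic concrete, here is a self-contained sketch, with symbol addresses invented for illustration, of how the linear space is split into the three mappings when STRICT_KERNEL_RWX is enabled:

#include <stdio.h>

#define SZ_8M		0x800000UL
#define ALIGN_8M(x)	(((x) + SZ_8M - 1) & ~(SZ_8M - 1))

int main(void)
{
	/* hypothetical physical addresses, for illustration only */
	unsigned long sinittext = 0x600000;	/* start of init text */
	unsigned long einittext = 0x640000;	/* end of init text */
	unsigned long top = 0x2000000;		/* 32M of RAM */

	unsigned long boundary = sinittext;	/* STRICT_KERNEL_RWX case */
	unsigned long einittext8 = ALIGN_8M(einittext);

	/* the same three chunks that mmu_mapin_ram() maps below */
	printf("text + RO data: 0x0 - 0x%lx\n", boundary);
	printf("init text:      0x%lx - 0x%lx\n", boundary, einittext8);
	printf("the rest (RW):  0x%lx - 0x%lx\n", einittext8, top);
	return 0;
}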
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c44f0ab5510474f25123d904cd1f4e5c6aa3c1ac.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 4 +-- arch/powerpc/mm/nohash/8xx.c | 50 +++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9a117b9f0998..abb71fad7d6a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -224,7 +224,7 @@ InstructionTLBMiss: 3: mtcr r11 #endif -#ifdef CONFIG_HUGETLBFS +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 #else @@ -234,7 +234,7 @@ InstructionTLBMiss: #endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#ifdef CONFIG_HUGETLBFS +#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MI_TWC, r11 #endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index f8fff1fa72e3..ec3ef75895d8 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -127,20 +127,68 @@ void __init mmu_mapin_immr(void) PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } +static void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) +{ + unsigned long v = PAGE_OFFSET + offset; + unsigned long p = offset; + + WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); + + for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); + for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K) + __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + + if (!new) + flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); +} + unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + WARN_ON(top < einittext8); + mmu_mapin_immr(); - return 0; + if (__map_without_ltlbs) + return 0; + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); + + if (top > SZ_32M) + memblock_set_current_limit(top); + + block_mapped_ram = top; + + return top; } void mmu_mark_initmem_nx(void) { + unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); + unsigned long sinittext = __pa(_sinittext); + unsigned long boundary = strict_kernel_rwx_enabled() ? 
sinittext : etext8; + unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + + mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); } #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) { + unsigned long sinittext = __pa(_sinittext); + + mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); } #endif From da1adea07576722da4597b0df7d00931f0203229 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:25 +0000 Subject: [PATCH 39/43] powerpc/8xx: Allow STRICT_KERNEL_RWX with pinned TLB Pinned TLBs are 8M. Now that there is no strict boundary anymore between text and RO data, it is possible to use an 8M pinned executable TLB that covers both text and RO data. When PIN_TLB_DATA or PIN_TLB_TEXT is selected, enforce 8M RW data alignment and allow STRICT_KERNEL_RWX. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c535fc97bf0dd8693192e25feeed8088701e00c6.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 8 +++++--- arch/powerpc/mm/nohash/8xx.c | 9 +++++++-- arch/powerpc/platforms/8xx/Kconfig | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d147d379b1b9..f5e82629e2cd 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -779,9 +779,10 @@ config THREAD_SHIFT want. Only change this if you know what you are doing. config DATA_SHIFT_BOOL - bool "Set custom data alignment" if STRICT_KERNEL_RWX && \ - (PPC_BOOK3S_32 || PPC_8xx) + bool "Set custom data alignment" depends on ADVANCED_OPTIONS + depends on STRICT_KERNEL_RWX + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !PIN_TLB_TEXT) help This option allows you to set the kernel data alignment. When RAM is mapped by blocks, the alignment needs to fit the size and @@ -803,7 +804,8 @@ config DATA_SHIFT On 8xx, large pages (512kb or 8M) are used to map kernel linear memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. + in that case. If PIN_TLB is selected, it must be aligned to 8M as + 8M pages will be pinned.
config FORCE_MAX_ZONEORDER int "Maximum zone order" diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index ec3ef75895d8..d8697f535c3e 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -127,8 +127,8 @@ void __init mmu_mapin_immr(void) PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } -static void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, - pgprot_t prot, bool new) +static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) { unsigned long v = PAGE_OFFSET + offset; unsigned long p = offset; @@ -181,6 +181,9 @@ void mmu_mark_initmem_nx(void) mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + + if (IS_ENABLED(CONFIG_PIN_TLB_TEXT)) + mmu_pin_tlb(block_mapped_ram, false); } #ifdef CONFIG_STRICT_KERNEL_RWX @@ -189,6 +192,8 @@ void mmu_mark_rodata_ro(void) unsigned long sinittext = __pa(_sinittext); mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); + if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) + mmu_pin_tlb(block_mapped_ram, true); } #endif diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 04ea1a8a0bdc..05669f2fadce 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -167,7 +167,7 @@ menu "8xx advanced setup" config PIN_TLB bool "Pinned Kernel TLBs" - depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX + depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC help On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each table 4 TLBs can be pinned. From fcdafd10a363cf3278ce29c6c9a92930380c6cd8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:26 +0000 Subject: [PATCH 40/43] powerpc/8xx: Allow large TLBs with DEBUG_PAGEALLOC DEBUG_PAGEALLOC only manages RW data. Text and RO data can still be mapped with hugepages and pinned TLB. In order to map with hugepages, also enforce a 512kB data alignment minimum. That's a trade-off between size and speed, taking into account that DEBUG_PAGEALLOC is a debug option. The alignment is still tunable anyway. We also allow tuning of the alignment for book3s, to limit the complexity of the Kconfig test, which will disappear anyway in the following patches once DEBUG_PAGEALLOC is handled together with BATs. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c13256f2d356a316715da61fe089b3623ef217a5.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 11 +++++++---- arch/powerpc/mm/init_32.c | 5 ++++- arch/powerpc/mm/nohash/8xx.c | 11 ++++++++--- arch/powerpc/platforms/8xx/Kconfig | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f5e82629e2cd..fcb0a9ae9872 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -781,8 +781,9 @@ config THREAD_SHIFT config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS - depends on STRICT_KERNEL_RWX - depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !PIN_TLB_TEXT) + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && \ (!PIN_TLB_TEXT || !STRICT_KERNEL_RWX)) help This option allows you to set the kernel data alignment.
When RAM is mapped by blocks, the alignment needs to fit the size and @@ -793,10 +794,12 @@ config DATA_SHIFT_BOOL config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - range 19 23 if STRICT_KERNEL_RWX && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx + default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA + default 19 if DEBUG_PAGEALLOC && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index a6991ef8727d..8977a7c2543d 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -96,11 +96,14 @@ static void __init MMU_setup(void) if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } + if (IS_ENABLED(CONFIG_PPC_8xx)) + return; + if (debug_pagealloc_enabled()) { __map_without_bats = 1; __map_without_ltlbs = 1; } - if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx)) + if (strict_kernel_rwx_enabled()) __map_without_ltlbs = 1; } diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index d8697f535c3e..286441bbbe49 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -150,7 +150,8 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); - unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + unsigned long boundary = strict_boundary ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); WARN_ON(top < einittext8); @@ -161,8 +162,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return 0; mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); - mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); + if (debug_pagealloc_enabled()) { + top = boundary; + } else { + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); + mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true); + } if (top > SZ_32M) memblock_set_current_limit(top); diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 05669f2fadce..abb2b45b2789 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -167,7 +167,7 @@ menu "8xx advanced setup" config PIN_TLB bool "Pinned Kernel TLBs" - depends on ADVANCED_OPTIONS && !DEBUG_PAGEALLOC + depends on ADVANCED_OPTIONS help On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each table 4 TLBs can be pinned. From a2feeb2c2ecbd9c9206d66f238ca710b760c9ef5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:27 +0000 Subject: [PATCH 41/43] powerpc/8xx: Implement dedicated kasan_init_region() Implement a kasan_init_region() dedicated to 8xx that allocates KASAN regions using huge pages. 
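For scale, a hedged sketch of the path selection, assuming the generic KASAN scaling where the shadow is 1/8th of the region:

/* Illustrative only: a 64M memblock region needs an 8M shadow. When the
 * shadow start is 8M aligned, the whole shadow can be covered by the 8M
 * hugepd path; an unaligned start or a sub-8M tail falls back to 512k
 * huge PTEs where 512k aligned, and to plain 4k pages otherwise. */
size_t region = SZ_64M;					/* hypothetical */
size_t shadow = region >> KASAN_SHADOW_SCALE_SHIFT;	/* 8M */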
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d2d60202a8821dc81cffe6ff59cc13c15b7e4bb6.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/kasan/8xx.c | 74 ++++++++++++++++++++++++++++++++++ arch/powerpc/mm/kasan/Makefile | 1 + 2 files changed, 75 insertions(+) create mode 100644 arch/powerpc/mm/kasan/8xx.c diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c new file mode 100644 index 000000000000..db4ef44af22f --- /dev/null +++ b/arch/powerpc/mm/kasan/8xx.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include + +static int __init +kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block) +{ + pmd_t *pmd = pmd_ptr_k(k_start); + unsigned long k_cur, k_next; + + for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) { + pte_basic_t *new; + + k_next = pgd_addr_end(k_cur, k_end); + k_next = pgd_addr_end(k_next, k_end); + if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) + continue; + + new = memblock_alloc(sizeof(pte_basic_t), SZ_4K); + if (!new) + return -ENOMEM; + + *new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL))); + + hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M); + hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M); + } + return 0; +} + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur; + int ret; + void *block; + + block = memblock_alloc(k_end - k_start, SZ_8M); + if (!block) + return -ENOMEM; + + if (IS_ALIGNED(k_start, SZ_8M)) { + kasan_init_shadow_8M(k_start, ALIGN_DOWN(k_end, SZ_8M), block); + k_cur = ALIGN_DOWN(k_end, SZ_8M); + if (k_cur == k_end) + goto finish; + } else { + k_cur = k_start; + } + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + if (k_cur < ALIGN_DOWN(k_end, SZ_512K)) + pte = pte_mkhuge(pte); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } +finish: + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 6577897673dd..440038ea79f1 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -3,3 +3,4 @@ KASAN_SANITIZE := n obj-$(CONFIG_PPC32) += kasan_init_32.o +obj-$(CONFIG_PPC_8xx) += 8xx.o From 2b279c0348af62f42be346c1ea6d70bac98df0f9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:28 +0000 Subject: [PATCH 42/43] powerpc/32s: Allow mapping with BATs with DEBUG_PAGEALLOC DEBUG_PAGEALLOC only manages RW data. Text and RO data can still be mapped with BATs. In order to map with BATs, also enforce data alignment. It is set to 256k by default (DATA_SHIFT = 18), which is a good compromise for keeping enough BATs available for KASAN and IMMR as well.
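The effect on mmu_mapin_ram() can be pictured with this hedged restatement of the clamp added below, where 'border' separates text/RO data from RW data:

/* With DEBUG_PAGEALLOC, BATs may only cover the text/RO data side of
 * 'border'; RW data above it stays page-mapped so that individual
 * pages can be unmapped by the allocator checks. */
if (debug_pagealloc_enabled()) {
	if (base >= border)	/* pure RW data: no BAT mapping */
		return base;
	if (top >= border)	/* clip BAT coverage at the border */
		top = border;
}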
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fd29c1718ee44d82115d0e835ced808eb4ccbf51.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 1 + arch/powerpc/mm/book3s32/mmu.c | 6 ++++++ arch/powerpc/mm/init_32.c | 5 ++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fcb0a9ae9872..752deddc9ed9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -797,6 +797,7 @@ config DATA_SHIFT range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 + default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA default 19 if DEBUG_PAGEALLOC && PPC_8xx diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a9b2cbc74797..a6dcc708eee3 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -170,6 +170,12 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pr_debug("RAM mapped without BATs\n"); return base; } + if (debug_pagealloc_enabled()) { + if (base >= border) + return base; + if (top >= border) + top = border; + } if (!strict_kernel_rwx_enabled() || base >= border || top <= border) return __mmu_mapin_ram(base, top); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 8977a7c2543d..36c39bd37256 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -99,10 +99,9 @@ static void __init MMU_setup(void) if (IS_ENABLED(CONFIG_PPC_8xx)) return; - if (debug_pagealloc_enabled()) { - __map_without_bats = 1; + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; - } + if (strict_kernel_rwx_enabled()) __map_without_ltlbs = 1; } From 7974c4732642f710b5111165ae1f7f7fed822282 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 19 May 2020 05:49:29 +0000 Subject: [PATCH 43/43] powerpc/32s: Implement dedicated kasan_init_region() Implement a kasan_init_region() dedicated to book3s/32 that allocates KASAN regions using BATs. 
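As a worked example of the BAT sizing arithmetic used below, a self-contained sketch with an assumed 12M shadow size (ffs() returns the 1-based position of the lowest set bit):

#include <stdio.h>
#include <strings.h>

int main(void)
{
	int k_size = 12 << 20;					/* assumed: 12M shadow */
	int k_size_base = 1 << (ffs(k_size) - 1);		/* 4M */
	int k_size_more = 1 << (ffs(k_size - k_size_base) - 1);	/* 8M */

	/* two BATs, 4M then 8M, cover the whole 12M shadow; any
	 * remainder would be mapped with 4k pages instead */
	printf("first BAT: %dM, second BAT: %dM\n",
	       k_size_base >> 20, k_size_more >> 20);
	return 0;
}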
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/709e821602b48a1d7c211a9b156da26db98c3e9d.1589866984.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/kasan.h | 1 + arch/powerpc/mm/kasan/Makefile | 1 + arch/powerpc/mm/kasan/book3s_32.c | 57 +++++++++++++++++++++++++++ arch/powerpc/mm/kasan/kasan_init_32.c | 2 +- 4 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/mm/kasan/book3s_32.c diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 107a24c3f7b3..be85c7005fb1 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -34,6 +34,7 @@ static inline void kasan_init(void) { } static inline void kasan_late_init(void) { } #endif +void kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte); int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end); int kasan_init_region(void *start, size_t size); diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile index 440038ea79f1..bb1a5408b86b 100644 --- a/arch/powerpc/mm/kasan/Makefile +++ b/arch/powerpc/mm/kasan/Makefile @@ -4,3 +4,4 @@ KASAN_SANITIZE := n obj-$(CONFIG_PPC32) += kasan_init_32.o obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c new file mode 100644 index 000000000000..4bc491a4a1fd --- /dev/null +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include + +int __init kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur = k_start; + int k_size = k_end - k_start; + int k_size_base = 1 << (ffs(k_size) - 1); + int ret; + void *block; + + block = memblock_alloc(k_size, k_size_base); + + if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { + int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); + + setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); + if (k_size_more >= SZ_128K) + setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, + k_size_more, PAGE_KERNEL); + if (v_block_mapped(k_start)) + k_cur = k_start + k_size_base; + if (v_block_mapped(k_start + k_size_base)) + k_cur = k_start + k_size_base + k_size_more; + + update_bats(); + } + + if (!block) + block = memblock_alloc(k_size, PAGE_SIZE); + if (!block) + return -ENOMEM; + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + kasan_update_early_region(k_start, k_cur, __pte(0)); + + for (; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_ptr_k(k_cur); + void *va = block + k_cur - k_start; + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } + flush_tlb_kernel_range(k_start, k_end); + return 0; +} diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 76d418af4ce8..c42085801c04 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -79,7 +79,7 @@ int __init __weak kasan_init_region(void *start, size_t size) return 0; } -static void __init +void __init kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte) { unsigned long k_cur;