diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 60cda8fb0677..fc7f056e9d97 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -521,13 +521,9 @@ extern void slb_set_size(u16 size);
  * from mmu context id and effective segment id of the address.
  *
  * For user processes max context id is limited to MAX_USER_CONTEXT.
-
- * For kernel space, we use context ids 1-4 to map addresses as below:
- * NOTE: each context only support 64TB now.
- * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * More details in get_user_context.
+ *
+ * For kernel space, see get_kernel_context.
  *
  * The proto-VSIDs are then scrambled into real VSIDs with the
  * multiplicative hash:
@@ -567,6 +563,21 @@ extern void slb_set_size(u16 size);
 #define ESID_BITS_MASK ((1 << ESID_BITS) - 1)
 #define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1)
 
+/*
+ * Some configs now support a MAX_PHYSMEM larger than 512TB, so the kernel
+ * linear mapping needs more than one context.
+ * For vmalloc and memmap, we use just one context with 512TB. With 64 byte
+ * struct page size, we need only 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
+ */
+#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#else
+#define MAX_KERNEL_CTX_CNT 1
+#endif
+
+#define MAX_VMALLOC_CTX_CNT 1
+#define MAX_MEMMAP_CTX_CNT 1
+
 /*
  * 256MB segment
  * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
@@ -577,12 +588,13 @@ extern void slb_set_size(u16 size);
  * We also need to avoid the last segment of the last context, because that
  * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
  * because of the modulo operation in vsid scramble.
+ *
+ * MIN_USER_CONTEXT sits above every kernel context. The + 2 accounts for
+ * context 0 (kernel contexts start at 1) and the unused 0xe region context.
  */
 #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
-#define MIN_USER_CONTEXT (5)
-
-/* Would be nice to use KERNEL_REGION_ID here */
-#define KERNEL_REGION_CONTEXT_OFFSET (0xc - 1)
+#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
+				 MAX_MEMMAP_CTX_CNT + 2)
 
 /*
  * For platforms that support on 65bit VA we limit the context bits
@@ -742,6 +754,39 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
 	return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
 }
 
+/*
+ * For kernel space, we use context ids as shown below (example for
+ * MAX_KERNEL_CTX_CNT == 4). Range is 512TB per context.
+ *
+ * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff]
+ * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
+ * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
+ * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
+ *
+ * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
+ * 0x00006 - Not used - Can map 0xe000000000000000 range.
+ * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
+ *
+ * Linear map: context = 1 + (offset in region >> MAX_EA_BITS_PER_CONTEXT).
+ * Other regions: context = (region id - 0xc) + MAX_KERNEL_CTX_CNT.
+ */
+static inline unsigned long get_kernel_context(unsigned long ea)
+{
+	unsigned long region_id = REGION_ID(ea);
+	unsigned long ctx;
+	/*
+	 * Only the linear mapping is spread over multiple contexts.
+	 */
+	if (region_id == KERNEL_REGION_ID) {
+		/*
+		 * Callers must already have checked ea against the addr limit.
+		 */
+		ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+	} else
+		ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+	return ctx;
+}
+
 /*
  * This is only valid for addresses >= PAGE_OFFSET
  */
@@ -752,20 +797,7 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 	if (!is_kernel_addr(ea))
 		return 0;
 
-	/*
-	 * For kernel space, we use context ids 1-4 to map the address space as
-	 * below:
-	 *
-	 * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
-	 * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
-	 * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
-	 * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
-	 *
-	 * So we can compute the context from the region (top nibble) by
-	 * subtracting 11, or 0xc - 1.
-	 */
-	context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
-
+	context = get_kernel_context(ea);
 	return get_vsid(context, ea, ssize);
 }
 
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 13ea441ac531..eb20eb3b8fb0 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -309,6 +309,21 @@ static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
  */
 #define MMU_PAGE_COUNT 16
 
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+	defined(CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS 51
+#else
+#define MAX_PHYSMEM_BITS 46
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/mmu.h>
 #else /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 28f5dae25db6..68da49320592 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -9,17 +9,6 @@
  * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
  */
 #define SECTION_SIZE_BITS 24
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP.
- */
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define MAX_PHYSMEM_BITS 47
-#else
-#define MAX_PHYSMEM_BITS 46
-#endif
 
 #endif /* CONFIG_SPARSEMEM */
 
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 4fe5cb5052b6..c3fdf2969d9f 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -693,16 +693,27 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
 	unsigned long flags;
 	int ssize;
 
-	if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
-		return -EFAULT;
-
 	if (id == KERNEL_REGION_ID) {
+
+		/* Reject offsets at or beyond 1 << MAX_PHYSMEM_BITS */
+		if ((ea & ~REGION_MASK) >= (1UL << MAX_PHYSMEM_BITS))
+			return -EFAULT;
+
 		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 	} else if (id == VMEMMAP_REGION_ID) {
+
+		if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+			return -EFAULT;
+
 		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
 #endif
 	} else if (id == VMALLOC_REGION_ID) {
+
+		if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+			return -EFAULT;
+
 		if (ea < H_VMALLOC_END)
 			flags = get_paca()->vmalloc_sllp;
 		else
@@ -715,8 +726,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
 	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		ssize = MMU_SEGSIZE_256M;
 
-	context = id - KERNEL_REGION_CONTEXT_OFFSET;
-
+	context = get_kernel_context(ea);
 	return slb_insert_entry(ea, context, flags, ssize, true);
 }
 