Notable changes: - Removal of the NPU DMA code, used by the out-of-tree Nvidia driver, as well as some other functions only used by drivers that haven't (yet?) made it upstream. - A fix for a bug in our handling of hardware watchpoints (eg. perf record -e mem: ...) which could lead to register corruption and kernel crashes. - Enable HAVE_ARCH_HUGE_VMAP, which allows us to use large pages for vmalloc when using the Radix MMU. - A large but incremental rewrite of our exception handling code to use gas macros rather than multiple levels of nested CPP macros. And the usual small fixes, cleanups and improvements. Thanks to: Alastair D'Silva, Alexey Kardashevskiy, Andreas Schwab, Aneesh Kumar K.V, Anju T Sudhakar, Anton Blanchard, Arnd Bergmann, Athira Rajeev, Cédric Le Goater, Christian Lamparter, Christophe Leroy, Christophe Lombard, Christoph Hellwig, Daniel Axtens, Denis Efremov, Enrico Weigelt, Frederic Barrat, Gautham R. Shenoy, Geert Uytterhoeven, Geliang Tang, Gen Zhang, Greg Kroah-Hartman, Greg Kurz, Gustavo Romero, Krzysztof Kozlowski, Madhavan Srinivasan, Masahiro Yamada, Mathieu Malaterre, Michael Neuling, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Nishad Kamdar, Oliver O'Halloran, Qian Cai, Ravi Bangoria, Sachin Sant, Sam Bobroff, Satheesh Rajendran, Segher Boessenkool, Shaokun Zhang, Shawn Anastasio, Stewart Smith, Suraj Jitindar Singh, Thiago Jung Bauermann, YueHaibing. 
-----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJdKVoLAAoJEFHr6jzI4aWA0kIP/A6shIbbE7H5W2hFrqt/PPPK 3+VrvPKbOFF+W6hcE/RgSZmEnUo0svdNjHUd/eMfFS1vb/uRt2QDdrsHUNNwURQL M2mcLXFwYpnjSjb/XMgDbHpAQxjeGfTdYLonUIejN7Rk8KQUeLyKQ3SBn6kfMc46 DnUUcPcjuRGaETUmVuZZ4e40ZWbJp8PKDrSJOuUrTPXMaK5ciNbZk5mCWXGbYl6G BMQAyv4ld/417rNTjBEP/T2foMJtioAt4W6mtlgdkOTdIEZnFU67nNxDBthNSu2c 95+I+/sML4KOp1R4yhqLSLIDDbc3bg3c99hLGij0d948z3bkSZ8bwnPaUuy70C4v U8rvl/+N6C6H3DgSsPE/Gnkd8DnudqWY8nULc+8p3fXljGwww6/Qgt+6yCUn8BdW WgixkSjKgjDmzTw8trIUNEqORrTVle7cM2hIyIK2Q5T4kWzNQxrLZ/x/3wgoYjUa 1KwIzaRo5JKZ9D3pJnJ5U+knE2/90rJIyfcp0W6ygyJsWKi2GNmq1eN3sKOw0IxH Tg86RENIA/rEMErNOfP45sLteMuTR7of7peCG3yumIOZqsDVYAzerpvtSgip2cvK aG+9HcYlBFOOOF9Dabi8GXsTBLXLfwiyjjLSpA9eXPwW8KObgiNfTZa7ujjTPvis 4mk9oukFTFUpfhsMmI3T =3dBZ -----END PGP SIGNATURE----- Merge tag 'powerpc-5.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc updates from Michael Ellerman: "Notable changes: - Removal of the NPU DMA code, used by the out-of-tree Nvidia driver, as well as some other functions only used by drivers that haven't (yet?) made it upstream. - A fix for a bug in our handling of hardware watchpoints (eg. perf record -e mem: ...) which could lead to register corruption and kernel crashes. - Enable HAVE_ARCH_HUGE_VMAP, which allows us to use large pages for vmalloc when using the Radix MMU. - A large but incremental rewrite of our exception handling code to use gas macros rather than multiple levels of nested CPP macros. And the usual small fixes, cleanups and improvements. Thanks to: Alastair D'Silva, Alexey Kardashevskiy, Andreas Schwab, Aneesh Kumar K.V, Anju T Sudhakar, Anton Blanchard, Arnd Bergmann, Athira Rajeev, Cédric Le Goater, Christian Lamparter, Christophe Leroy, Christophe Lombard, Christoph Hellwig, Daniel Axtens, Denis Efremov, Enrico Weigelt, Frederic Barrat, Gautham R. 
Shenoy, Geert Uytterhoeven, Geliang Tang, Gen Zhang, Greg Kroah-Hartman, Greg Kurz, Gustavo Romero, Krzysztof Kozlowski, Madhavan Srinivasan, Masahiro Yamada, Mathieu Malaterre, Michael Neuling, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Nishad Kamdar, Oliver O'Halloran, Qian Cai, Ravi Bangoria, Sachin Sant, Sam Bobroff, Satheesh Rajendran, Segher Boessenkool, Shaokun Zhang, Shawn Anastasio, Stewart Smith, Suraj Jitindar Singh, Thiago Jung Bauermann, YueHaibing" * tag 'powerpc-5.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (163 commits) powerpc/powernv/idle: Fix restore of SPRN_LDBAR for POWER9 stop state. powerpc/eeh: Handle hugepages in ioremap space ocxl: Update for AFU descriptor template version 1.1 powerpc/boot: pass CONFIG options in a simpler and more robust way powerpc/boot: add {get, put}_unaligned_be32 to xz_config.h powerpc/irq: Don't WARN continuously in arch_local_irq_restore() powerpc/module64: Use symbolic instructions names. powerpc/module32: Use symbolic instructions names. powerpc: Move PPC_HA() PPC_HI() and PPC_LO() to ppc-opcode.h powerpc/module64: Fix comment in R_PPC64_ENTRY handling powerpc/boot: Add lzo support for uImage powerpc/boot: Add lzma support for uImage powerpc/boot: don't force gzipped uImage powerpc/8xx: Add microcode patch to move SMC parameter RAM. powerpc/8xx: Use IO accessors in microcode programming. powerpc/8xx: replace #ifdefs by IS_ENABLED() in microcode.c powerpc/8xx: refactor programming of microcode CPM params. powerpc/8xx: refactor printing of microcode patch name. powerpc/8xx: Refactor microcode write powerpc/8xx: refactor writing of CPM microcode arrays ...
297 lines
7.2 KiB
C
297 lines
7.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* MMU context allocation for 64-bit kernels.
|
|
*
|
|
* Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/pkeys.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/export.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/pgalloc.h>
|
|
|
|
/*
 * ID allocator backing every MMU context id handed out in this file: ids are
 * taken from it with ida_alloc_range() and given back with ida_free().
 */
static DEFINE_IDA(mmu_context_ida);
|
|
|
|
static int alloc_context_id(int min_id, int max_id)
|
|
{
|
|
return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
|
|
}
|
|
|
|
/*
 * Reserve one specific context id by requesting the single-element range
 * [id, id] from the allocator. Emits a warning if anything other than @id
 * comes back.
 */
void hash__reserve_context_id(int id)
{
	int rc = alloc_context_id(id, id);

	WARN(rc != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, rc);
}
|
|
|
|
int hash__alloc_context_id(void)
|
|
{
|
|
unsigned long max;
|
|
|
|
if (mmu_has_feature(MMU_FTR_68_BIT_VA))
|
|
max = MAX_USER_CONTEXT;
|
|
else
|
|
max = MAX_USER_CONTEXT_65BIT_VA;
|
|
|
|
return alloc_context_id(MIN_USER_CONTEXT, max);
|
|
}
|
|
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
|
|
|
|
/*
 * Local prototype for slb_setup_new_exec(), which hash__setup_new_exec()
 * calls. NOTE(review): no definition is visible in this part of the file;
 * presumably it lives in the SLB code - confirm and consider moving this
 * declaration to a shared header.
 */
void slb_setup_new_exec(void);
|
|
|
|
/*
 * Give @ctx a fresh set of context ids.
 *
 * Returns ctx->id (the id stored in slot 0) on success, or the negative value
 * returned by hash__alloc_context_id() on failure. On failure every id that
 * this call allocated has been freed again.
 */
static int realloc_context_ids(mm_context_t *ctx)
{
	int slot, new_id;

	/*
	 * Slot 0 (aka. ctx->id) is special: it always gets a new id, even if
	 * none was allocated before (the exec case, where ctx is newly
	 * allocated). Any other slot is only refreshed if it currently holds
	 * a non-zero id.
	 *
	 * The existing ids must stay in the array while we walk it, because
	 * "non-zero" is how we decide whether a slot needs a new id. On error
	 * we must free only the ids allocated by this call and never one of
	 * the pre-existing ones (that would risk a UAF). The unwind loop
	 * therefore pre-decrements slot, which skips the entry that was
	 * tested but could not be reallocated.
	 */
	for (slot = 0; slot < ARRAY_SIZE(ctx->extended_id); slot++) {
		if (slot != 0 && !ctx->extended_id[slot])
			continue;

		new_id = hash__alloc_context_id();
		if (new_id < 0)
			goto unwind;

		ctx->extended_id[slot] = new_id;
	}

	/* The caller expects the primary id back. */
	return ctx->id;

unwind:
	while (--slot >= 0) {
		if (ctx->extended_id[slot])
			ida_free(&mmu_context_ida, ctx->extended_id[slot]);
	}

	return new_id;
}
|
|
|
|
static int hash__init_new_context(struct mm_struct *mm)
|
|
{
|
|
int index;
|
|
|
|
mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
|
|
GFP_KERNEL);
|
|
if (!mm->context.hash_context)
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
* The old code would re-promote on fork, we don't do that when using
|
|
* slices as it could cause problem promoting slices that have been
|
|
* forced down to 4K.
|
|
*
|
|
* For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
|
|
* explicitly against context.id == 0. This ensures that we properly
|
|
* initialize context slice details for newly allocated mm's (which will
|
|
* have id == 0) and don't alter context slice inherited via fork (which
|
|
* will have id != 0).
|
|
*
|
|
* We should not be calling init_new_context() on init_mm. Hence a
|
|
* check against 0 is OK.
|
|
*/
|
|
if (mm->context.id == 0) {
|
|
memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
|
|
slice_init_new_context_exec(mm);
|
|
} else {
|
|
/* This is fork. Copy hash_context details from current->mm */
|
|
memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
|
|
#ifdef CONFIG_PPC_SUBPAGE_PROT
|
|
/* inherit subpage prot detalis if we have one. */
|
|
if (current->mm->context.hash_context->spt) {
|
|
mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
|
|
GFP_KERNEL);
|
|
if (!mm->context.hash_context->spt) {
|
|
kfree(mm->context.hash_context);
|
|
return -ENOMEM;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
index = realloc_context_ids(&mm->context);
|
|
if (index < 0) {
|
|
#ifdef CONFIG_PPC_SUBPAGE_PROT
|
|
kfree(mm->context.hash_context->spt);
|
|
#endif
|
|
kfree(mm->context.hash_context);
|
|
return index;
|
|
}
|
|
|
|
pkey_mm_init(mm);
|
|
return index;
|
|
}
|
|
|
|
/*
 * Hash-MMU hook for a new exec: runs the slice setup first and the SLB setup
 * second.
 */
void hash__setup_new_exec(void)
{
	slice_setup_new_exec();
	slb_setup_new_exec();
}
|
|
|
|
static int radix__init_new_context(struct mm_struct *mm)
|
|
{
|
|
unsigned long rts_field;
|
|
int index, max_id;
|
|
|
|
max_id = (1 << mmu_pid_bits) - 1;
|
|
index = alloc_context_id(mmu_base_pid, max_id);
|
|
if (index < 0)
|
|
return index;
|
|
|
|
/*
|
|
* set the process table entry,
|
|
*/
|
|
rts_field = radix__get_tree_size();
|
|
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
|
|
|
|
/*
|
|
* Order the above store with subsequent update of the PID
|
|
* register (at which point HW can start loading/caching
|
|
* the entry) and the corresponding load by the MMU from
|
|
* the L2 cache.
|
|
*/
|
|
asm volatile("ptesync;isync" : : : "memory");
|
|
|
|
mm->context.hash_context = NULL;
|
|
|
|
return index;
|
|
}
|
|
|
|
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
|
|
{
|
|
int index;
|
|
|
|
if (radix_enabled())
|
|
index = radix__init_new_context(mm);
|
|
else
|
|
index = hash__init_new_context(mm);
|
|
|
|
if (index < 0)
|
|
return index;
|
|
|
|
mm->context.id = index;
|
|
|
|
mm->context.pte_frag = NULL;
|
|
mm->context.pmd_frag = NULL;
|
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
|
mm_iommu_init(mm);
|
|
#endif
|
|
atomic_set(&mm->context.active_cpus, 0);
|
|
atomic_set(&mm->context.copros, 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void __destroy_context(int context_id)
|
|
{
|
|
ida_free(&mmu_context_ida, context_id);
|
|
}
|
|
EXPORT_SYMBOL_GPL(__destroy_context);
|
|
|
|
/*
 * Release every non-zero id held in ctx->extended_id[] and free
 * ctx->hash_context.
 */
static void destroy_contexts(mm_context_t *ctx)
{
	int slot;

	for (slot = 0; slot < ARRAY_SIZE(ctx->extended_id); slot++) {
		int id = ctx->extended_id[slot];

		/* A zero entry means the slot holds no id. */
		if (!id)
			continue;

		ida_free(&mmu_context_ida, id);
	}

	kfree(ctx->hash_context);
}
|
|
|
|
static void pmd_frag_destroy(void *pmd_frag)
|
|
{
|
|
int count;
|
|
struct page *page;
|
|
|
|
page = virt_to_page(pmd_frag);
|
|
/* drop all the pending references */
|
|
count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
|
|
/* We allow PTE_FRAG_NR fragments from a PTE page */
|
|
if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
|
|
pgtable_pmd_page_dtor(page);
|
|
__free_page(page);
|
|
}
|
|
}
|
|
|
|
static void destroy_pagetable_cache(struct mm_struct *mm)
|
|
{
|
|
void *frag;
|
|
|
|
frag = mm->context.pte_frag;
|
|
if (frag)
|
|
pte_frag_destroy(frag);
|
|
|
|
frag = mm->context.pmd_frag;
|
|
if (frag)
|
|
pmd_frag_destroy(frag);
|
|
return;
|
|
}
|
|
|
|
void destroy_context(struct mm_struct *mm)
|
|
{
|
|
#ifdef CONFIG_SPAPR_TCE_IOMMU
|
|
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
|
|
#endif
|
|
if (radix_enabled())
|
|
WARN_ON(process_tb[mm->context.id].prtb0 != 0);
|
|
else
|
|
subpage_prot_free(mm);
|
|
destroy_contexts(&mm->context);
|
|
mm->context.id = MMU_NO_CONTEXT;
|
|
}
|
|
|
|
void arch_exit_mmap(struct mm_struct *mm)
|
|
{
|
|
destroy_pagetable_cache(mm);
|
|
|
|
if (radix_enabled()) {
|
|
/*
|
|
* Radix doesn't have a valid bit in the process table
|
|
* entries. However we know that at least P9 implementation
|
|
* will avoid caching an entry with an invalid RTS field,
|
|
* and 0 is invalid. So this will do.
|
|
*
|
|
* This runs before the "fullmm" tlb flush in exit_mmap,
|
|
* which does a RIC=2 tlbie to clear the process table
|
|
* entry. See the "fullmm" comments in tlb-radix.c.
|
|
*
|
|
* No barrier required here after the store because
|
|
* this process will do the invalidate, which starts with
|
|
* ptesync.
|
|
*/
|
|
process_tb[mm->context.id].prtb0 = 0;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_PPC_RADIX_MMU
/*
 * Switch the radix MMU to @next by loading its context id into the PID
 * register, followed by an isync. @prev is not used.
 */
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	mtspr(SPRN_PID, next->context.id);
	isync();
}
#endif
|