6c3ac11343
Notable changes: - Enable THREAD_INFO_IN_TASK to move thread_info off the stack. - A big series from Christoph reworking our DMA code to use more of the generic infrastructure, as he said: "This series switches the powerpc port to use the generic swiotlb and noncoherent dma ops, and to use more generic code for the coherent direct mapping, as well as removing a lot of dead code." - Increase our vmalloc space to 512T with the Hash MMU on modern CPUs, allowing us to support machines with larger amounts of total RAM or distance between nodes. - Two series from Christophe, one to optimise TLB miss handlers on 6xx, and another to optimise the way STRICT_KERNEL_RWX is implemented on some 32-bit CPUs. - Support for KCOV coverage instrumentation which means we can run syzkaller and discover even more bugs in our code. And as always many clean-ups, reworks and minor fixes etc. Thanks to: Alan Modra, Alexey Kardashevskiy, Alistair Popple, Andrea Arcangeli, Andrew Donnellan, Aneesh Kumar K.V, Aravinda Prasad, Balbir Singh, Brajeswar Ghosh, Breno Leitao, Christian Lamparter, Christian Zigotzky, Christophe Leroy, Christoph Hellwig, Corentin Labbe, Daniel Axtens, David Gibson, Diana Craciun, Firoz Khan, Gustavo A. R. Silva, Igor Stoppa, Joe Lawrence, Joel Stanley, Jonathan Neuschäfer, Jordan Niethe, Laurent Dufour, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Masahiro Yamada, Mathieu Malaterre, Matteo Croce, Meelis Roos, Michael W. Bringmann, Nathan Chancellor, Nathan Fontenot, Nicholas Piggin, Nick Desaulniers, Nicolai Stange, Oliver O'Halloran, Paul Mackerras, Peter Xu, PrasannaKumar Muralidharan, Qian Cai, Rashmica Gupta, Reza Arbab, Robert P. J. Day, Russell Currey, Sabyasachi Gupta, Sam Bobroff, Sandipan Das, Sergey Senozhatsky, Souptick Joarder, Stewart Smith, Tyrel Datwyler, Vaibhav Jain, YueHaibing. 
-----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJcgRJlAAoJEFHr6jzI4aWAL9oP+gPlrZgyaAg/51lmubLtlbtk QuGU8EiuJZoJD1OHrMPtppBOY7rQZOxJe58AoPig8wTvs+j/TxJ25fmiZncnf5U2 PC8QAjbj0UmQHgy+K30sUeOnDg9tdkHKHJ5/ecjJcvykkqsjyMnV7biFQ1cOA0HT LflXHEEtiG9P9u7jZoAhtnfpgn1/l9mhTYMe26J1fqvC0164qMDFaXDTQXyDfyvG gmuqccGMawSk7IdagmQxwXtwyfwOnarmGn+n31XKRejApGZ/pjiEA23JOJOaJcia m76Jy3roao6sEtCUNpBFXEtwOy9POy3OiGy6yg/9896tDMvG84OuO6ltV1nFGawL PmwE+ug63L4g/HWxZyAeb26T2oTTp/YIaKQPtsq4d286pvg/qr2KPNzFoAEhmJqU yLrebv276pVeiLpLmCLPvcPj9t76vWKZaUm0FoE+zUDg7Rl7Alow8A/c4tdjOI6y QwpbCiYseyiJ32lCZZdbN7Cy6+iM6vb3i1oNKc8MVqhBGTwLJnTU0ruPBSvCaRvD NoQWO1RWpNu/BuivuLEKS9q3AoxenGwiqowxGhdVmI3Oc9jGWcEYlduR00VDYPVp /RCfwtTY5NyC++h5cnbz8aLJ1hBXG5m79CXfprV+zPWeiLPCaMT6w9Y5QUS2wqA+ EZ734NknDJOjaHc4cGdZ =Z9bb -----END PGP SIGNATURE----- Merge tag 'powerpc-5.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc updates from Michael Ellerman: "Notable changes: - Enable THREAD_INFO_IN_TASK to move thread_info off the stack. - A big series from Christoph reworking our DMA code to use more of the generic infrastructure, as he said: "This series switches the powerpc port to use the generic swiotlb and noncoherent dma ops, and to use more generic code for the coherent direct mapping, as well as removing a lot of dead code." - Increase our vmalloc space to 512T with the Hash MMU on modern CPUs, allowing us to support machines with larger amounts of total RAM or distance between nodes. - Two series from Christophe, one to optimise TLB miss handlers on 6xx, and another to optimise the way STRICT_KERNEL_RWX is implemented on some 32-bit CPUs. - Support for KCOV coverage instrumentation which means we can run syzkaller and discover even more bugs in our code. And as always many clean-ups, reworks and minor fixes etc. 
Thanks to: Alan Modra, Alexey Kardashevskiy, Alistair Popple, Andrea Arcangeli, Andrew Donnellan, Aneesh Kumar K.V, Aravinda Prasad, Balbir Singh, Brajeswar Ghosh, Breno Leitao, Christian Lamparter, Christian Zigotzky, Christophe Leroy, Christoph Hellwig, Corentin Labbe, Daniel Axtens, David Gibson, Diana Craciun, Firoz Khan, Gustavo A. R. Silva, Igor Stoppa, Joe Lawrence, Joel Stanley, Jonathan Neuschäfer, Jordan Niethe, Laurent Dufour, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Masahiro Yamada, Mathieu Malaterre, Matteo Croce, Meelis Roos, Michael W. Bringmann, Nathan Chancellor, Nathan Fontenot, Nicholas Piggin, Nick Desaulniers, Nicolai Stange, Oliver O'Halloran, Paul Mackerras, Peter Xu, PrasannaKumar Muralidharan, Qian Cai, Rashmica Gupta, Reza Arbab, Robert P. J. Day, Russell Currey, Sabyasachi Gupta, Sam Bobroff, Sandipan Das, Sergey Senozhatsky, Souptick Joarder, Stewart Smith, Tyrel Datwyler, Vaibhav Jain, YueHaibing" * tag 'powerpc-5.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (200 commits) powerpc/32: Clear on-stack exception marker upon exception return powerpc: Remove export of save_stack_trace_tsk_reliable() powerpc/mm: fix "section_base" set but not used powerpc/mm: Fix "sz" set but not used warning powerpc/mm: Check secondary hash page table powerpc: remove nargs from __SYSCALL powerpc/64s: Fix unrelocated interrupt trampoline address test powerpc/powernv/ioda: Fix locked_vm counting for memory used by IOMMU tables powerpc/fsl: Fix the flush of branch predictor. 
powerpc/powernv: Make opal log only readable by root powerpc/xmon: Fix opcode being uninitialized in print_insn_powerpc powerpc/powernv: move OPAL call wrapper tracing and interrupt handling to C powerpc/64s: Fix data interrupts vs d-side MCE reentrancy powerpc/64s: Prepare to handle data interrupts vs d-side MCE reentrancy powerpc/64s: system reset interrupt preserve HSRRs powerpc/64s: Fix HV NMI vs HV interrupt recoverability test powerpc/mm/hash: Handle mmap_min_addr correctly in get_unmapped_area topdown search powerpc/hugetlb: Handle mmap_min_addr correctly in get_unmapped_area callback selftests/powerpc: Remove duplicate header powerpc sstep: Add support for modsd, modud instructions ...
148 lines
4.2 KiB
C
148 lines
4.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
|
|
*
|
|
* Copyright (C) 2003 David Gibson, IBM Corporation.
|
|
*
|
|
* Based on the IA-32 version:
|
|
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/machdep.h>
|
|
|
|
/*
 * Insert a hash page table entry for @vpn -> @pa, defined outside this file.
 *
 * Used by __hash_page_huge() below, which passes the result on as the HPTE
 * slot and treats a return value of -2 as a hypervisor failure.
 */
extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
				  unsigned long pa, unsigned long rflags,
				  unsigned long vflags, int psize, int ssize);
|
|
|
|
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
|
|
pte_t *ptep, unsigned long trap, unsigned long flags,
|
|
int ssize, unsigned int shift, unsigned int mmu_psize)
|
|
{
|
|
real_pte_t rpte;
|
|
unsigned long vpn;
|
|
unsigned long old_pte, new_pte;
|
|
unsigned long rflags, pa;
|
|
long slot, offset;
|
|
|
|
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
|
|
|
|
/* Search the Linux page table for a match with va */
|
|
vpn = hpt_vpn(ea, vsid, ssize);
|
|
|
|
/* At this point, we have a pte (old_pte) which can be used to build
|
|
* or update an HPTE. There are 2 cases:
|
|
*
|
|
* 1. There is a valid (present) pte with no associated HPTE (this is
|
|
* the most common case)
|
|
* 2. There is a valid (present) pte with an associated HPTE. The
|
|
* current values of the pp bits in the HPTE prevent access
|
|
* because we are doing software DIRTY bit management and the
|
|
* page is currently not DIRTY.
|
|
*/
|
|
|
|
|
|
do {
|
|
old_pte = pte_val(*ptep);
|
|
/* If PTE busy, retry the access */
|
|
if (unlikely(old_pte & H_PAGE_BUSY))
|
|
return 0;
|
|
/* If PTE permissions don't match, take page fault */
|
|
if (unlikely(!check_pte_access(access, old_pte)))
|
|
return 1;
|
|
|
|
/* Try to lock the PTE, add ACCESSED and DIRTY if it was
|
|
* a write access */
|
|
new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
|
|
if (access & _PAGE_WRITE)
|
|
new_pte |= _PAGE_DIRTY;
|
|
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
|
|
|
|
/* Make sure this is a hugetlb entry */
|
|
if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
|
|
return 0;
|
|
|
|
rflags = htab_convert_pte_flags(new_pte);
|
|
if (unlikely(mmu_psize == MMU_PAGE_16G))
|
|
offset = PTRS_PER_PUD;
|
|
else
|
|
offset = PTRS_PER_PMD;
|
|
rpte = __real_pte(__pte(old_pte), ptep, offset);
|
|
|
|
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
|
|
/* No CPU has hugepages but lacks no execute, so we
|
|
* don't need to worry about that case */
|
|
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
|
|
|
|
/* Check if pte already has an hpte (case 2) */
|
|
if (unlikely(old_pte & H_PAGE_HASHPTE)) {
|
|
/* There MIGHT be an HPTE for this pte */
|
|
unsigned long gslot;
|
|
|
|
gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
|
|
if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
|
|
mmu_psize, ssize, flags) == -1)
|
|
old_pte &= ~_PAGE_HPTEFLAGS;
|
|
}
|
|
|
|
if (likely(!(old_pte & H_PAGE_HASHPTE))) {
|
|
unsigned long hash = hpt_hash(vpn, shift, ssize);
|
|
|
|
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
|
|
|
|
/* clear HPTE slot informations in new PTE */
|
|
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
|
|
|
|
slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
|
|
mmu_psize, ssize);
|
|
|
|
/*
|
|
* Hypervisor failure. Restore old pte and return -1
|
|
* similar to __hash_page_*
|
|
*/
|
|
if (unlikely(slot == -2)) {
|
|
*ptep = __pte(old_pte);
|
|
hash_failure_debug(ea, access, vsid, trap, ssize,
|
|
mmu_psize, mmu_psize, old_pte);
|
|
return -1;
|
|
}
|
|
|
|
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
|
|
}
|
|
|
|
/*
|
|
* No need to use ldarx/stdcx here
|
|
*/
|
|
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Begin a protection change on the huge PTE at @ptep for @addr in @vma's mm.
 *
 * Returns the value handed back by pte_update(), wrapped as a pte_t
 * (presumably the PTE contents before the update -- confirm against
 * pte_update()).
 */
pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
				  unsigned long addr, pte_t *ptep)
{
	unsigned long prev;

	/*
	 * Drop _PAGE_PRESENT so the hardware cannot update the entry in
	 * parallel, and set _PAGE_INVALID in the same update so that
	 * pte_present stays true and we don't take a wrong fault.
	 */
	prev = pte_update(vma->vm_mm, addr, ptep,
			  _PAGE_PRESENT, _PAGE_INVALID, 1);

	return __pte(prev);
}
|
|
|
|
/*
 * Finish a protection change on the huge PTE at @ptep by installing @pte.
 *
 * When radix_enabled() is true the work is handed to
 * radix__huge_ptep_modify_prot_commit(), which also receives @old_pte;
 * otherwise @pte is written with set_huge_pte_at() and @old_pte is unused.
 */
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
				  pte_t *ptep, pte_t old_pte, pte_t pte)
{
	if (radix_enabled()) {
		/*
		 * Call, then return separately: ISO C does not allow
		 * "return <expression>;" in a function returning void.
		 */
		radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
						    old_pte, pte);
		return;
	}

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
|