linux/arch/riscv/mm/fault.c
Linus Torvalds f1b744f65e RISC-V Patches for the 5.17 Merge Window, Part 1
* Support for the DA9063 as used on the HiFive Unmatched.
 * Support for relative extables, which puts us in line with other
   architectures and save some space in vmlinux.
 * A handful of kexec fixes/improvements, including the ability to run
   crash kernels from PCI-addressable memory on the HiFive Unmatched.
 * Support for the SBI SRST extension, which allows systems that do not
   have an explicit driver in Linux to reboot.
 * A handful of fixes and cleanups, including to the defconfigs and
   device trees.
 
 ---
 This time I do expect to have a part 2, as there's still some smaller
 patches on the list.  I was hoping to get through more of that over the
 weekend, but I got distracted with the ABI issues.  Figured it's better
 to send this sooner rather than waiting.
 
 Included are my merge resolutions against a master from this morning, if
 that helps any:
 
 diff --cc arch/riscv/include/asm/sbi.h
 index 289621da4a2a,9c46dd3ff4a2..000000000000
 --- a/arch/riscv/include/asm/sbi.h
 +++ b/arch/riscv/include/asm/sbi.h
 @@@ -27,7 -27,14 +27,15 @@@ enum sbi_ext_id
         SBI_EXT_IPI = 0x735049,
         SBI_EXT_RFENCE = 0x52464E43,
         SBI_EXT_HSM = 0x48534D,
  +      SBI_EXT_SRST = 0x53525354,
 +
 +       /* Experimentals extensions must lie within this range */
 +       SBI_EXT_EXPERIMENTAL_START = 0x08000000,
 +       SBI_EXT_EXPERIMENTAL_END = 0x08FFFFFF,
 +
 +       /* Vendor extensions must lie within this range */
 +       SBI_EXT_VENDOR_START = 0x09000000,
 +       SBI_EXT_VENDOR_END = 0x09FFFFFF,
   };
 
   enum sbi_ext_base_fid {
 diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
 index e03a4c94cf3f..6bfa1f24d3de 100644
 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
 +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
 @@ -188,14 +188,6 @@ vdd_ldo11: ldo11 {
                                 regulator-always-on;
                         };
                 };
 -
 -               rtc {
 -                       compatible = "dlg,da9063-rtc";
 -               };
 -
 -               wdt {
 -                       compatible = "dlg,da9063-watchdog";
 -               };
         };
  };
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEKzw3R0RoQ7JKlDp6LhMZ81+7GIkFAmHnDV4THHBhbG1lckBk
 YWJiZWx0LmNvbQAKCRAuExnzX7sYiaGWD/wOMHVLrkLZDxKHY3lFU7S7FanpFgcU
 L265fgKtoG/QOI9WPuQlN7pYvrC4ssUvtQ23WwZ+iz4pJlUwoMb2TAqBBeTXxEbW
 pVF2QqnlPdv2ZEn95MFxZ0HQB2+xgJKPL5gdD6Iz7oe2378lf7tywSF7MYpxG/AA
 CeHUxzhEPhQJntufTievMhvYpM7ZyhCr19ZAHXRaPoGReJK5ZMCeYHGTrHD4EisG
 hO/Pg2vx/Ynxi/vb/C69kpTBvu4Qsxnbhgfy1SowrO3FhxcZTbyrZ6l8uRxSAHIg
 dA0NLPh/YDQCPXYnphQcLo+Q9Gy4Sz5es7ULnnMyyEOZxoVyy4up3rCAFAL3Ubav
 CNQdk/ZWtrZ+s4chilA1kW97apxocvmq5ULg+7Hi58ZUzk+y7MQBVCClohyONVEU
 /leJzJ3nq3YHFgfo8Uh7L+iPzlNgycfi4gRnGJIkEVRhXBPTfJ/Pc5wjPoPVsFvt
 pjEYT4YaXITZ0QBLdcuPex5h3PXkRsORsZl8eJGnIz8742KA4tfFraZ4BkbrjoqC
 tLsi7Si9hN3JKhLsNgclb76tDkoz4CY7yZ7TT7hRbKdZZJkVRu1XqUq75X18CVQv
 9p7Q7j1b5H3Z+/5KOxwS0UO73y92yvyVvi0cLqBoD2Tkeq3beumxmy50Qy+O+h1D
 Ut7GwcyavzfS8Q==
 =uqtf
 -----END PGP SIGNATURE-----

Merge tag 'riscv-for-linus-5.17-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V updates from Palmer Dabbelt:

 - Support for the DA9063 as used on the HiFive Unmatched.

 - Support for relative extables, which puts us in line with other
   architectures and save some space in vmlinux.

 - A handful of kexec fixes/improvements, including the ability to run
   crash kernels from PCI-addressable memory on the HiFive Unmatched.

 - Support for the SBI SRST extension, which allows systems that do not
   have an explicit driver in Linux to reboot.

 - A handful of fixes and cleanups, including to the defconfigs and
   device trees.

* tag 'riscv-for-linus-5.17-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (52 commits)
  RISC-V: Use SBI SRST extension when available
  riscv: mm: fix wrong phys_ram_base value for RV64
  RISC-V: Use common riscv_cpuid_to_hartid_mask() for both SMP=y and SMP=n
  riscv: head: remove useless __PAGE_ALIGNED_BSS and .balign
  riscv: errata: alternative: mark vendor_patch_func __initdata
  riscv: head: make secondary_start_common() static
  riscv: remove cpu_stop()
  riscv: try to allocate crashkern region from 32bit addressible memory
  riscv: use hart id instead of cpu id on machine_kexec
  riscv: Don't use va_pa_offset on kdump
  riscv: dts: sifive: fu540-c000: Fix PLIC node
  riscv: dts: sifive: fu540-c000: Drop bogus soc node compatible values
  riscv: dts: sifive: Group tuples in register properties
  riscv: dts: sifive: Group tuples in interrupt properties
  riscv: dts: microchip: mpfs: Group tuples in interrupt properties
  riscv: dts: microchip: mpfs: Fix clock controller node
  riscv: dts: microchip: mpfs: Fix reference clock node
  riscv: dts: microchip: mpfs: Fix PLIC node
  riscv: dts: microchip: mpfs: Drop empty chosen node
  riscv: dts: canaan: Group tuples in interrupt properties
  ...
2022-01-19 11:38:21 +02:00

354 lines
8.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
* Lennox Wu <lennox.wu@sunplusct.com>
* Chen Liqin <liqin.chen@sunplusct.com>
* Copyright (C) 2012 Regents of the University of California
*/
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/kfence.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#include "../kernel/head.h"
static void die_kernel_fault(const char *msg, unsigned long addr,
struct pt_regs *regs)
{
bust_spinlocks(1);
pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
addr);
bust_spinlocks(0);
die(regs, "Oops");
make_task_dead(SIGKILL);
}
static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
const char *msg;
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
if (addr < PAGE_SIZE)
msg = "NULL pointer dereference";
else {
if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs))
return;
msg = "paging request";
}
die_kernel_fault(msg, addr, regs);
}
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
{
if (fault & VM_FAULT_OOM) {
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
if (!user_mode(regs)) {
no_context(regs, addr);
return;
}
pagefault_out_of_memory();
return;
} else if (fault & VM_FAULT_SIGBUS) {
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
no_context(regs, addr);
return;
}
do_trap(regs, SIGBUS, BUS_ADRERR, addr);
return;
}
BUG();
}
static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
/*
* Something tried to access memory that isn't in our memory map.
* Fix it, but check if it's kernel or user first.
*/
mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
return;
}
no_context(regs, addr);
}
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
p4d_t *p4d, *p4d_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
int index;
unsigned long pfn;
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
return do_trap(regs, SIGSEGV, code, addr);
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
*
* Do _not_ use "tsk->active_mm->pgd" here.
* We might be inside an interrupt in the middle
* of a task switch.
*/
index = pgd_index(addr);
pfn = csr_read(CSR_SATP) & SATP_PPN;
pgd = (pgd_t *)pfn_to_virt(pfn) + index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k)) {
no_context(regs, addr);
return;
}
set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, addr);
p4d_k = p4d_offset(pgd_k, addr);
if (!p4d_present(*p4d_k)) {
no_context(regs, addr);
return;
}
pud = pud_offset(p4d, addr);
pud_k = pud_offset(p4d_k, addr);
if (!pud_present(*pud_k)) {
no_context(regs, addr);
return;
}
/*
* Since the vmalloc area is global, it is unnecessary
* to copy individual PTEs
*/
pmd = pmd_offset(pud, addr);
pmd_k = pmd_offset(pud_k, addr);
if (!pmd_present(*pmd_k)) {
no_context(regs, addr);
return;
}
set_pmd(pmd, *pmd_k);
/*
* Make sure the actual PTE exists as well to
* catch kernel vmalloc-area accesses to non-mapped
* addresses. If we don't do this, this will just
* silently loop forever.
*/
pte_k = pte_offset_kernel(pmd_k, addr);
if (!pte_present(*pte_k)) {
no_context(regs, addr);
return;
}
/*
* The kernel assumes that TLBs don't cache invalid
* entries, but in RISC-V, SFENCE.VMA specifies an
* ordering constraint, not a cache flush; it is
* necessary even after writing invalid entries.
*/
local_flush_tlb_page(addr);
}
static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
{
switch (cause) {
case EXC_INST_PAGE_FAULT:
if (!(vma->vm_flags & VM_EXEC)) {
return true;
}
break;
case EXC_LOAD_PAGE_FAULT:
if (!(vma->vm_flags & VM_READ)) {
return true;
}
break;
case EXC_STORE_PAGE_FAULT:
if (!(vma->vm_flags & VM_WRITE)) {
return true;
}
break;
default:
panic("%s: unhandled cause %lu", __func__, cause);
}
return false;
}
/*
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
*/
asmlinkage void do_page_fault(struct pt_regs *regs)
{
struct task_struct *tsk;
struct vm_area_struct *vma;
struct mm_struct *mm;
unsigned long addr, cause;
unsigned int flags = FAULT_FLAG_DEFAULT;
int code = SEGV_MAPERR;
vm_fault_t fault;
cause = regs->cause;
addr = regs->badaddr;
tsk = current;
mm = tsk->mm;
if (kprobe_page_fault(regs, cause))
return;
/*
* Fault-in kernel-space virtual memory on-demand.
* The 'reference' page table is init_mm.pgd.
*
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*/
if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
vmalloc_fault(regs, code, addr);
return;
}
#ifdef CONFIG_64BIT
/*
* Modules in 64bit kernels lie in their own virtual region which is not
* in the vmalloc region, but dealing with page faults in this region
* or the vmalloc region amounts to doing the same thing: checking that
* the mapping exists in init_mm.pgd and updating user page table, so
* just use vmalloc_fault.
*/
if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
vmalloc_fault(regs, code, addr);
return;
}
#endif
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
local_irq_enable();
/*
* If we're in an interrupt, have no user context, or are running
* in an atomic region, then we must not take the fault.
*/
if (unlikely(faulthandler_disabled() || !mm)) {
tsk->thread.bad_cause = cause;
no_context(regs, addr);
return;
}
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (!user_mode(regs) && addr < TASK_SIZE &&
unlikely(!(regs->status & SR_SUM)))
die_kernel_fault("access to user memory without uaccess routines",
addr, regs);
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (cause == EXC_STORE_PAGE_FAULT)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (unlikely(!vma)) {
tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
if (likely(vma->vm_start <= addr))
goto good_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
if (unlikely(expand_stack(vma, addr))) {
tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it.
*/
good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(cause, vma))) {
tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
/*
* If for any reason at all we could not handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
fault = handle_mm_fault(vma, addr, flags, regs);
/*
* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
if (fault_signal_pending(fault, regs))
return;
if (unlikely(fault & VM_FAULT_RETRY)) {
flags |= FAULT_FLAG_TRIED;
/*
* No need to mmap_read_unlock(mm) as we would
* have already released it in __lock_page_or_retry
* in mm/filemap.c.
*/
goto retry;
}
mmap_read_unlock(mm);
if (unlikely(fault & VM_FAULT_ERROR)) {
tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
return;
}
return;
}
NOKPROBE_SYMBOL(do_page_fault);