forked from Minki/linux
759496ba64
Unlike global OOM handling, memory cgroup code will invoke the OOM killer in any OOM situation because it has no way of telling faults occurring in kernel context - which could be handled more gracefully - from user-triggered faults. Pass a flag that identifies faults originating in user space from the architecture-specific fault handlers to generic code so that memcg OOM handling can be improved. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Michal Hocko <mhocko@suse.cz> Cc: David Rientjes <rientjes@google.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: azurIt <azurit@pobox.sk> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
259 lines
6.2 KiB
C
259 lines
6.2 KiB
C
/*
 *  linux/arch/alpha/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/io.h>
|
|
|
|
#define __EXTERN_INLINE inline
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/tlbflush.h>
|
|
#undef __EXTERN_INLINE
|
|
|
|
#include <linux/signal.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/string.h>
|
|
#include <linux/types.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/module.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
|
|
|
|
|
|
/*
 * Force a new ASN for a task.
 */
|
|
|
|
#ifndef CONFIG_SMP
|
|
unsigned long last_asn = ASN_FIRST_VERSION;
|
|
#endif
|
|
|
|
void
|
|
__load_new_mm_context(struct mm_struct *next_mm)
|
|
{
|
|
unsigned long mmc;
|
|
struct pcb_struct *pcb;
|
|
|
|
mmc = __get_new_mm_context(next_mm, smp_processor_id());
|
|
next_mm->context[smp_processor_id()] = mmc;
|
|
|
|
pcb = ¤t_thread_info()->pcb;
|
|
pcb->asn = mmc & HARDWARE_ASN_MASK;
|
|
pcb->ptbr = ((unsigned long) next_mm->pgd - IDENT_ADDR) >> PAGE_SHIFT;
|
|
|
|
__reload_thread(pcb);
|
|
}
|
|
|
|
|
|
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to handle_mm_fault().
 *
 * mmcsr:
 *	0 = translation not valid
 *	1 = access violation
 *	2 = fault-on-read
 *	3 = fault-on-execute
 *	4 = fault-on-write
 *
 * cause:
 *	-1 = instruction fetch
 *	0 = load
 *	1 = store
 *
 * Registers $9 through $15 are saved in a block just prior to `regs' and
 * are saved and restored around the call to allow exception code to
 * modify them.
 */
|
|
|
|
/*
 * Macro for exception fixup code to access integer registers.
 *
 * Expands to an lvalue for integer register number `r', found by treating
 * the in-scope `regs' pointer as an array of unsigned long:
 *
 *	r in  0..8   -> slot r
 *	r in  9..15  -> slot r-16  (negative: located just before `regs')
 *	r in 16..18  -> slot r+8
 *	otherwise    -> slot r-10
 *
 * A variable named `regs' must be visible at the point of use.
 */
#define dpf_reg(r)							\
	(((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 :	\
				 (r) <= 18 ? (r)+8 : (r)-10])
|
|
|
|
asmlinkage void
|
|
do_page_fault(unsigned long address, unsigned long mmcsr,
|
|
long cause, struct pt_regs *regs)
|
|
{
|
|
struct vm_area_struct * vma;
|
|
struct mm_struct *mm = current->mm;
|
|
const struct exception_table_entry *fixup;
|
|
int fault, si_code = SEGV_MAPERR;
|
|
siginfo_t info;
|
|
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
|
|
|
/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
|
|
(or is suppressed by the PALcode). Support that for older CPUs
|
|
by ignoring such an instruction. */
|
|
if (cause == 0) {
|
|
unsigned int insn;
|
|
__get_user(insn, (unsigned int __user *)regs->pc);
|
|
if ((insn >> 21 & 0x1f) == 0x1f &&
|
|
/* ldq ldl ldt lds ldg ldf ldwu ldbu */
|
|
(1ul << (insn >> 26) & 0x30f00001400ul)) {
|
|
regs->pc += 4;
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* If we're in an interrupt context, or have no user context,
|
|
we must not take the fault. */
|
|
if (!mm || in_atomic())
|
|
goto no_context;
|
|
|
|
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
|
|
if (address >= TASK_SIZE)
|
|
goto vmalloc_fault;
|
|
#endif
|
|
if (user_mode(regs))
|
|
flags |= FAULT_FLAG_USER;
|
|
retry:
|
|
down_read(&mm->mmap_sem);
|
|
vma = find_vma(mm, address);
|
|
if (!vma)
|
|
goto bad_area;
|
|
if (vma->vm_start <= address)
|
|
goto good_area;
|
|
if (!(vma->vm_flags & VM_GROWSDOWN))
|
|
goto bad_area;
|
|
if (expand_stack(vma, address))
|
|
goto bad_area;
|
|
|
|
/* Ok, we have a good vm_area for this memory access, so
|
|
we can handle it. */
|
|
good_area:
|
|
si_code = SEGV_ACCERR;
|
|
if (cause < 0) {
|
|
if (!(vma->vm_flags & VM_EXEC))
|
|
goto bad_area;
|
|
} else if (!cause) {
|
|
/* Allow reads even for write-only mappings */
|
|
if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
|
|
goto bad_area;
|
|
} else {
|
|
if (!(vma->vm_flags & VM_WRITE))
|
|
goto bad_area;
|
|
flags |= FAULT_FLAG_WRITE;
|
|
}
|
|
|
|
/* If for any reason at all we couldn't handle the fault,
|
|
make sure we exit gracefully rather than endlessly redo
|
|
the fault. */
|
|
fault = handle_mm_fault(mm, vma, address, flags);
|
|
|
|
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
|
return;
|
|
|
|
if (unlikely(fault & VM_FAULT_ERROR)) {
|
|
if (fault & VM_FAULT_OOM)
|
|
goto out_of_memory;
|
|
else if (fault & VM_FAULT_SIGBUS)
|
|
goto do_sigbus;
|
|
BUG();
|
|
}
|
|
|
|
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
|
if (fault & VM_FAULT_MAJOR)
|
|
current->maj_flt++;
|
|
else
|
|
current->min_flt++;
|
|
if (fault & VM_FAULT_RETRY) {
|
|
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
|
|
|
/* No need to up_read(&mm->mmap_sem) as we would
|
|
* have already released it in __lock_page_or_retry
|
|
* in mm/filemap.c.
|
|
*/
|
|
|
|
goto retry;
|
|
}
|
|
}
|
|
|
|
up_read(&mm->mmap_sem);
|
|
|
|
return;
|
|
|
|
/* Something tried to access memory that isn't in our memory map.
|
|
Fix it, but check if it's kernel or user first. */
|
|
bad_area:
|
|
up_read(&mm->mmap_sem);
|
|
|
|
if (user_mode(regs))
|
|
goto do_sigsegv;
|
|
|
|
no_context:
|
|
/* Are we prepared to handle this fault as an exception? */
|
|
if ((fixup = search_exception_tables(regs->pc)) != 0) {
|
|
unsigned long newpc;
|
|
newpc = fixup_exception(dpf_reg, fixup, regs->pc);
|
|
regs->pc = newpc;
|
|
return;
|
|
}
|
|
|
|
/* Oops. The kernel tried to access some bad page. We'll have to
|
|
terminate things with extreme prejudice. */
|
|
printk(KERN_ALERT "Unable to handle kernel paging request at "
|
|
"virtual address %016lx\n", address);
|
|
die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16);
|
|
do_exit(SIGKILL);
|
|
|
|
/* We ran out of memory, or some other thing happened to us that
|
|
made us unable to handle the page fault gracefully. */
|
|
out_of_memory:
|
|
up_read(&mm->mmap_sem);
|
|
if (!user_mode(regs))
|
|
goto no_context;
|
|
pagefault_out_of_memory();
|
|
return;
|
|
|
|
do_sigbus:
|
|
up_read(&mm->mmap_sem);
|
|
/* Send a sigbus, regardless of whether we were in kernel
|
|
or user mode. */
|
|
info.si_signo = SIGBUS;
|
|
info.si_errno = 0;
|
|
info.si_code = BUS_ADRERR;
|
|
info.si_addr = (void __user *) address;
|
|
force_sig_info(SIGBUS, &info, current);
|
|
if (!user_mode(regs))
|
|
goto no_context;
|
|
return;
|
|
|
|
do_sigsegv:
|
|
info.si_signo = SIGSEGV;
|
|
info.si_errno = 0;
|
|
info.si_code = si_code;
|
|
info.si_addr = (void __user *) address;
|
|
force_sig_info(SIGSEGV, &info, current);
|
|
return;
|
|
|
|
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
|
|
vmalloc_fault:
|
|
if (user_mode(regs))
|
|
goto do_sigsegv;
|
|
else {
|
|
/* Synchronize this task's top level page-table
|
|
with the "reference" page table from init. */
|
|
long index = pgd_index(address);
|
|
pgd_t *pgd, *pgd_k;
|
|
|
|
pgd = current->active_mm->pgd + index;
|
|
pgd_k = swapper_pg_dir + index;
|
|
if (!pgd_present(*pgd) && pgd_present(*pgd_k)) {
|
|
pgd_val(*pgd) = pgd_val(*pgd_k);
|
|
return;
|
|
}
|
|
goto no_context;
|
|
}
|
|
#endif
|
|
}
|