forked from Minki/linux
15759cb054
read_user_stack_slow is called with interrupts soft disabled and it copies contents from the page which we find mapped to a specific address. To convert a userspace address to a pfn, the kernel now uses a lockless page table walk.

The kernel needs to make sure the pfn value read remains stable and is not released and reused for another process while the contents are read from the page. This can only be achieved by holding a page reference.

One of the first approaches I tried was to check the pte value after the kernel copies the contents from the page. But as shown below we can still get it wrong:

CPU0                                CPU1
pte = READ_ONCE(*ptep);
                                    pte_clear(pte);
                                    put_page(page);
                                    page = alloc_page();
                                    memcpy(page_address(page), "secret password", nr);
memcpy(buf, kaddr + offset, nb);
                                    put_page(page);
                                    handle_mm_fault()
                                    page = alloc_page();
                                    set_pte(pte, page);
if (pte_val(pte) != pte_val(*ptep))

Hence switch to __get_user_pages_fast.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200505071729.54912-8-aneesh.kumar@linux.ibm.com
157 lines
4.0 KiB
C
157 lines
4.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Performance counter callchain support - powerpc architecture code
|
|
*
|
|
* Copyright © 2009 Paul Mackerras, IBM Corporation.
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/sigcontext.h>
|
|
#include <asm/ucontext.h>
|
|
#include <asm/vdso.h>
|
|
#include <asm/pte-walk.h>
|
|
|
|
#include "callchain.h"
|
|
|
|
/*
|
|
* On 64-bit we don't want to invoke hash_page on user addresses from
|
|
* interrupt context, so if the access faults, we read the page tables
|
|
* to find which page (if any) is mapped and access it directly.
|
|
*/
|
|
int read_user_stack_slow(void __user *ptr, void *buf, int nb)
|
|
{
|
|
|
|
unsigned long addr = (unsigned long) ptr;
|
|
unsigned long offset;
|
|
struct page *page;
|
|
int nrpages;
|
|
void *kaddr;
|
|
|
|
nrpages = __get_user_pages_fast(addr, 1, 1, &page);
|
|
if (nrpages == 1) {
|
|
kaddr = page_address(page);
|
|
|
|
/* align address to page boundary */
|
|
offset = addr & ~PAGE_MASK;
|
|
|
|
memcpy(buf, kaddr + offset, nb);
|
|
put_page(page);
|
|
return 0;
|
|
}
|
|
return -EFAULT;
|
|
}
|
|
|
|
/*
 * Read one unsigned long from the user address @ptr into @ret.
 *
 * The address must be 8-byte aligned and the whole word must lie below
 * TASK_SIZE. probe_user_read() is tried first; if it fails, fall back to
 * read_user_stack_slow().
 *
 * Returns 0 on success, -EFAULT (or the fallback's error) otherwise.
 */
static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
{
	unsigned long uaddr = (unsigned long)ptr;

	/* The word must fit entirely within the user address range. */
	if (uaddr > TASK_SIZE - sizeof(unsigned long))
		return -EFAULT;

	/* Misaligned pointers are rejected outright. */
	if (uaddr & 7)
		return -EFAULT;

	if (probe_user_read(ret, ptr, sizeof(*ret)) == 0)
		return 0;

	return read_user_stack_slow(ptr, ret, 8);
}
|
|
|
|
/*
|
|
* 64-bit user processes use the same stack frame for RT and non-RT signals.
|
|
*/
|
|
struct signal_frame_64 {
|
|
char dummy[__SIGNAL_FRAMESIZE];
|
|
struct ucontext uc;
|
|
unsigned long unused[2];
|
|
unsigned int tramp[6];
|
|
struct siginfo *pinfo;
|
|
void *puc;
|
|
struct siginfo info;
|
|
char abigap[288];
|
|
};
|
|
|
|
static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
|
|
{
|
|
if (nip == fp + offsetof(struct signal_frame_64, tramp))
|
|
return 1;
|
|
if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
|
|
nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Do some sanity checking on the signal frame pointed to by sp.
|
|
* We check the pinfo and puc pointers in the frame.
|
|
*/
|
|
static int sane_signal_64_frame(unsigned long sp)
|
|
{
|
|
struct signal_frame_64 __user *sf;
|
|
unsigned long pinfo, puc;
|
|
|
|
sf = (struct signal_frame_64 __user *) sp;
|
|
if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
|
|
read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
|
|
return 0;
|
|
return pinfo == (unsigned long) &sf->info &&
|
|
puc == (unsigned long) &sf->uc;
|
|
}
|
|
|
|
void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
|
|
struct pt_regs *regs)
|
|
{
|
|
unsigned long sp, next_sp;
|
|
unsigned long next_ip;
|
|
unsigned long lr;
|
|
long level = 0;
|
|
struct signal_frame_64 __user *sigframe;
|
|
unsigned long __user *fp, *uregs;
|
|
|
|
next_ip = perf_instruction_pointer(regs);
|
|
lr = regs->link;
|
|
sp = regs->gpr[1];
|
|
perf_callchain_store(entry, next_ip);
|
|
|
|
while (entry->nr < entry->max_stack) {
|
|
fp = (unsigned long __user *) sp;
|
|
if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
|
|
return;
|
|
if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
|
|
return;
|
|
|
|
/*
|
|
* Note: the next_sp - sp >= signal frame size check
|
|
* is true when next_sp < sp, which can happen when
|
|
* transitioning from an alternate signal stack to the
|
|
* normal stack.
|
|
*/
|
|
if (next_sp - sp >= sizeof(struct signal_frame_64) &&
|
|
(is_sigreturn_64_address(next_ip, sp) ||
|
|
(level <= 1 && is_sigreturn_64_address(lr, sp))) &&
|
|
sane_signal_64_frame(sp)) {
|
|
/*
|
|
* This looks like an signal frame
|
|
*/
|
|
sigframe = (struct signal_frame_64 __user *) sp;
|
|
uregs = sigframe->uc.uc_mcontext.gp_regs;
|
|
if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
|
|
read_user_stack_64(&uregs[PT_LNK], &lr) ||
|
|
read_user_stack_64(&uregs[PT_R1], &sp))
|
|
return;
|
|
level = 0;
|
|
perf_callchain_store_context(entry, PERF_CONTEXT_USER);
|
|
perf_callchain_store(entry, next_ip);
|
|
continue;
|
|
}
|
|
|
|
if (level == 0)
|
|
next_ip = lr;
|
|
perf_callchain_store(entry, next_ip);
|
|
++level;
|
|
sp = next_sp;
|
|
}
|
|
}
|