654443e20d
Pull user-space probe instrumentation from Ingo Molnar: "The uprobes code originates from SystemTap and has been used for years in Fedora and RHEL kernels. This version is much rewritten, reviews from PeterZ, Oleg and myself shaped the end result. This tree includes uprobes support in 'perf probe' - but SystemTap (and other tools) can take advantage of user probe points as well. Sample usage of uprobes via perf, for example to profile malloc() calls without modifying user-space binaries. First boot a new kernel with CONFIG_UPROBE_EVENT=y enabled. If you don't know which function you want to probe you can pick one from 'perf top' or can get a list all functions that can be probed within libc (binaries can be specified as well): $ perf probe -F -x /lib/libc.so.6 To probe libc's malloc(): $ perf probe -x /lib64/libc.so.6 malloc Added new event: probe_libc:malloc (on 0x7eac0) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 Make use of it to create a call graph (as the flat profile is going to look very boring): $ perf record -e probe_libc:malloc -gR make [ perf record: Woken up 173 times to write data ] [ perf record: Captured and wrote 44.190 MB perf.data (~1930712 $ perf report | less 32.03% git libc-2.15.so [.] malloc | --- malloc 29.49% cc1 libc-2.15.so [.] malloc | --- malloc | |--0.95%-- 0x208eb1000000000 | |--0.63%-- htab_traverse_noresize 11.04% as libc-2.15.so [.] malloc | --- malloc | 7.15% ld libc-2.15.so [.] malloc | --- malloc | 5.07% sh libc-2.15.so [.] malloc | --- malloc | 4.99% python-config libc-2.15.so [.] malloc | --- malloc | 4.54% make libc-2.15.so [.] malloc | --- malloc | |--7.34%-- glob | | | |--93.18%-- 0x41588f | | | --6.82%-- glob | 0x41588f ... Or: $ perf report -g flat | less # Overhead Command Shared Object Symbol # ........ ............. ............. .......... # 32.03% git libc-2.15.so [.] malloc 27.19% malloc 29.49% cc1 libc-2.15.so [.] malloc 24.77% malloc 11.04% as libc-2.15.so [.] malloc 11.02% malloc 7.15% ld libc-2.15.so [.] malloc 6.57% malloc ... The core uprobes design is fairly straightforward: uprobes probe points register themselves at (inode:offset) addresses of libraries/binaries, after which all existing (or new) vmas that map that address will have a software breakpoint injected at that address. vmas are COW-ed to preserve original content. The probe points are kept in an rbtree. If user-space executes the probed inode:offset instruction address then an event is generated which can be recovered from the regular perf event channels and mmap-ed ring-buffer. Multiple probes at the same address are supported, they create a dynamic callback list of event consumers. The basic model is further complicated by the XOL speedup: the original instruction that is probed is copied (in an architecture specific fashion) and executed out of line when the probe triggers. The XOL area is a single vma per process, with a fixed number of entries (which limits probe execution parallelism). The API: uprobes are installed/removed via /sys/kernel/debug/tracing/uprobe_events, the API is integrated to align with the kprobes interface as much as possible, but is separate to it. Injecting a probe point is privileged operation, which can be relaxed by setting perf_paranoid to -1. You can use multiple probes as well and mix them with kprobes and regular PMU events or tracepoints, when instrumenting a task." Fix up trivial conflicts in mm/memory.c due to previous cleanup of unmap_single_vma(). * 'perf-uprobes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) perf probe: Detect probe target when m/x options are absent perf probe: Provide perf interface for uprobes tracing: Fix kconfig warning due to a typo tracing: Provide trace events interface for uprobes tracing: Extract out common code for kprobes/uprobes trace events tracing: Modify is_delete, is_return from int to bool uprobes/core: Decrement uprobe count before the pages are unmapped uprobes/core: Make background page replacement logic account for rss_stat counters uprobes/core: Optimize probe hits with the help of a counter uprobes/core: Allocate XOL slots for uprobes use uprobes/core: Handle breakpoint and singlestep exceptions uprobes/core: Rename bkpt to swbp uprobes/core: Make order of function parameters consistent across functions uprobes/core: Make macro names consistent uprobes: Update copyright notices uprobes/core: Move insn to arch specific structure uprobes/core: Remove uprobe_opcode_sz uprobes/core: Make instruction tables volatile uprobes: Move to kernel/events/ uprobes/core: Clean up, refactor and improve the code ...
273 lines
8.4 KiB
C
273 lines
8.4 KiB
C
/* thread_info.h: low-level thread information
|
|
*
|
|
* Copyright (C) 2002 David Howells (dhowells@redhat.com)
|
|
* - Incorporating suggestions made by Linus Torvalds and Dave Miller
|
|
*/
|
|
|
|
#ifndef _ASM_X86_THREAD_INFO_H
|
|
#define _ASM_X86_THREAD_INFO_H
|
|
|
|
#include <linux/compiler.h>
|
|
#include <asm/page.h>
|
|
#include <asm/types.h>
|
|
|
|
/*
|
|
* low level task data that entry.S needs immediate access to
|
|
* - this struct should fit entirely inside of one cache line
|
|
* - this struct shares the supervisor stack pages
|
|
*/
|
|
#ifndef __ASSEMBLY__
|
|
struct task_struct;
|
|
struct exec_domain;
|
|
#include <asm/processor.h>
|
|
#include <asm/ftrace.h>
|
|
#include <linux/atomic.h>
|
|
|
|
struct thread_info {
|
|
struct task_struct *task; /* main task structure */
|
|
struct exec_domain *exec_domain; /* execution domain */
|
|
__u32 flags; /* low level flags */
|
|
__u32 status; /* thread synchronous flags */
|
|
__u32 cpu; /* current CPU */
|
|
int preempt_count; /* 0 => preemptable,
|
|
<0 => BUG */
|
|
mm_segment_t addr_limit;
|
|
struct restart_block restart_block;
|
|
void __user *sysenter_return;
|
|
#ifdef CONFIG_X86_32
|
|
unsigned long previous_esp; /* ESP of the previous stack in
|
|
case of nested (IRQ) stacks
|
|
*/
|
|
__u8 supervisor_stack[0];
|
|
#endif
|
|
unsigned int sig_on_uaccess_error:1;
|
|
unsigned int uaccess_err:1; /* uaccess failed */
|
|
};
|
|
|
|
#define INIT_THREAD_INFO(tsk) \
|
|
{ \
|
|
.task = &tsk, \
|
|
.exec_domain = &default_exec_domain, \
|
|
.flags = 0, \
|
|
.cpu = 0, \
|
|
.preempt_count = INIT_PREEMPT_COUNT, \
|
|
.addr_limit = KERNEL_DS, \
|
|
.restart_block = { \
|
|
.fn = do_no_restart_syscall, \
|
|
}, \
|
|
}
|
|
|
|
#define init_thread_info (init_thread_union.thread_info)
|
|
#define init_stack (init_thread_union.stack)
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
#include <asm/asm-offsets.h>
|
|
|
|
#endif
|
|
|
|
/*
|
|
* thread information flags
|
|
* - these are process state flags that various assembly files
|
|
* may need to access
|
|
* - pending work-to-be-done flags are in LSW
|
|
* - other flags in MSW
|
|
* Warning: layout of LSW is hardcoded in entry.S
|
|
*/
|
|
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
|
|
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
|
|
#define TIF_SIGPENDING 2 /* signal pending */
|
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
|
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
|
|
#define TIF_IRET 5 /* force IRET */
|
|
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
|
|
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
|
|
#define TIF_SECCOMP 8 /* secure computing */
|
|
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
|
|
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
|
|
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
|
|
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
|
|
#define TIF_IA32 17 /* IA32 compatibility process */
|
|
#define TIF_FORK 18 /* ret_from_fork */
|
|
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
|
|
#define TIF_DEBUG 21 /* uses debug registers */
|
|
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
|
|
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
|
|
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
|
|
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
|
|
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
|
|
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
|
|
#define TIF_X32 30 /* 32-bit native x86-64 binary */
|
|
|
|
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
|
|
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
|
|
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
|
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
|
|
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
|
#define _TIF_IRET (1 << TIF_IRET)
|
|
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
|
|
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
|
|
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
|
|
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
|
|
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
|
|
#define _TIF_UPROBE (1 << TIF_UPROBE)
|
|
#define _TIF_NOTSC (1 << TIF_NOTSC)
|
|
#define _TIF_IA32 (1 << TIF_IA32)
|
|
#define _TIF_FORK (1 << TIF_FORK)
|
|
#define _TIF_DEBUG (1 << TIF_DEBUG)
|
|
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
|
|
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
|
|
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
|
|
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
|
|
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
|
|
#define _TIF_ADDR32 (1 << TIF_ADDR32)
|
|
#define _TIF_X32 (1 << TIF_X32)
|
|
|
|
/* work to do in syscall_trace_enter() */
|
|
#define _TIF_WORK_SYSCALL_ENTRY \
|
|
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
|
|
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
|
|
|
|
/* work to do in syscall_trace_leave() */
|
|
#define _TIF_WORK_SYSCALL_EXIT \
|
|
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
|
|
_TIF_SYSCALL_TRACEPOINT)
|
|
|
|
/* work to do on interrupt/exception return */
|
|
#define _TIF_WORK_MASK \
|
|
(0x0000FFFF & \
|
|
~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
|
|
_TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
|
|
|
|
/* work to do on any return to user space */
|
|
#define _TIF_ALLWORK_MASK \
|
|
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
|
|
|
|
/* Only used for 64 bit */
|
|
#define _TIF_DO_NOTIFY_MASK \
|
|
(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
|
|
_TIF_USER_RETURN_NOTIFY)
|
|
|
|
/* flags to check in __switch_to() */
|
|
#define _TIF_WORK_CTXSW \
|
|
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
|
|
|
|
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
|
|
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
|
|
|
|
#define PREEMPT_ACTIVE 0x10000000
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
#define STACK_WARN (THREAD_SIZE/8)
|
|
/*
|
|
* macros/functions for gaining access to the thread information structure
|
|
*
|
|
* preempt_count needs to be 1 initially, until the scheduler is functional.
|
|
*/
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
/* how to get the current stack pointer from C */
|
|
register unsigned long current_stack_pointer asm("esp") __used;
|
|
|
|
/* how to get the thread information struct from C */
|
|
static inline struct thread_info *current_thread_info(void)
|
|
{
|
|
return (struct thread_info *)
|
|
(current_stack_pointer & ~(THREAD_SIZE - 1));
|
|
}
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
/* how to get the thread information struct from ASM */
|
|
#define GET_THREAD_INFO(reg) \
|
|
movl $-THREAD_SIZE, reg; \
|
|
andl %esp, reg
|
|
|
|
/* use this one if reg already contains %esp */
|
|
#define GET_THREAD_INFO_WITH_ESP(reg) \
|
|
andl $-THREAD_SIZE, reg
|
|
|
|
#endif
|
|
|
|
#else /* X86_32 */
|
|
|
|
#include <asm/percpu.h>
|
|
#define KERNEL_STACK_OFFSET (5*8)
|
|
|
|
/*
|
|
* macros/functions for gaining access to the thread information structure
|
|
* preempt_count needs to be 1 initially, until the scheduler is functional.
|
|
*/
|
|
#ifndef __ASSEMBLY__
|
|
DECLARE_PER_CPU(unsigned long, kernel_stack);
|
|
|
|
static inline struct thread_info *current_thread_info(void)
|
|
{
|
|
struct thread_info *ti;
|
|
ti = (void *)(this_cpu_read_stable(kernel_stack) +
|
|
KERNEL_STACK_OFFSET - THREAD_SIZE);
|
|
return ti;
|
|
}
|
|
|
|
#else /* !__ASSEMBLY__ */
|
|
|
|
/* how to get the thread information struct from ASM */
|
|
#define GET_THREAD_INFO(reg) \
|
|
movq PER_CPU_VAR(kernel_stack),reg ; \
|
|
subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
|
|
|
|
/*
|
|
* Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
|
|
* a certain register (to be used in assembler memory operands).
|
|
*/
|
|
#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
|
|
|
|
#endif
|
|
|
|
#endif /* !X86_32 */
|
|
|
|
/*
|
|
* Thread-synchronous status.
|
|
*
|
|
* This is different from the flags in that nobody else
|
|
* ever touches our thread-synchronous status, so we don't
|
|
* have to worry about atomic accesses.
|
|
*/
|
|
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
|
|
#define TS_POLLING 0x0004 /* idle task polling need_resched,
|
|
skip sending interrupt */
|
|
#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
|
|
|
|
#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#define HAVE_SET_RESTORE_SIGMASK 1
|
|
static inline void set_restore_sigmask(void)
|
|
{
|
|
struct thread_info *ti = current_thread_info();
|
|
ti->status |= TS_RESTORE_SIGMASK;
|
|
set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
|
|
}
|
|
|
|
static inline bool is_ia32_task(void)
|
|
{
|
|
#ifdef CONFIG_X86_32
|
|
return true;
|
|
#endif
|
|
#ifdef CONFIG_IA32_EMULATION
|
|
if (current_thread_info()->status & TS_COMPAT)
|
|
return true;
|
|
#endif
|
|
return false;
|
|
}
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#ifndef __ASSEMBLY__
|
|
extern void arch_task_cache_init(void);
|
|
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
|
|
extern void arch_release_task_struct(struct task_struct *tsk);
|
|
#endif
|
|
#endif /* _ASM_X86_THREAD_INFO_H */
|