mirror of
https://github.com/torvalds/linux.git
synced 2024-12-22 10:56:40 +00:00
e7ed9d9bd0
Uprobe is a tracing mechanism for userspace programs. A typical uprobe incurs the overhead of two traps. The first trap is caused by the trap insn that replaced the original one, and the second trap is taken to execute the original displaced insn in user space. To reduce the overhead, the kernel provides hooks for architectures to emulate the original insn and skip the second trap. On x86, emulation is done for certain branch insns. This patch extends the emulation to "push <reg>" insns. These insns are typical at the beginning of a function. For example, bcc in the https://github.com/iovisor/bcc repo provides tools to measure funclatency, detect memleak, etc. The tools will place uprobes at the beginning of a function and possibly uretprobes at the end of the function. This patch is able to reduce the trap overhead for a uprobe from 2 to 1. Without this patch, a uretprobe will typically incur three traps. With this patch, if the function starts with a "push" insn, the number of traps can be reduced from 3 to 2. An experiment was conducted on two local VMs, a fedora 26 64-bit VM and a 32-bit VM, both with 4 processors and 4GB memory, booted with the latest tip repo (and this patch). The host is a MacBook with an intel i7 processor. The test program looks like: #include <stdio.h> #include <stdlib.h> #include <time.h> #include <sys/time.h> static void test() __attribute__((noinline)); void test() {} int main() { struct timeval start, end; gettimeofday(&start, NULL); for (int i = 0; i < 1000000; i++) { test(); } gettimeofday(&end, NULL); printf("%ld\n", ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec))); return 0; } The program is compiled without optimization, and the first insn for function "test" is "push %rbp". The host is relatively idle.
Before the test run, the uprobe is inserted as below for uprobe: echo 'p <binary>:<test_func_offset>' > /sys/kernel/debug/tracing/uprobe_events echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable and for uretprobe: echo 'r <binary>:<test_func_offset>' > /sys/kernel/debug/tracing/uprobe_events echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable Unit: microsecond(usec) per loop iteration x86_64 W/ this patch W/O this patch uprobe 1.55 3.1 uretprobe 2.0 3.6 x86_32 W/ this patch W/O this patch uprobe 1.41 3.5 uretprobe 1.75 4.0 You can see that this patch significantly reduced the overhead, 50% for uprobe and 44% for uretprobe on x86_64, and even more on x86_32. Signed-off-by: Yonghong Song <yhs@fb.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20171201001202.3706564-1-yhs@fb.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
72 lines
1.6 KiB
C
72 lines
1.6 KiB
C
#ifndef _ASM_UPROBES_H
|
|
#define _ASM_UPROBES_H
|
|
/*
 * User-space Probes (UProbes) for x86
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2008-2011
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 */
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
/* Storage type for a uprobe breakpoint opcode: a single byte (u8). */
typedef u8 uprobe_opcode_t;
|
|
|
|
/* Size, in bytes, of the per-probe instruction buffers (insn/ixol). */
#define MAX_UINSN_BYTES			16
/* Size, in bytes, of one XOL slot; chosen to keep it cache aligned. */
#define UPROBE_XOL_SLOT_BYTES		128

/* One-byte software breakpoint opcode (0xcc) and its length in bytes. */
#define UPROBE_SWBP_INSN		0xcc
#define UPROBE_SWBP_INSN_SIZE		1
|
|
|
|
/* Forward declaration only; this header uses the type solely through a pointer. */
struct uprobe_xol_ops;
|
|
|
|
struct arch_uprobe {
|
|
union {
|
|
u8 insn[MAX_UINSN_BYTES];
|
|
u8 ixol[MAX_UINSN_BYTES];
|
|
};
|
|
|
|
const struct uprobe_xol_ops *ops;
|
|
|
|
union {
|
|
struct {
|
|
s32 offs;
|
|
u8 ilen;
|
|
u8 opc1;
|
|
} branch;
|
|
struct {
|
|
u8 fixups;
|
|
u8 ilen;
|
|
} defparam;
|
|
struct {
|
|
u8 reg_offset; /* to the start of pt_regs */
|
|
u8 ilen;
|
|
} push;
|
|
};
|
|
};
|
|
|
|
/*
 * x86-specific uprobe state kept per task.
 *
 * NOTE(review): the "saved_" prefix suggests these hold values stashed
 * while a probed instruction is handled and restored afterwards; confirm
 * against the code that reads and writes them.
 */
struct arch_uprobe_task {
#ifdef CONFIG_X86_64
	/* Only present on 64-bit builds. */
	unsigned long			saved_scratch_register;
#endif
	unsigned int			saved_trap_nr;
	unsigned int			saved_tf;
};
|
|
|
|
#endif /* _ASM_UPROBES_H */
|