From 4af4206be2bd1933cae20c2b6fb2058dbc887f7c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 13 Apr 2014 20:58:54 +0200
Subject: [PATCH 1/4] tracing: Fix syscall_*regfunc() vs copy_process() race

syscall_regfunc() and syscall_unregfunc() should set/clear
TIF_SYSCALL_TRACEPOINT system-wide, but do_each_thread() can race with
copy_process() and miss a new child that has not yet been added to the
process/thread lists.

Change copy_process() to update the child's TIF_SYSCALL_TRACEPOINT under
tasklist_lock.

Link: http://lkml.kernel.org/p/20140413185854.GB20668@redhat.com

Cc: stable@vger.kernel.org # 2.6.33
Fixes: a871bd33a6c0 "tracing: Add syscall tracepoints"
Acked-by: Frederic Weisbecker
Acked-by: Paul E. McKenney
Signed-off-by: Oleg Nesterov
Signed-off-by: Steven Rostedt
---
 include/trace/syscall.h | 15 +++++++++++++++
 kernel/fork.c           |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index fed853f3d7aa..9674145e2f6a 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -4,6 +4,7 @@
 #include <linux/tracepoint.h>
 #include <linux/unistd.h>
 #include <linux/ftrace_event.h>
+#include <linux/thread_info.h>
 
 #include <asm/ptrace.h>
 
@@ -32,4 +33,18 @@ struct syscall_metadata {
 	struct ftrace_event_call *exit_event;
 };
 
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
+static inline void syscall_tracepoint_update(struct task_struct *p)
+{
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+	else
+		clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+}
+#else
+static inline void syscall_tracepoint_update(struct task_struct *p)
+{
+}
+#endif
+
 #endif /* _TRACE_SYSCALL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index d2799d1fc952..6a13c46cd87d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
+	syscall_tracepoint_update(p);
 	write_unlock_irq(&tasklist_lock);
+
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	if (clone_flags & CLONE_THREAD)

From 8063e41d2ffc0b0ce974ea802158be35902072f3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 13 Apr 2014 20:59:18 +0200
Subject: [PATCH 2/4] tracing: Change syscall_*regfunc() to check PF_KTHREAD
 and use for_each_process_thread()

1. Remove _irqsafe from syscall_regfunc/syscall_unregfunc;
   read_lock(tasklist) doesn't need to disable irqs.

2. Change this code to avoid the deprecated do_each_thread() and use
   for_each_process_thread() (stolen from the patch from Frederic).

3. Change syscall_regfunc() to check PF_KTHREAD to skip kernel
   threads; checking ->mm != NULL is a common mistake.

   Note: probably this check should be simply removed, needs another
   patch.

[fweisbec@gmail.com: s/do_each_thread/for_each_process_thread/]

Link: http://lkml.kernel.org/p/20140413185918.GC20668@redhat.com

Signed-off-by: Oleg Nesterov
Signed-off-by: Steven Rostedt
---
 kernel/tracepoint.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 33cbd8c203f8..9cf12640de5a 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -492,33 +492,31 @@ static int sys_tracepoint_refcount;
 
 void syscall_regfunc(void)
 {
-	unsigned long flags;
-	struct task_struct *g, *t;
+	struct task_struct *p, *t;
 
 	if (!sys_tracepoint_refcount) {
-		read_lock_irqsave(&tasklist_lock, flags);
-		do_each_thread(g, t) {
+		read_lock(&tasklist_lock);
+		for_each_process_thread(p, t) {
 			/* Skip kernel threads. */
-			if (t->mm)
+			if (!(t->flags & PF_KTHREAD))
 				set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
-		} while_each_thread(g, t);
-		read_unlock_irqrestore(&tasklist_lock, flags);
+		}
+		read_unlock(&tasklist_lock);
 	}
 	sys_tracepoint_refcount++;
 }
 
 void syscall_unregfunc(void)
 {
-	unsigned long flags;
-	struct task_struct *g, *t;
+	struct task_struct *p, *t;
 
 	sys_tracepoint_refcount--;
 	if (!sys_tracepoint_refcount) {
-		read_lock_irqsave(&tasklist_lock, flags);
-		do_each_thread(g, t) {
+		read_lock(&tasklist_lock);
+		for_each_process_thread(p, t) {
 			clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
-		} while_each_thread(g, t);
-		read_unlock_irqrestore(&tasklist_lock, flags);
+		}
+		read_unlock(&tasklist_lock);
 	}
 }
 #endif

From ea73c79e33c45e1fa0071e216f06fd5682314490 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 13 Apr 2014 20:59:38 +0200
Subject: [PATCH 3/4] tracing: syscall_regfunc() should not skip kernel threads

syscall_regfunc() ignores kernel threads because "it has no effect",
see cc3b13c1 "Don't trace kernel thread syscalls", which added this
check.

However, this means that a user-space task spawned by
call_usermodehelper() will run without TIF_SYSCALL_TRACEPOINT if
sys_tracepoint_refcount != 0.

Remove this check. The unnecessary report from the ret_from_fork path
mentioned by cc3b13c1 is no longer possible, see commit fb45550d76bb5
"make sure that kernel_thread() callbacks call do_exit() themselves".

A kernel_thread() callback can only return and take the
int_ret_from_sys_call path after do_execve() succeeds, otherwise the
kernel will crash. But in this case it is no longer a kernel thread and
thus it needs TIF_SYSCALL_TRACEPOINT.

Link: http://lkml.kernel.org/p/20140413185938.GD20668@redhat.com

Signed-off-by: Oleg Nesterov
Signed-off-by: Steven Rostedt
---
 kernel/tracepoint.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 9cf12640de5a..3490407dc7b7 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -497,9 +497,7 @@ void syscall_regfunc(void)
 	if (!sys_tracepoint_refcount) {
 		read_lock(&tasklist_lock);
 		for_each_process_thread(p, t) {
-			/* Skip kernel threads. */
-			if (!(t->flags & PF_KTHREAD))
-				set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
+			set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 		}
 		read_unlock(&tasklist_lock);
 	}

From 4d4c9cc839a308be3289a361ccba4447ee140552 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Tue, 17 Jun 2014 08:59:16 -0400
Subject: [PATCH 4/4] tracing: Add __field_struct macro for TRACE_EVENT()

Currently the __field() macro in TRACE_EVENT is only good for primitive
values, such as integers and pointers, but it fails on complex data
types such as structures or unions. This is because the __field() macro
determines if the variable is signed or not with the test of:

	(((type)(-1)) < (type)1)

Unfortunately, that fails when type is a structure.

Since trace events should support structures as fields, a new macro is
created for such a case called __field_struct(), which acts exactly the
same as __field() does but does not do the signed type check and just
uses a constant false for that answer.
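For illustration only (not part of the patch), here is a minimal sketch of
why that signedness test cannot work for an aggregate type; the macro and
struct names below are made up to mirror the expression quoted above:

	#define sample_is_signed(type)	(((type)(-1)) < (type)1)

	struct sample_box { int id; };

	static int ok = sample_is_signed(int);	/* fine: (-1 < 1) evaluates to 1 */
	/* static int bad = sample_is_signed(struct sample_box); */
	/* ^ fails to build: -1 cannot be cast to a structure type,
	 *   so __field() cannot be expanded for a struct member. */
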
Cc: Tony Luck
Signed-off-by: Steven Rostedt
---
 include/trace/ftrace.h                     | 33 ++++++++++++++++++++++
 samples/trace_events/trace-events-sample.h |  3 +-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0fd06fef9fac..26b4f2e13275 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -44,6 +44,12 @@
 #undef __field_ext
 #define __field_ext(type, item, filter_type)	type	item;
 
+#undef __field_struct
+#define __field_struct(type, item)	type	item;
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)	type	item;
+
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
@@ -122,6 +128,12 @@
 #undef __field_ext
 #define __field_ext(type, item, filter_type)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
 #undef __array
 #define __array(type, item, len)
 
@@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {	\
 	if (ret)							\
 		return ret;
 
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item),			\
+				 0, filter_type);			\
+	if (ret)							\
+		return ret;
+
 #undef __field
 #define __field(type, item)	__field_ext(type, item, FILTER_OTHER)
 
+#undef __field_struct
+#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER)
+
 #undef __array
 #define __array(type, item, len)					\
 	do {								\
@@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef __field_ext
 #define __field_ext(type, item, filter_type)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
 #undef __array
 #define __array(type, item, len)
 
@@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call(			\
 #undef __field
 #define __field(type, item)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
 #undef __array
 #define __array(type, item, len)
 
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 6af373236d73..4b0113f73ee9 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -56,7 +56,8 @@
  * struct: This defines the way the data will be stored in the ring buffer.
  *         There are currently two types of elements. __field and __array.
  *         a __field is broken up into (type, name). Where type can be any
- *         type but an array.
+ *         primitive type (integer, long or pointer). __field_struct() can
+ *         be any static complex data value (struct, union, but not an array).
  *         For an array. there are three fields. (type, name, size). The
 *          type of elements in the array, the name of the field and the size
 *          of the array.
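To show how the new macro fits into an event definition, here is a
hypothetical TRACE_EVENT() (not part of this patch set; the event, the
structure and the field names are made up for illustration):

	struct sample_box {		/* made-up aggregate stored by value */
		int	id;
		long	weight;
	};

	TRACE_EVENT(sample_box_update,

		TP_PROTO(int cpu, struct sample_box *box),

		TP_ARGS(cpu, box),

		TP_STRUCT__entry(
			__field(	int,			cpu	)
			__field_struct(	struct sample_box,	box	)
		),

		TP_fast_assign(
			__entry->cpu = cpu;
			__entry->box = *box;	/* whole struct copied into the entry */
		),

		TP_printk("cpu=%d id=%d weight=%ld",
			  __entry->cpu, __entry->box.id, __entry->box.weight)
	);

As with __field(), the structure is stored by value in the ring buffer
entry; per the patch above, its "signed" attribute is simply recorded as
false instead of being computed from the type.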