From 6496bb72bf20c1c7e4d6be44dfa663163e709116 Mon Sep 17 00:00:00 2001
From: Kenny Yu <kennyyu@fb.com>
Date: Fri, 13 Jan 2017 08:58:34 -0800
Subject: [PATCH 01/25] uprobe: Find last occurrence of ':' when parsing uprobe
 PATH:OFFSET

Previously, `create_trace_uprobe` found the *first* occurrence
of the ':' character when parsing `PATH:OFFSET` for a uprobe.
However, if the path contains a ':' character, then the function
would parse the path incorrectly. Even worse, if the path does not
exist, the subsequent call to `kern_path()` would set `ret` to
`ENOENT`, leading to very cryptic errno values in user space.

The fix is to find the *last* occurrence of ':'.
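
As a minimal illustration (not part of the patch), the two lookups
split a colon-containing argument at different points:

  const char *arg = "/root/testing/bash:with:colon:0x6";

  strchr(arg, ':');   /* -> ":with:colon:0x6" -- truncates the path too early */
  strrchr(arg, ':');  /* -> ":0x6" -- leaves the full path before the offset */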

How to reproduce: without the patch, the second write below fails with
"No such file or directory", incorrectly suggesting that the
`uprobe_events` file does not exist.

  $ mkdir testing && cd testing
  $ cp /bin/bash .
  $ cp /bin/bash ./bash:with:colon
  $ echo "p:uprobes/p__root_testing_bash_0x6 /root/testing/bash:0x6" > /sys/kernel/debug/tracing/uprobe_events     # this works
  $ echo "p:uprobes/p__root_testing_bash_with_colon_0x6 /root/testing/bash:with:colon:0x6" >> /sys/kernel/debug/tracing/uprobe_events     # this doesn't
  -bash: echo: write error: No such file or directory

With the patch:

  $ echo "p:uprobes/p__root_testing_bash_0x6 /root/testing/bash:0x6" > /sys/kernel/debug/tracing/uprobe_events     # this still works
  $ echo "p:uprobes/p__root_testing_bash_with_colon_0x6 /root/testing/bash:with:colon:0x6" >> /sys/kernel/debug/tracing/uprobe_events     # this works now too!
  $ cat /sys/kernel/debug/tracing/uprobe_events
  p:uprobes/p__root_testing_bash_0x6 /root/testing/bash:0x0000000000000006
  p:uprobes/p__root_testing_bash_with_colon_0x6 /root/testing/bash:with:colon:0x0000000000000006

Link: http://lkml.kernel.org/r/20170113165834.4081016-1-kennyyu@fb.com

Signed-off-by: Kenny Yu <kennyyu@fb.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_uprobe.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 0913693caf6e..4f2ba2bb11e0 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -431,7 +431,8 @@ static int create_trace_uprobe(int argc, char **argv)
 		pr_info("Probe point is not specified.\n");
 		return -EINVAL;
 	}
-	arg = strchr(argv[1], ':');
+	/* Find the last occurrence, in case the path contains ':' too. */
+	arg = strrchr(argv[1], ':');
 	if (!arg) {
 		ret = -EINVAL;
 		goto fail_address_parse;

From d45ae1f7041ac52ade6c5ec76d96bbed765d67aa Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 17 Jan 2017 12:29:35 -0500
Subject: [PATCH 02/25] tracing: Process constants for (un)likely() profiler

When running the likely/unlikely profiler, one of the results did not look
accurate. It noted that the unlikely() in link_path_walk() was 100%
incorrect. When I added a trace_printk() to see what was happening there, it
became 80% correct! Looking deeper into what was happening, I found that
gcc split that if statement into two paths: one where the if condition
became a constant, the other where it remained a variable. The constant
path had the if statement always hit (making the unlikely() there always
false), but since the #define unlikely() has:

  #define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))

where constants are ignored by the branch profiler, the "constant" path
made by the compiler was ignored, even though it was hit 80% of the time.

By passing the constant value to the __branch_check__() function and
tracing it out of line (counted as always correct, since likely/unlikely
is not a factor for constants), we get back accurate readings of branches
that gcc optimized by making part of the execution constant.
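
As a purely hypothetical sketch of the kind of split gcc can make (the
flag and helper names below are made up for illustration; this is not
the actual link_path_walk() code):

  static int walk_component_example(unsigned int flags)
  {
          /*
           * After inlining, gcc may clone this into one copy where
           * (flags & WALK_MORE_EXAMPLE) folds to a compile-time
           * constant and another copy where it stays a variable.
           * The profiler must not silently drop the constant copy.
           */
          if (unlikely(flags & WALK_MORE_EXAMPLE))
                  return slow_path_example(flags);
          return fast_path_example(flags);
  }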

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/compiler.h    | 14 ++++++++------
 kernel/trace/trace_branch.c |  6 +++++-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index cf0fa5d86059..bbbe1570de1c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -107,12 +107,13 @@ struct ftrace_branch_data {
  */
 #if defined(CONFIG_TRACE_BRANCH_PROFILING) \
     && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
-void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
+void ftrace_likely_update(struct ftrace_branch_data *f, int val,
+			  int expect, int is_constant);
 
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
 #define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
 
-#define __branch_check__(x, expect) ({					\
+#define __branch_check__(x, expect, is_constant) ({			\
 			int ______r;					\
 			static struct ftrace_branch_data		\
 				__attribute__((__aligned__(4)))		\
@@ -122,8 +123,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 				.file = __FILE__,			\
 				.line = __LINE__,			\
 			};						\
-			______r = likely_notrace(x);			\
-			ftrace_likely_update(&______f, ______r, expect); \
+			______r = __builtin_expect(!!(x), expect);	\
+			ftrace_likely_update(&______f, ______r,		\
+					     expect, is_constant);	\
 			______r;					\
 		})
 
@@ -133,10 +135,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
  * written by Daniel Walker.
  */
 # ifndef likely
-#  define likely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1))
+#  define likely(x)	(__branch_check__(x, 1, __builtin_constant_p(x)))
 # endif
 # ifndef unlikely
-#  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0))
+#  define unlikely(x)	(__branch_check__(x, 0, __builtin_constant_p(x)))
 # endif
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 75489de546b6..7afe426ea528 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -200,8 +200,12 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
-void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
+void ftrace_likely_update(struct ftrace_branch_data *f, int val,
+			  int expect, int is_constant)
 {
+	/* A constant is always correct */
+	if (is_constant)
+		val = expect;
 	/*
 	 * I would love to have a trace point here instead, but the
 	 * trace point code is so inundated with unlikely and likely

From 134e6a034cb004ed5acd3048792de70ced1c6cf5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 19 Jan 2017 08:57:14 -0500
Subject: [PATCH 03/25] tracing: Show number of constants profiled in likely
 profiler

Now that constants are traced, it is useful to see how many of the hits
recorded by the likely/unlikely profiler came from constants, in order
to know whether they should be ignored or not.

The likely/unlikely profiler will display a number after the "correct"
count if a "constant" count exists.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/compiler.h    | 15 +++++---
 kernel/trace/trace_branch.c | 68 ++++++++++++++++++++++++++++++-------
 2 files changed, 65 insertions(+), 18 deletions(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bbbe1570de1c..a73cc9afa784 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -101,13 +101,18 @@ struct ftrace_branch_data {
 	};
 };
 
+struct ftrace_likely_data {
+	struct ftrace_branch_data	data;
+	unsigned long			constant;
+};
+
 /*
  * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
  * to disable branch tracing on a per file basis.
  */
 #if defined(CONFIG_TRACE_BRANCH_PROFILING) \
     && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
-void ftrace_likely_update(struct ftrace_branch_data *f, int val,
+void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 			  int expect, int is_constant);
 
 #define likely_notrace(x)	__builtin_expect(!!(x), 1)
@@ -115,13 +120,13 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val,
 
 #define __branch_check__(x, expect, is_constant) ({			\
 			int ______r;					\
-			static struct ftrace_branch_data		\
+			static struct ftrace_likely_data		\
 				__attribute__((__aligned__(4)))		\
 				__attribute__((section("_ftrace_annotated_branch"))) \
 				______f = {				\
-				.func = __func__,			\
-				.file = __FILE__,			\
-				.line = __LINE__,			\
+				.data.func = __func__,			\
+				.data.file = __FILE__,			\
+				.data.line = __LINE__,			\
 			};						\
 			______r = __builtin_expect(!!(x), expect);	\
 			ftrace_likely_update(&______f, ______r,		\
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 7afe426ea528..fd483d74f8e1 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -200,25 +200,27 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
-void ftrace_likely_update(struct ftrace_branch_data *f, int val,
+void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 			  int expect, int is_constant)
 {
 	/* A constant is always correct */
-	if (is_constant)
+	if (is_constant) {
+		f->constant++;
 		val = expect;
+	}
 	/*
 	 * I would love to have a trace point here instead, but the
 	 * trace point code is so inundated with unlikely and likely
 	 * conditions that the recursive nightmare that exists is too
 	 * much to try to get working. At least for now.
 	 */
-	trace_likely_condition(f, val, expect);
+	trace_likely_condition(&f->data, val, expect);
 
 	/* FIXME: Make this atomic! */
 	if (val == expect)
-		f->correct++;
+		f->data.correct++;
 	else
-		f->incorrect++;
+		f->data.incorrect++;
 }
 EXPORT_SYMBOL(ftrace_likely_update);
 
@@ -249,29 +251,60 @@ static inline long get_incorrect_percent(struct ftrace_branch_data *p)
 	return percent;
 }
 
-static int branch_stat_show(struct seq_file *m, void *v)
+static const char *branch_stat_process_file(struct ftrace_branch_data *p)
 {
-	struct ftrace_branch_data *p = v;
 	const char *f;
-	long percent;
 
 	/* Only print the file, not the path */
 	f = p->file + strlen(p->file);
 	while (f >= p->file && *f != '/')
 		f--;
-	f++;
+	return ++f;
+}
+
+static void branch_stat_show(struct seq_file *m,
+			     struct ftrace_branch_data *p, const char *f)
+{
+	long percent;
 
 	/*
 	 * The miss is overlayed on correct, and hit on incorrect.
 	 */
 	percent = get_incorrect_percent(p);
 
-	seq_printf(m, "%8lu %8lu ",  p->correct, p->incorrect);
 	if (percent < 0)
 		seq_puts(m, "  X ");
 	else
 		seq_printf(m, "%3ld ", percent);
+
 	seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
+}
+
+static int branch_stat_show_normal(struct seq_file *m,
+				   struct ftrace_branch_data *p, const char *f)
+{
+	seq_printf(m, "%8lu %8lu ",  p->correct, p->incorrect);
+	branch_stat_show(m, p, f);
+	return 0;
+}
+
+static int annotate_branch_stat_show(struct seq_file *m, void *v)
+{
+	struct ftrace_likely_data *p = v;
+	const char *f;
+	int l;
+
+	f = branch_stat_process_file(&p->data);
+
+	if (!p->constant)
+		return branch_stat_show_normal(m, &p->data, f);
+
+	l = snprintf(NULL, 0, "/%lu", p->constant);
+	l = l > 8 ? 0 : 8 - l;
+
+	seq_printf(m, "%8lu/%lu %*lu ",
+		   p->data.correct, p->constant, l, p->data.incorrect);
+	branch_stat_show(m, &p->data, f);
 	return 0;
 }
 
@@ -283,7 +316,7 @@ static void *annotated_branch_stat_start(struct tracer_stat *trace)
 static void *
 annotated_branch_stat_next(void *v, int idx)
 {
-	struct ftrace_branch_data *p = v;
+	struct ftrace_likely_data *p = v;
 
 	++p;
 
@@ -332,7 +365,7 @@ static struct tracer_stat annotated_branch_stats = {
 	.stat_next = annotated_branch_stat_next,
 	.stat_cmp = annotated_branch_stat_cmp,
 	.stat_headers = annotated_branch_stat_headers,
-	.stat_show = branch_stat_show
+	.stat_show = annotate_branch_stat_show
 };
 
 __init static int init_annotated_branch_stats(void)
@@ -383,12 +416,21 @@ all_branch_stat_next(void *v, int idx)
 	return p;
 }
 
+static int all_branch_stat_show(struct seq_file *m, void *v)
+{
+	struct ftrace_branch_data *p = v;
+	const char *f;
+
+	f = branch_stat_process_file(p);
+	return branch_stat_show_normal(m, p, f);
+}
+
 static struct tracer_stat all_branch_stats = {
 	.name = "branch_all",
 	.stat_start = all_branch_stat_start,
 	.stat_next = all_branch_stat_next,
 	.stat_headers = all_branch_stat_headers,
-	.stat_show = branch_stat_show
+	.stat_show = all_branch_stat_show
 };
 
 __init static int all_annotated_branch_stats(void)

From 068f530b3f274d313395663bf8d674798d4858c6 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 19 Jan 2017 08:57:41 -0500
Subject: [PATCH 04/25] tracing: Add the constant count for branch tracer

The unlikely/likely branch profiler now gets called even if the if statement
is a constant (always goes in one direction without a compare). Add a value
to denote this in the likely/unlikely tracer as well.
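
A branch event for such a compiler-generated constant path would then
render along the lines of (values illustrative):

  1650:link_path_walk:namei.c (1) CONSTANT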

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_branch.c  | 17 +++++++++--------
 kernel/trace/trace_entries.h |  6 ++++--
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index fd483d74f8e1..4d8fdf3184dc 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -27,7 +27,7 @@ static DEFINE_MUTEX(branch_tracing_mutex);
 static struct trace_array *branch_tracer;
 
 static void
-probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+probe_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
 	struct trace_event_call *call = &event_branch;
 	struct trace_array *tr = branch_tracer;
@@ -68,16 +68,17 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 	entry	= ring_buffer_event_data(event);
 
 	/* Strip off the path, only save the file */
-	p = f->file + strlen(f->file);
-	while (p >= f->file && *p != '/')
+	p = f->data.file + strlen(f->data.file);
+	while (p >= f->data.file && *p != '/')
 		p--;
 	p++;
 
-	strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
+	strncpy(entry->func, f->data.func, TRACE_FUNC_SIZE);
 	strncpy(entry->file, p, TRACE_FILE_SIZE);
 	entry->func[TRACE_FUNC_SIZE] = 0;
 	entry->file[TRACE_FILE_SIZE] = 0;
-	entry->line = f->line;
+	entry->constant = f->constant;
+	entry->line = f->data.line;
 	entry->correct = val == expect;
 
 	if (!call_filter_check_discard(call, entry, buffer, event))
@@ -89,7 +90,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 }
 
 static inline
-void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
 	if (!branch_tracing_enabled)
 		return;
@@ -195,7 +196,7 @@ core_initcall(init_branch_tracer);
 
 #else
 static inline
-void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
+void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
 {
 }
 #endif /* CONFIG_BRANCH_TRACER */
@@ -214,7 +215,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	 * conditions that the recursive nightmare that exists is too
 	 * much to try to get working. At least for now.
 	 */
-	trace_likely_condition(&f->data, val, expect);
+	trace_likely_condition(f, val, expect);
 
 	/* FIXME: Make this atomic! */
 	if (val == expect)
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index eb7396b7e7c3..c203ac4df791 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -328,11 +328,13 @@ FTRACE_ENTRY(branch, trace_branch,
 		__array(	char,		func,	TRACE_FUNC_SIZE+1	)
 		__array(	char,		file,	TRACE_FILE_SIZE+1	)
 		__field(	char,		correct				)
+		__field(	char,		constant			)
 	),
 
-	F_printk("%u:%s:%s (%u)",
+	F_printk("%u:%s:%s (%u)%s",
 		 __entry->line,
-		 __entry->func, __entry->file, __entry->correct),
+		 __entry->func, __entry->file, __entry->correct,
+		 __entry->constant ? " CONSTANT" : ""),
 
 	FILTER_OTHER
 );

From 3e278c0dc1cf5070d9462ececde1c07369b469b2 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 20 Jan 2017 11:44:45 +0900
Subject: [PATCH 05/25] ftrace: Factor out __ftrace_hash_move()

__ftrace_hash_move() allocates a properly-sized hash and moves the
entries from the src ftrace_hash into it.  It will be used to set the
function graph filters, which have nothing to do with the dyn_ftrace
records.
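
A condensed view of the resulting caller (see the full hunk below):

  new_hash = __ftrace_hash_move(src);
  if (!new_hash)
          return -ENOMEM;
  /* dst is only updated once the new hash was built successfully */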

Link: http://lkml.kernel.org/r/20170120024447.26097-1-namhyung@kernel.org

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eb230f06ba41..37b0e948d924 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1383,9 +1383,8 @@ ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash);
 static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
 				       struct ftrace_hash *new_hash);
 
-static int
-ftrace_hash_move(struct ftrace_ops *ops, int enable,
-		 struct ftrace_hash **dst, struct ftrace_hash *src)
+static struct ftrace_hash *
+__ftrace_hash_move(struct ftrace_hash *src)
 {
 	struct ftrace_func_entry *entry;
 	struct hlist_node *tn;
@@ -1393,21 +1392,13 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 	struct ftrace_hash *new_hash;
 	int size = src->count;
 	int bits = 0;
-	int ret;
 	int i;
 
-	/* Reject setting notrace hash on IPMODIFY ftrace_ops */
-	if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable)
-		return -EINVAL;
-
 	/*
-	 * If the new source is empty, just free dst and assign it
-	 * the empty_hash.
+	 * If the new source is empty, just return the empty_hash.
 	 */
-	if (!src->count) {
-		new_hash = EMPTY_HASH;
-		goto update;
-	}
+	if (!src->count)
+		return EMPTY_HASH;
 
 	/*
 	 * Make the hash size about 1/2 the # found
@@ -1421,7 +1412,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 
 	new_hash = alloc_ftrace_hash(bits);
 	if (!new_hash)
-		return -ENOMEM;
+		return NULL;
 
 	size = 1 << src->size_bits;
 	for (i = 0; i < size; i++) {
@@ -1432,7 +1423,24 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 		}
 	}
 
-update:
+	return new_hash;
+}
+
+static int
+ftrace_hash_move(struct ftrace_ops *ops, int enable,
+		 struct ftrace_hash **dst, struct ftrace_hash *src)
+{
+	struct ftrace_hash *new_hash;
+	int ret;
+
+	/* Reject setting notrace hash on IPMODIFY ftrace_ops */
+	if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable)
+		return -EINVAL;
+
+	new_hash = __ftrace_hash_move(src);
+	if (!new_hash)
+		return -ENOMEM;
+
 	/* Make sure this can be applied if it is IPMODIFY ftrace_ops */
 	if (enable) {
 		/* IPMODIFY should be updated only when filter_hash updating */

From 4046bf023b0647d09704a32d9fe8aecbcee3e4c3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 20 Jan 2017 11:44:46 +0900
Subject: [PATCH 06/25] ftrace: Expose ftrace_hash_empty and ftrace_lookup_ip

It will be used when checking graph filter hashes later.

Link: http://lkml.kernel.org/r/20170120024447.26097-2-namhyung@kernel.org

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
[ Moved ftrace_hash dec and functions outside of FUNCTION_GRAPH define ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 14 +-------------
 kernel/trace/trace.h  | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 37b0e948d924..0470e373b9b4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1110,13 +1110,6 @@ struct ftrace_func_entry {
 	unsigned long ip;
 };
 
-struct ftrace_hash {
-	unsigned long		size_bits;
-	struct hlist_head	*buckets;
-	unsigned long		count;
-	struct rcu_head		rcu;
-};
-
 /*
  * We make these constant because no one should touch them,
  * but they are used as the default "empty hash", to avoid allocating
@@ -1192,12 +1185,7 @@ struct ftrace_page {
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
-static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
-{
-	return !hash || !hash->count;
-}
-
-static struct ftrace_func_entry *
+struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 {
 	unsigned long key;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1ea51ab53edf..9001460291bb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -753,6 +753,21 @@ enum print_line_t print_trace_line(struct trace_iterator *iter);
 
 extern char trace_find_mark(unsigned long long duration);
 
+struct ftrace_hash {
+	unsigned long		size_bits;
+	struct hlist_head	*buckets;
+	unsigned long		count;
+	struct rcu_head		rcu;
+};
+
+struct ftrace_func_entry *
+ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip);
+
+static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
+{
+	return !hash || !hash->count;
+}
+
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
@@ -787,7 +802,6 @@ extern void __trace_graph_return(struct trace_array *tr,
 				 struct ftrace_graph_ret *trace,
 				 unsigned long flags, int pc);
 
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
 #define FTRACE_GRAPH_MAX_FUNCS		32

From b9b0c831bed2682c2e3e9f5420fb6985549ef020 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 20 Jan 2017 11:44:47 +0900
Subject: [PATCH 07/25] ftrace: Convert graph filter to use hash tables

Use an ftrace_hash instead of a static array of a fixed size.  This is
useful when a graph filter pattern matches a large number of functions.
Now hash lookup is done with preemption disabled, to protect against
the hash being changed or freed underneath the lookup.
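
The lookup pattern, condensed from the ftrace_graph_notrace_addr()
hunk below:

  preempt_disable_notrace();

  if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr))
          ret = 1;

  preempt_enable_notrace();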

Link: http://lkml.kernel.org/r/20170120024447.26097-3-namhyung@kernel.org

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 190 ++++++++++++++++++++++++++++--------------
 kernel/trace/trace.h  |  60 +++++++------
 2 files changed, 156 insertions(+), 94 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0470e373b9b4..2d554a02241d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4378,7 +4378,7 @@ __setup("ftrace_filter=", set_ftrace_filter);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
 static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
-static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer);
+static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer);
 
 static unsigned long save_global_trampoline;
 static unsigned long save_global_flags;
@@ -4401,18 +4401,17 @@ static void __init set_ftrace_early_graph(char *buf, int enable)
 {
 	int ret;
 	char *func;
-	unsigned long *table = ftrace_graph_funcs;
-	int *count = &ftrace_graph_count;
+	struct ftrace_hash *hash;
 
-	if (!enable) {
-		table = ftrace_graph_notrace_funcs;
-		count = &ftrace_graph_notrace_count;
-	}
+	if (enable)
+		hash = ftrace_graph_hash;
+	else
+		hash = ftrace_graph_notrace_hash;
 
 	while (buf) {
 		func = strsep(&buf, ",");
 		/* we allow only one expression at a time */
-		ret = ftrace_set_func(table, count, FTRACE_GRAPH_MAX_FUNCS, func);
+		ret = ftrace_graph_set_hash(hash, func);
 		if (ret)
 			printk(KERN_DEBUG "ftrace: function %s not "
 					  "traceable\n", func);
@@ -4536,15 +4535,20 @@ static const struct file_operations ftrace_notrace_fops = {
 
 static DEFINE_MUTEX(graph_lock);
 
-int ftrace_graph_count;
-int ftrace_graph_notrace_count;
-unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
-unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH;
+struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH;
+
+enum graph_filter_type {
+	GRAPH_FILTER_NOTRACE	= 0,
+	GRAPH_FILTER_FUNCTION,
+};
 
 struct ftrace_graph_data {
-	unsigned long *table;
-	size_t size;
-	int *count;
+	struct ftrace_hash *hash;
+	struct ftrace_func_entry *entry;
+	int idx;   /* for hash table iteration */
+	enum graph_filter_type type;
+	struct ftrace_hash *new_hash;
 	const struct seq_operations *seq_ops;
 };
 
@@ -4552,10 +4556,31 @@ static void *
 __g_next(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_graph_data *fgd = m->private;
+	struct ftrace_func_entry *entry = fgd->entry;
+	struct hlist_head *head;
+	int i, idx = fgd->idx;
 
-	if (*pos >= *fgd->count)
+	if (*pos >= fgd->hash->count)
 		return NULL;
-	return &fgd->table[*pos];
+
+	if (entry) {
+		hlist_for_each_entry_continue(entry, hlist) {
+			fgd->entry = entry;
+			return entry;
+		}
+
+		idx++;
+	}
+
+	for (i = idx; i < 1 << fgd->hash->size_bits; i++) {
+		head = &fgd->hash->buckets[i];
+		hlist_for_each_entry(entry, head, hlist) {
+			fgd->entry = entry;
+			fgd->idx = i;
+			return entry;
+		}
+	}
+	return NULL;
 }
 
 static void *
@@ -4572,9 +4597,11 @@ static void *g_start(struct seq_file *m, loff_t *pos)
 	mutex_lock(&graph_lock);
 
 	/* Nothing, tell g_show to print all functions are enabled */
-	if (!*fgd->count && !*pos)
+	if (ftrace_hash_empty(fgd->hash) && !*pos)
 		return (void *)1;
 
+	fgd->idx = 0;
+	fgd->entry = NULL;
 	return __g_next(m, pos);
 }
 
@@ -4585,22 +4612,22 @@ static void g_stop(struct seq_file *m, void *p)
 
 static int g_show(struct seq_file *m, void *v)
 {
-	unsigned long *ptr = v;
+	struct ftrace_func_entry *entry = v;
 
-	if (!ptr)
+	if (!entry)
 		return 0;
 
-	if (ptr == (unsigned long *)1) {
+	if (entry == (void *)1) {
 		struct ftrace_graph_data *fgd = m->private;
 
-		if (fgd->table == ftrace_graph_funcs)
+		if (fgd->type == GRAPH_FILTER_FUNCTION)
 			seq_puts(m, "#### all functions enabled ####\n");
 		else
 			seq_puts(m, "#### no functions disabled ####\n");
 		return 0;
 	}
 
-	seq_printf(m, "%ps\n", (void *)*ptr);
+	seq_printf(m, "%ps\n", (void *)entry->ip);
 
 	return 0;
 }
@@ -4617,24 +4644,37 @@ __ftrace_graph_open(struct inode *inode, struct file *file,
 		    struct ftrace_graph_data *fgd)
 {
 	int ret = 0;
+	struct ftrace_hash *new_hash = NULL;
 
-	mutex_lock(&graph_lock);
-	if ((file->f_mode & FMODE_WRITE) &&
-	    (file->f_flags & O_TRUNC)) {
-		*fgd->count = 0;
-		memset(fgd->table, 0, fgd->size * sizeof(*fgd->table));
+	if (file->f_mode & FMODE_WRITE) {
+		const int size_bits = FTRACE_HASH_DEFAULT_BITS;
+
+		if (file->f_flags & O_TRUNC)
+			new_hash = alloc_ftrace_hash(size_bits);
+		else
+			new_hash = alloc_and_copy_ftrace_hash(size_bits,
+							      fgd->hash);
+		if (!new_hash) {
+			ret = -ENOMEM;
+			goto out;
+		}
 	}
-	mutex_unlock(&graph_lock);
 
 	if (file->f_mode & FMODE_READ) {
-		ret = seq_open(file, fgd->seq_ops);
+		ret = seq_open(file, &ftrace_graph_seq_ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = fgd;
+		} else {
+			/* Failed */
+			free_ftrace_hash(new_hash);
+			new_hash = NULL;
 		}
 	} else
 		file->private_data = fgd;
 
+out:
+	fgd->new_hash = new_hash;
 	return ret;
 }
 
@@ -4642,6 +4682,7 @@ static int
 ftrace_graph_open(struct inode *inode, struct file *file)
 {
 	struct ftrace_graph_data *fgd;
+	int ret;
 
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
@@ -4650,18 +4691,25 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 	if (fgd == NULL)
 		return -ENOMEM;
 
-	fgd->table = ftrace_graph_funcs;
-	fgd->size = FTRACE_GRAPH_MAX_FUNCS;
-	fgd->count = &ftrace_graph_count;
+	mutex_lock(&graph_lock);
+
+	fgd->hash = ftrace_graph_hash;
+	fgd->type = GRAPH_FILTER_FUNCTION;
 	fgd->seq_ops = &ftrace_graph_seq_ops;
 
-	return __ftrace_graph_open(inode, file, fgd);
+	ret = __ftrace_graph_open(inode, file, fgd);
+	if (ret < 0)
+		kfree(fgd);
+
+	mutex_unlock(&graph_lock);
+	return ret;
 }
 
 static int
 ftrace_graph_notrace_open(struct inode *inode, struct file *file)
 {
 	struct ftrace_graph_data *fgd;
+	int ret;
 
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
@@ -4670,45 +4718,53 @@ ftrace_graph_notrace_open(struct inode *inode, struct file *file)
 	if (fgd == NULL)
 		return -ENOMEM;
 
-	fgd->table = ftrace_graph_notrace_funcs;
-	fgd->size = FTRACE_GRAPH_MAX_FUNCS;
-	fgd->count = &ftrace_graph_notrace_count;
+	mutex_lock(&graph_lock);
+
+	fgd->hash = ftrace_graph_notrace_hash;
+	fgd->type = GRAPH_FILTER_NOTRACE;
 	fgd->seq_ops = &ftrace_graph_seq_ops;
 
-	return __ftrace_graph_open(inode, file, fgd);
+	ret = __ftrace_graph_open(inode, file, fgd);
+	if (ret < 0)
+		kfree(fgd);
+
+	mutex_unlock(&graph_lock);
+	return ret;
 }
 
 static int
 ftrace_graph_release(struct inode *inode, struct file *file)
 {
+	struct ftrace_graph_data *fgd;
+
 	if (file->f_mode & FMODE_READ) {
 		struct seq_file *m = file->private_data;
 
-		kfree(m->private);
+		fgd = m->private;
 		seq_release(inode, file);
 	} else {
-		kfree(file->private_data);
+		fgd = file->private_data;
 	}
 
+	kfree(fgd->new_hash);
+	kfree(fgd);
+
 	return 0;
 }
 
 static int
-ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
+ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
 {
 	struct ftrace_glob func_g;
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
+	struct ftrace_func_entry *entry;
 	int fail = 1;
 	int not;
-	bool exists;
-	int i;
 
 	/* decode regex */
 	func_g.type = filter_parse_regex(buffer, strlen(buffer),
 					 &func_g.search, &not);
-	if (!not && *idx >= size)
-		return -EBUSY;
 
 	func_g.len = strlen(func_g.search);
 
@@ -4725,26 +4781,18 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
 			continue;
 
 		if (ftrace_match_record(rec, &func_g, NULL, 0)) {
-			/* if it is in the array */
-			exists = false;
-			for (i = 0; i < *idx; i++) {
-				if (array[i] == rec->ip) {
-					exists = true;
-					break;
-				}
-			}
+			entry = ftrace_lookup_ip(hash, rec->ip);
 
 			if (!not) {
 				fail = 0;
-				if (!exists) {
-					array[(*idx)++] = rec->ip;
-					if (*idx >= size)
-						goto out;
-				}
+
+				if (entry)
+					continue;
+				if (add_hash_entry(hash, rec->ip) < 0)
+					goto out;
 			} else {
-				if (exists) {
-					array[i] = array[--(*idx)];
-					array[*idx] = 0;
+				if (entry) {
+					free_hash_entry(hash, entry);
 					fail = 0;
 				}
 			}
@@ -4766,6 +4814,7 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 	struct trace_parser parser;
 	ssize_t read, ret = 0;
 	struct ftrace_graph_data *fgd = file->private_data;
+	struct ftrace_hash *old_hash, *new_hash;
 
 	if (!cnt)
 		return 0;
@@ -4781,10 +4830,25 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		mutex_lock(&graph_lock);
 
 		/* we allow only one expression at a time */
-		ret = ftrace_set_func(fgd->table, fgd->count, fgd->size,
-				      parser.buffer);
+		ret = ftrace_graph_set_hash(fgd->new_hash,
+					    parser.buffer);
+
+		old_hash = fgd->hash;
+		new_hash = __ftrace_hash_move(fgd->new_hash);
+		if (!new_hash)
+			ret = -ENOMEM;
+
+		if (fgd->type == GRAPH_FILTER_FUNCTION)
+			rcu_assign_pointer(ftrace_graph_hash, new_hash);
+		else
+			rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash);
 
 		mutex_unlock(&graph_lock);
+
+		/* Wait till all users are no longer using the old hash */
+		synchronize_sched();
+
+		free_ftrace_hash(old_hash);
 	}
 
 	if (!ret)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9001460291bb..afbec961eab1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -803,51 +803,49 @@ extern void __trace_graph_return(struct trace_array *tr,
 				 unsigned long flags, int pc);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-/* TODO: make this variable */
-#define FTRACE_GRAPH_MAX_FUNCS		32
-extern int ftrace_graph_count;
-extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
-extern int ftrace_graph_notrace_count;
-extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
+extern struct ftrace_hash *ftrace_graph_hash;
+extern struct ftrace_hash *ftrace_graph_notrace_hash;
 
 static inline int ftrace_graph_addr(unsigned long addr)
 {
-	int i;
+	int ret = 0;
 
-	if (!ftrace_graph_count)
-		return 1;
+	preempt_disable_notrace();
 
-	for (i = 0; i < ftrace_graph_count; i++) {
-		if (addr == ftrace_graph_funcs[i]) {
-			/*
-			 * If no irqs are to be traced, but a set_graph_function
-			 * is set, and called by an interrupt handler, we still
-			 * want to trace it.
-			 */
-			if (in_irq())
-				trace_recursion_set(TRACE_IRQ_BIT);
-			else
-				trace_recursion_clear(TRACE_IRQ_BIT);
-			return 1;
-		}
+	if (ftrace_hash_empty(ftrace_graph_hash)) {
+		ret = 1;
+		goto out;
 	}
 
-	return 0;
+	if (ftrace_lookup_ip(ftrace_graph_hash, addr)) {
+		/*
+		 * If no irqs are to be traced, but a set_graph_function
+		 * is set, and called by an interrupt handler, we still
+		 * want to trace it.
+		 */
+		if (in_irq())
+			trace_recursion_set(TRACE_IRQ_BIT);
+		else
+			trace_recursion_clear(TRACE_IRQ_BIT);
+		ret = 1;
+	}
+
+out:
+	preempt_enable_notrace();
+	return ret;
 }
 
 static inline int ftrace_graph_notrace_addr(unsigned long addr)
 {
-	int i;
+	int ret = 0;
 
-	if (!ftrace_graph_notrace_count)
-		return 0;
+	preempt_disable_notrace();
 
-	for (i = 0; i < ftrace_graph_notrace_count; i++) {
-		if (addr == ftrace_graph_notrace_funcs[i])
-			return 1;
-	}
+	if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr))
+		ret = 1;
 
-	return 0;
+	preempt_enable_notrace();
+	return ret;
 }
 #else
 static inline int ftrace_graph_addr(unsigned long addr)

From 2b0cce0e190f8f0b37fe8102ae657f5d9eb0976d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Feb 2017 12:19:33 -0500
Subject: [PATCH 08/25] tracing: Add ftrace_hash_key() helper function

Replace the couple of use cases that have small logic to produce the
ftrace function key id with a helper function. No need for duplicate code.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2d554a02241d..89240f62061c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1185,6 +1185,15 @@ struct ftrace_page {
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
+static __always_inline unsigned long
+ftrace_hash_key(struct ftrace_hash *hash, unsigned long ip)
+{
+	if (hash->size_bits > 0)
+		return hash_long(ip, hash->size_bits);
+
+	return 0;
+}
+
 struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 {
@@ -1195,11 +1204,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 	if (ftrace_hash_empty(hash))
 		return NULL;
 
-	if (hash->size_bits > 0)
-		key = hash_long(ip, hash->size_bits);
-	else
-		key = 0;
-
+	key = ftrace_hash_key(hash, ip);
 	hhd = &hash->buckets[key];
 
 	hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) {
@@ -1215,11 +1220,7 @@ static void __add_hash_entry(struct ftrace_hash *hash,
 	struct hlist_head *hhd;
 	unsigned long key;
 
-	if (hash->size_bits)
-		key = hash_long(entry->ip, hash->size_bits);
-	else
-		key = 0;
-
+	key = ftrace_hash_key(hash, entry->ip);
 	hhd = &hash->buckets[key];
 	hlist_add_head(&entry->hlist, hhd);
 	hash->count++;

From 2b2c279c814112e15577757ec593aa78465e2e56 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Feb 2017 15:37:07 -0500
Subject: [PATCH 09/25] ftrace: Create a slight optimization on searching the
 ftrace_hash

This is a micro-optimization, but as it has to deal with a fast path of the
function tracer, these optimizations can be noticed.

ftrace_lookup_ip() returns true if the given ip is found in the hash. If
it's not found or the hash is NULL, it returns false. But there are some
cases where a NULL hash counts as true, and in those cases
ftrace_hash_empty() is tested before calling ftrace_lookup_ip(). But as
ftrace_lookup_ip() performs that test first anyway, it adds a few extra
unneeded instructions in those cases.

A new static "always_inlined" function is created that does not perform
the hash empty test. It must only be used by callers that do the check
first anyway, as an empty or NULL hash could cause a crash if a lookup
is performed on it.

Also add kernel doc for the ftrace_lookup_ip() main function.
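
The resulting calling pattern, as in the new ftrace_lookup_ip() wrapper
below, is:

  if (ftrace_hash_empty(hash))
          return NULL;    /* never look up an empty or NULL hash */

  return __ftrace_lookup_ip(hash, ip);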

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 89240f62061c..1595df0d7d79 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1194,16 +1194,14 @@ ftrace_hash_key(struct ftrace_hash *hash, unsigned long ip)
 	return 0;
 }
 
-struct ftrace_func_entry *
-ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
+/* Only use this function if ftrace_hash_empty() has already been tested */
+static __always_inline struct ftrace_func_entry *
+__ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 {
 	unsigned long key;
 	struct ftrace_func_entry *entry;
 	struct hlist_head *hhd;
 
-	if (ftrace_hash_empty(hash))
-		return NULL;
-
 	key = ftrace_hash_key(hash, ip);
 	hhd = &hash->buckets[key];
 
@@ -1214,6 +1212,25 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 	return NULL;
 }
 
+/**
+ * ftrace_lookup_ip - Test to see if an ip exists in an ftrace_hash
+ * @hash: The hash to look at
+ * @ip: The instruction pointer to test
+ *
+ * Search a given @hash to see if a given instruction pointer (@ip)
+ * exists in it.
+ *
+ * Returns the entry that holds the @ip if found. NULL otherwise.
+ */
+struct ftrace_func_entry *
+ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
+{
+	if (ftrace_hash_empty(hash))
+		return NULL;
+
+	return __ftrace_lookup_ip(hash, ip);
+}
+
 static void __add_hash_entry(struct ftrace_hash *hash,
 			     struct ftrace_func_entry *entry)
 {
@@ -1463,9 +1480,9 @@ static bool hash_contains_ip(unsigned long ip,
 	 * notrace hash is considered not in the notrace hash.
 	 */
 	return (ftrace_hash_empty(hash->filter_hash) ||
-		ftrace_lookup_ip(hash->filter_hash, ip)) &&
+		__ftrace_lookup_ip(hash->filter_hash, ip)) &&
 		(ftrace_hash_empty(hash->notrace_hash) ||
-		 !ftrace_lookup_ip(hash->notrace_hash, ip));
+		 !__ftrace_lookup_ip(hash->notrace_hash, ip));
 }
 
 /*
@@ -2877,7 +2894,7 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 
 	/* The function must be in the filter */
 	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
-	    !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
+	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
 		return 0;
 
 	/* If in notrace hash, we ignore it too */

From 555fc7813eeada1738eca42aa9f9ebafd7dc23e6 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Feb 2017 10:15:22 -0500
Subject: [PATCH 10/25] ftrace: Replace (void *)1 with a meaningful macro name
 FTRACE_GRAPH_EMPTY

When set_graph_function or set_graph_notrace contains no records, a
banner of either "#### all functions enabled ####" or
"#### no functions disabled ####" respectively is displayed. To tell the
seq operations to do this, (void *)1 is passed as a return value. Instead
of using a hardcoded meaningless value, define it as a macro.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1595df0d7d79..a9cfc8713198 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4561,6 +4561,8 @@ enum graph_filter_type {
 	GRAPH_FILTER_FUNCTION,
 };
 
+#define FTRACE_GRAPH_EMPTY	((void *)1)
+
 struct ftrace_graph_data {
 	struct ftrace_hash *hash;
 	struct ftrace_func_entry *entry;
@@ -4616,7 +4618,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
 
 	/* Nothing, tell g_show to print all functions are enabled */
 	if (ftrace_hash_empty(fgd->hash) && !*pos)
-		return (void *)1;
+		return FTRACE_GRAPH_EMPTY;
 
 	fgd->idx = 0;
 	fgd->entry = NULL;
@@ -4635,7 +4637,7 @@ static int g_show(struct seq_file *m, void *v)
 	if (!entry)
 		return 0;
 
-	if (entry == (void *)1) {
+	if (entry == FTRACE_GRAPH_EMPTY) {
 		struct ftrace_graph_data *fgd = m->private;
 
 		if (fgd->type == GRAPH_FILTER_FUNCTION)

From d4ad9a1ccac31a04a32b5e7547b70428830e0218 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Feb 2017 14:03:39 -0500
Subject: [PATCH 11/25] ftrace: Reset fgd->hash in ftrace_graph_write()

fgd->hash is saved and then freed, but is never reset to either
ftrace_graph_hash or ftrace_graph_notrace_hash. If multiple writes are
performed, the freed hash could be accessed again.

 # cd /sys/kernel/debug/tracing
 # head -1000 available_filter_functions > /tmp/funcs
 # cat /tmp/funcs > set_graph_function

Causes:

 general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC
 Modules linked in:  [...]
 CPU: 2 PID: 1337 Comm: cat Not tainted 4.10.0-rc2-test-00010-g6b052e9 #32
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
 task: ffff880113a12200 task.stack: ffffc90001940000
 RIP: 0010:free_ftrace_hash+0x7c/0x160
 RSP: 0018:ffffc90001943db0 EFLAGS: 00010246
 RAX: 6b6b6b6b6b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: 6b6b6b6b6b6b6b6b
 RDX: 0000000000000002 RSI: 0000000000000001 RDI: ffff8800ce1e1d40
 RBP: ffff8800ce1e1d50 R08: 0000000000000000 R09: 0000000000006400
 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
 R13: ffff8800ce1e1d40 R14: 0000000000004000 R15: 0000000000000001
 FS:  00007f9408a07740(0000) GS:ffff88011e500000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000aee1f0 CR3: 0000000116bb4000 CR4: 00000000001406e0
 Call Trace:
  ? ftrace_graph_write+0x150/0x190
  ? __vfs_write+0x1f6/0x210
  ? __audit_syscall_entry+0x17f/0x200
  ? rw_verify_area+0xdb/0x210
  ? _cond_resched+0x2b/0x50
  ? __sb_start_write+0xb4/0x130
  ? vfs_write+0x1c8/0x330
  ? SyS_write+0x62/0xf0
  ? do_syscall_64+0xa3/0x1b0
  ? entry_SYSCALL64_slow_path+0x25/0x25
 Code: 01 48 85 db 0f 84 92 00 00 00 b8 01 00 00 00 d3 e0 85 c0 7e 3f 83 e8 01 48 8d 6f 10 45 31 e4 4c 8d 34 c5 08 00 00 00 49 8b 45 08 <4a> 8b 34 20 48 85 f6 74 13 48 8b 1e 48 89 ef e8 20 fa ff ff 48
 RIP: free_ftrace_hash+0x7c/0x160 RSP: ffffc90001943db0
 ---[ end trace 999b48216bf4b393 ]---

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a9cfc8713198..b7df0dcf8652 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4858,10 +4858,13 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		if (!new_hash)
 			ret = -ENOMEM;
 
-		if (fgd->type == GRAPH_FILTER_FUNCTION)
+		if (fgd->type == GRAPH_FILTER_FUNCTION) {
 			rcu_assign_pointer(ftrace_graph_hash, new_hash);
-		else
+			fgd->hash = ftrace_graph_hash;
+		} else {
 			rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash);
+			fgd->hash = ftrace_graph_notrace_hash;
+		}
 
 		mutex_unlock(&graph_lock);
 

From ae98d27afc3bde5a48f440d905317602a5cfb0d2 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Feb 2017 16:59:06 -0500
Subject: [PATCH 12/25] ftrace: Have set_graph_functions handle write with RDWR

Reading the set_graph_functions file uses seq functions, which set the
file->private_data pointer to a seq_file descriptor. On write-only opens,
file->private_data is set to the ftrace_graph_data descriptor itself. But
if the file is opened for RDWR, ftrace_graph_write() will incorrectly use
the file->private_data descriptor instead of the
((struct seq_file *)file->private_data)->private pointer, and this can
crash the kernel.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b7df0dcf8652..0233c8cb45f4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4842,6 +4842,12 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX))
 		return -ENOMEM;
 
+	/* Read mode uses seq functions */
+	if (file->f_mode & FMODE_READ) {
+		struct seq_file *m = file->private_data;
+		fgd = m->private;
+	}
+
 	read = trace_get_user(&parser, ubuf, cnt, ppos);
 
 	if (read >= 0 && trace_parser_loaded((&parser))) {

From 0e684b6578ee463ecb5c9a1cd0c20069f063d9f0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Feb 2017 17:58:18 -0500
Subject: [PATCH 13/25] tracing: Reset parser->buffer to allow multiple "puts"

trace_parser_put() simply frees the allocated parser buffer. But it does not
reset the pointer that was freed. This means that if trace_parser_put() is
called on the same parser more than once, it will corrupt the allocation
system. Setting parser->buffer to NULL after free allows it to be called
more than once without any ill effect.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d7449783987a..4589b67168fc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1193,6 +1193,7 @@ int trace_parser_get_init(struct trace_parser *parser, int size)
 void trace_parser_put(struct trace_parser *parser)
 {
 	kfree(parser->buffer);
+	parser->buffer = NULL;
 }
 
 /*

From 649b988b12ddb9aed16047a3d9bb4d7bfdb47221 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Feb 2017 20:16:29 -0500
Subject: [PATCH 14/25] ftrace: Do not hold references of
 ftrace_graph_{notrace_}hash out of graph_lock

The hashes ftrace_graph_hash and ftrace_graph_notrace_hash are modified
with the graph_lock held. Holding a pointer to them and passing it along
can lead to use of a stale pointer (fgd->hash). Move assigning the
pointer and its use to within the holding of the lock. Note, they are
rcu_sched protected data, and other instances of referencing them are
done with preemption disabled. But the file manipulation code must be
protected by the lock.

The fgd->hash pointer is set to NULL when the lock is being released.

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0233c8cb45f4..b3a4896ef78a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4616,6 +4616,13 @@ static void *g_start(struct seq_file *m, loff_t *pos)
 
 	mutex_lock(&graph_lock);
 
+	if (fgd->type == GRAPH_FILTER_FUNCTION)
+		fgd->hash = rcu_dereference_protected(ftrace_graph_hash,
+					lockdep_is_held(&graph_lock));
+	else
+		fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+					lockdep_is_held(&graph_lock));
+
 	/* Nothing, tell g_show to print all functions are enabled */
 	if (ftrace_hash_empty(fgd->hash) && !*pos)
 		return FTRACE_GRAPH_EMPTY;
@@ -4695,6 +4702,14 @@ __ftrace_graph_open(struct inode *inode, struct file *file,
 
 out:
 	fgd->new_hash = new_hash;
+
+	/*
+	 * All uses of fgd->hash must be taken with the graph_lock
+	 * held. The graph_lock is going to be released, so force
+	 * fgd->hash to be reinitialized when it is taken again.
+	 */
+	fgd->hash = NULL;
+
 	return ret;
 }
 
@@ -4713,7 +4728,8 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 
 	mutex_lock(&graph_lock);
 
-	fgd->hash = ftrace_graph_hash;
+	fgd->hash = rcu_dereference_protected(ftrace_graph_hash,
+					lockdep_is_held(&graph_lock));
 	fgd->type = GRAPH_FILTER_FUNCTION;
 	fgd->seq_ops = &ftrace_graph_seq_ops;
 
@@ -4740,7 +4756,8 @@ ftrace_graph_notrace_open(struct inode *inode, struct file *file)
 
 	mutex_lock(&graph_lock);
 
-	fgd->hash = ftrace_graph_notrace_hash;
+	fgd->hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+					lockdep_is_held(&graph_lock));
 	fgd->type = GRAPH_FILTER_NOTRACE;
 	fgd->seq_ops = &ftrace_graph_seq_ops;
 
@@ -4859,17 +4876,18 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 		ret = ftrace_graph_set_hash(fgd->new_hash,
 					    parser.buffer);
 
-		old_hash = fgd->hash;
 		new_hash = __ftrace_hash_move(fgd->new_hash);
 		if (!new_hash)
 			ret = -ENOMEM;
 
 		if (fgd->type == GRAPH_FILTER_FUNCTION) {
+			old_hash = rcu_dereference_protected(ftrace_graph_hash,
+					lockdep_is_held(&graph_lock));
 			rcu_assign_pointer(ftrace_graph_hash, new_hash);
-			fgd->hash = ftrace_graph_hash;
 		} else {
+			old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+					lockdep_is_held(&graph_lock));
 			rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash);
-			fgd->hash = ftrace_graph_notrace_hash;
 		}
 
 		mutex_unlock(&graph_lock);

From e704eff3ff5138a462443dcd64d071165df18782 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 2 Feb 2017 20:34:37 -0500
Subject: [PATCH 15/25] ftrace: Have set_graph_function handle multiple
 functions in one write

Currently, only one function can be written to set_graph_function and
set_graph_notrace. Only the last function in the list will be saved,
even though other functions will be added and then removed.

Change the behavior to be the same as set_ftrace_function, allowing
multiple functions to be written. If any one fails, none of them will be
added. The addition of the functions is done at the end, when the file
is closed.
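
For example, all of the functions below end up in the filter when the
file is closed (function names illustrative):

  # echo "vfs_read vfs_write vfs_open" > set_graph_function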

Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 105 +++++++++++++++++++++++++-----------------
 1 file changed, 64 insertions(+), 41 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b3a4896ef78a..0c0609326391 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4564,12 +4564,13 @@ enum graph_filter_type {
 #define FTRACE_GRAPH_EMPTY	((void *)1)
 
 struct ftrace_graph_data {
-	struct ftrace_hash *hash;
-	struct ftrace_func_entry *entry;
-	int idx;   /* for hash table iteration */
-	enum graph_filter_type type;
-	struct ftrace_hash *new_hash;
-	const struct seq_operations *seq_ops;
+	struct ftrace_hash		*hash;
+	struct ftrace_func_entry	*entry;
+	int				idx;   /* for hash table iteration */
+	enum graph_filter_type		type;
+	struct ftrace_hash		*new_hash;
+	const struct seq_operations	*seq_ops;
+	struct trace_parser		parser;
 };
 
 static void *
@@ -4676,6 +4677,9 @@ __ftrace_graph_open(struct inode *inode, struct file *file,
 	if (file->f_mode & FMODE_WRITE) {
 		const int size_bits = FTRACE_HASH_DEFAULT_BITS;
 
+		if (trace_parser_get_init(&fgd->parser, FTRACE_BUFF_MAX))
+			return -ENOMEM;
+
 		if (file->f_flags & O_TRUNC)
 			new_hash = alloc_ftrace_hash(size_bits);
 		else
@@ -4701,6 +4705,9 @@ __ftrace_graph_open(struct inode *inode, struct file *file,
 		file->private_data = fgd;
 
 out:
+	if (ret < 0 && file->f_mode & FMODE_WRITE)
+		trace_parser_put(&fgd->parser);
+
 	fgd->new_hash = new_hash;
 
 	/*
@@ -4773,6 +4780,9 @@ static int
 ftrace_graph_release(struct inode *inode, struct file *file)
 {
 	struct ftrace_graph_data *fgd;
+	struct ftrace_hash *old_hash, *new_hash;
+	struct trace_parser *parser;
+	int ret = 0;
 
 	if (file->f_mode & FMODE_READ) {
 		struct seq_file *m = file->private_data;
@@ -4783,10 +4793,50 @@ ftrace_graph_release(struct inode *inode, struct file *file)
 		fgd = file->private_data;
 	}
 
+
+	if (file->f_mode & FMODE_WRITE) {
+
+		parser = &fgd->parser;
+
+		if (trace_parser_loaded((parser))) {
+			parser->buffer[parser->idx] = 0;
+			ret = ftrace_graph_set_hash(fgd->new_hash,
+						    parser->buffer);
+		}
+
+		trace_parser_put(parser);
+
+		new_hash = __ftrace_hash_move(fgd->new_hash);
+		if (!new_hash) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		mutex_lock(&graph_lock);
+
+		if (fgd->type == GRAPH_FILTER_FUNCTION) {
+			old_hash = rcu_dereference_protected(ftrace_graph_hash,
+					lockdep_is_held(&graph_lock));
+			rcu_assign_pointer(ftrace_graph_hash, new_hash);
+		} else {
+			old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
+					lockdep_is_held(&graph_lock));
+			rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash);
+		}
+
+		mutex_unlock(&graph_lock);
+
+		/* Wait till all users are no longer using the old hash */
+		synchronize_sched();
+
+		free_ftrace_hash(old_hash);
+	}
+
+ out:
 	kfree(fgd->new_hash);
 	kfree(fgd);
 
-	return 0;
+	return ret;
 }
 
 static int
@@ -4848,61 +4898,34 @@ static ssize_t
 ftrace_graph_write(struct file *file, const char __user *ubuf,
 		   size_t cnt, loff_t *ppos)
 {
-	struct trace_parser parser;
 	ssize_t read, ret = 0;
 	struct ftrace_graph_data *fgd = file->private_data;
-	struct ftrace_hash *old_hash, *new_hash;
+	struct trace_parser *parser;
 
 	if (!cnt)
 		return 0;
 
-	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX))
-		return -ENOMEM;
-
 	/* Read mode uses seq functions */
 	if (file->f_mode & FMODE_READ) {
 		struct seq_file *m = file->private_data;
 		fgd = m->private;
 	}
 
-	read = trace_get_user(&parser, ubuf, cnt, ppos);
+	parser = &fgd->parser;
 
-	if (read >= 0 && trace_parser_loaded((&parser))) {
-		parser.buffer[parser.idx] = 0;
+	read = trace_get_user(parser, ubuf, cnt, ppos);
 
-		mutex_lock(&graph_lock);
+	if (read >= 0 && trace_parser_loaded(parser) &&
+	    !trace_parser_cont(parser)) {
 
-		/* we allow only one expression at a time */
 		ret = ftrace_graph_set_hash(fgd->new_hash,
-					    parser.buffer);
-
-		new_hash = __ftrace_hash_move(fgd->new_hash);
-		if (!new_hash)
-			ret = -ENOMEM;
-
-		if (fgd->type == GRAPH_FILTER_FUNCTION) {
-			old_hash = rcu_dereference_protected(ftrace_graph_hash,
-					lockdep_is_held(&graph_lock));
-			rcu_assign_pointer(ftrace_graph_hash, new_hash);
-		} else {
-			old_hash = rcu_dereference_protected(ftrace_graph_notrace_hash,
-					lockdep_is_held(&graph_lock));
-			rcu_assign_pointer(ftrace_graph_notrace_hash, new_hash);
-		}
-
-		mutex_unlock(&graph_lock);
-
-		/* Wait till all users are no longer using the old hash */
-		synchronize_sched();
-
-		free_ftrace_hash(old_hash);
+					    parser->buffer);
+		trace_parser_clear(parser);
 	}
 
 	if (!ret)
 		ret = read;
 
-	trace_parser_put(&parser);
-
 	return ret;
 }
 

From 4c7384131c8d343c0bf79abac3b3e78596d85b10 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 8 Feb 2017 13:36:37 -0500
Subject: [PATCH 16/25] tracing: Have COMM event filter key be treated as a
 string

The GLOB operation "~" should be able to work with the COMM filter key in
order to trace programs with a glob. For example

  echo 'COMM ~ "systemd*"' > events/syscalls/filter

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index afbec961eab1..d2d068b36341 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1312,7 +1312,8 @@ static inline bool is_string_field(struct ftrace_event_field *field)
 {
 	return field->filter_type == FILTER_DYN_STRING ||
 	       field->filter_type == FILTER_STATIC_STRING ||
-	       field->filter_type == FILTER_PTR_STRING;
+	       field->filter_type == FILTER_PTR_STRING ||
+	       field->filter_type == FILTER_COMM;
 }
 
 static inline bool is_function_field(struct ftrace_event_field *field)

From 1f9b3546cf4c273b6d809003244d05cf0460a5e9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 9 Feb 2017 17:53:50 -0500
Subject: [PATCH 17/25] tracing: Have traceprobe_probes_write() not access
 userspace unnecessarily

The code in traceprobe_probes_write() reads up to 4096 bytes from userspace
for each line. If userspace passes in several lines to execute, the code
will do a large read for each line, even though it is highly likely that
the first read from userspace received all of the lines at once.

I changed the logic to do a single read from userspace, and to read from
userspace again only if not all of the buffered input was consumed.

I tested this by adding printk()s and writing files sized at one less than,
equal to, and one more than the buffer size, to make sure that there are no
overflows and that a single line one byte longer than the buffer size fails
properly.
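
For example, several definitions carried by a single write(2) are now
split out of one buffered read (probe names and targets here are just
illustrative):

  $ printf 'p:probe1 do_sys_open\np:probe2 do_sys_open\n' \
      >> /sys/kernel/debug/tracing/kprobe_events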

Link: http://lkml.kernel.org/r/20170209180458.5c829ab2@gandalf.local.home

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_probe.c | 48 +++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 8c0553d9afd3..2a06f1fa7001 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -647,7 +647,7 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos,
 				int (*createfn)(int, char **))
 {
-	char *kbuf, *tmp;
+	char *kbuf, *buf, *tmp;
 	int ret = 0;
 	size_t done = 0;
 	size_t size;
@@ -667,27 +667,37 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
 			goto out;
 		}
 		kbuf[size] = '\0';
-		tmp = strchr(kbuf, '\n');
+		buf = kbuf;
+		do {
+			tmp = strchr(buf, '\n');
+			if (tmp) {
+				*tmp = '\0';
+				size = tmp - buf + 1;
+			} else {
+				size = strlen(buf);
+				if (done + size < count) {
+					if (buf != kbuf)
+						break;
+					pr_warn("Line length is too long: Should be less than %d\n",
+						WRITE_BUFSIZE);
+					ret = -EINVAL;
+					goto out;
+				}
+			}
+			done += size;
 
-		if (tmp) {
-			*tmp = '\0';
-			size = tmp - kbuf + 1;
-		} else if (done + size < count) {
-			pr_warn("Line length is too long: Should be less than %d\n",
-				WRITE_BUFSIZE);
-			ret = -EINVAL;
-			goto out;
-		}
-		done += size;
-		/* Remove comments */
-		tmp = strchr(kbuf, '#');
+			/* Remove comments */
+			tmp = strchr(buf, '#');
 
-		if (tmp)
-			*tmp = '\0';
+			if (tmp)
+				*tmp = '\0';
 
-		ret = traceprobe_command(kbuf, createfn);
-		if (ret)
-			goto out;
+			ret = traceprobe_command(buf, createfn);
+			if (ret)
+				goto out;
+			buf += size;
+
+		} while (done < count);
 	}
 	ret = done;
 

From 8a58a34ba479d42b3ef28f8ffd9e245a81a7786f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 10 Feb 2017 16:41:15 +0100
Subject: [PATCH 18/25] timers: Make flags output in the timer_start tracepoint
 useful

The timer flags in the timer_start trace event contain lots of useful
information, but the meaning is not clear in the trace output. Making tools
rely on the bit positions is bad as they might change over time.

Decode the flags in the printout. Tools can retrieve the bits and their
meaning from the trace format file.
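
With the change, a timer_start event renders the flags symbolically,
e.g. (field values illustrative):

  timer_start: timer=ffff88003612ab20 function=delayed_work_timer_fn expires=4294706120 [timeout=100] cpu=0 idx=30 flags=D|P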

Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1702101639290.4036@nanos

Requested-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/timer.h        |  2 ++
 include/trace/events/timer.h | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 51d601f192d4..a17f915f9456 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -66,6 +66,8 @@ struct timer_list {
 #define TIMER_ARRAYSHIFT	22
 #define TIMER_ARRAYMASK		0xFFC00000
 
+#define TIMER_TRACE_FLAGMASK	(TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE)
+
 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
 		.entry = { .next = TIMER_ENTRY_STATIC },	\
 		.function = (_function),			\
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 1448637616d6..f6d8fea6df77 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -36,6 +36,13 @@ DEFINE_EVENT(timer_class, timer_init,
 	TP_ARGS(timer)
 );
 
+#define decode_timer_flags(flags)			\
+	__print_flags(flags, "|",			\
+		{  TIMER_MIGRATING,	"M" },		\
+		{  TIMER_DEFERRABLE,	"D" },		\
+		{  TIMER_PINNED,	"P" },		\
+		{  TIMER_IRQSAFE,	"I" })
+
 /**
  * timer_start - called when the timer is started
  * @timer:	pointer to struct timer_list
@@ -65,9 +72,12 @@ TRACE_EVENT(timer_start,
 		__entry->flags		= flags;
 	),
 
-	TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x",
+	TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] cpu=%u idx=%u flags=%s",
 		  __entry->timer, __entry->function, __entry->expires,
-		  (long)__entry->expires - __entry->now, __entry->flags)
+		  (long)__entry->expires - __entry->now,
+		  __entry->flags & TIMER_CPUMASK,
+		  __entry->flags >> TIMER_ARRAYSHIFT,
+		  decode_timer_flags(__entry->flags & TIMER_TRACE_FLAGMASK))
 );
 
 /**

From 8e0f11429aec1119b17cbfcfa2d3194c8e864a26 Mon Sep 17 00:00:00 2001
From: Luiz Capitulino <lcapitulino@redhat.com>
Date: Mon, 13 Feb 2017 12:25:17 -0500
Subject: [PATCH 19/25] tracing/hwlat: Update old comment about migration

The ftrace hwlat tracer does support a cpumask.
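
For reference, the mask is set through the usual tracefs knob; e.g. to
restrict the sampling kthread to CPUs 0-1 (mask value illustrative):

  $ echo 3 > /sys/kernel/debug/tracing/tracing_cpumask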

Link: http://lkml.kernel.org/r/20170213122517.6e211955@redhat.com

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_hwlat.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 775569ec50d0..bf209d2657ab 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -322,10 +322,7 @@ static void move_to_next_cpu(void)
  * need to ensure nothing else might be running (and thus preempting).
  * Obviously this should never be used in production environments.
  *
- * Currently this runs on which ever CPU it was scheduled on, but most
- * real-world hardware latency situations occur across several CPUs,
- * but we might later generalize this if we find there are any actualy
- * systems with alternate SMI delivery or other hardware latencies.
+ * Executes one loop iteration on each CPU in the tracing_cpumask file.
  */
 static int kthread_fn(void *data)
 {

From 7257634135c247de235f3cdfdaa22f9eb5f054e4 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 7 Feb 2017 20:21:28 +0900
Subject: [PATCH 20/25] tracing/probe: Show subsystem name in messages

Show "trace_probe:", "trace_kprobe:" and "trace_uprobe:"
headers for each warning/error/info message. This will
help people to notice that kprobe/uprobe events caused
those messages.
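
With the prefix in place, every pr_warn()/pr_info() in these files
picks it up automatically, since pr_fmt() is prepended at the call
site; as a sketch (message text illustrative):

  pr_info("probe definition rejected\n");
  /* logged as: "trace_kprobe: probe definition rejected" */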

Link: http://lkml.kernel.org/r/148646647813.24658.16705315294927615333.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 1 +
 kernel/trace/trace_probe.c  | 1 +
 kernel/trace/trace_uprobe.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a133ecd741e4..5c9bd0550542 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -16,6 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+#define pr_fmt(fmt)	"trace_kprobe: " fmt
 
 #include <linux/module.h>
 #include <linux/uaccess.h>
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 2a06f1fa7001..11b63328d6fb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -21,6 +21,7 @@
  * Copyright (C) IBM Corporation, 2010-2011
  * Author:     Srikar Dronamraju
  */
+#define pr_fmt(fmt)	"trace_probe: " fmt
 
 #include "trace_probe.h"
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 4f2ba2bb11e0..f4379e772171 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -17,6 +17,7 @@
  * Copyright (C) IBM Corporation, 2010-2012
  * Author:	Srikar Dronamraju <srikar@linux.vnet.ibm.com>
  */
+#define pr_fmt(fmt)	"trace_uprobe: " fmt
 
 #include <linux/module.h>
 #include <linux/uaccess.h>

From 3821fd35b58dba449bd894014fbf4e1c43c9e951 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Fri, 3 Feb 2017 15:42:24 -0500
Subject: [PATCH 21/25] jump_label: Reduce the size of struct static_key

The static_key->next field goes mostly unused. The field is used for
associating module uses with a static key. Most uses of struct static_key
define a static key in the core kernel and make use of it entirely within
the core kernel, or define the static key in a module and make use of it
only from within that module. In fact, of the ~3,000 static keys defined,
I found only about 5 that did not fit this pattern.

Thus, we can remove the static_key->next field entirely and overload
the static_key->entries field. That is, when all the static_key uses
are contained within the same module, static_key->entries continues
to point to those uses. However, if the static_key uses are not contained
within the module where the static_key is defined, then we allocate a
struct static_key_mod, store a pointer to the uses within that
struct static_key_mod, and have the static key point at the static_key_mod.
This does incur some extra memory usage when a static_key is used in a
module that does not define it, but since there are only a handful of such
cases there is a net savings.

In order to identify whether the static_key->entries pointer holds a
struct static_key_mod or a struct jump_entry pointer, bit 1 of
static_key->entries is set to 1 if it points to a struct static_key_mod and
is 0 if it points to a struct jump_entry. We were already using bit 0 in a
similar way to store the initial value of the static_key. This does mean
that allocations of struct static_key_mod and the struct jump_entry
tables need to be at least 4-byte aligned in memory. As far as I can tell,
all arches meet this criterion.
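
As a sketch of the decoding (helper names mirror the patch; the snippet
itself is illustrative):

  struct static_key_mod *mod;
  struct jump_entry *entries;

  if (key->type & JUMP_TYPE_LINKED)	/* bit 1 set: linked list */
  	mod = (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK);
  else					/* bit 1 clear: direct table */
  	entries = (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK);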

For my .config, the patch increased the text size by 778 bytes, but reduced
the data + bss size by 14,912 bytes, for a net savings of 14,134 bytes.

   text	   data	    bss	    dec	    hex	filename
8092427	5016512	 790528	13899467	 d416cb	vmlinux.pre
8093205	5001600	 790528	13885333	 d3df95	vmlinux.post

Link: http://lkml.kernel.org/r/1486154544-4321-1-git-send-email-jbaron@akamai.com

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/static-keys.txt |   4 +-
 include/linux/jump_label.h    |  23 +++--
 kernel/jump_label.c           | 153 ++++++++++++++++++++++++++++------
 3 files changed, 145 insertions(+), 35 deletions(-)

diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index ea8d7b4e53f0..32a25fad0c1b 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -155,7 +155,9 @@ or:
 
 There are a few functions and macros that architectures must implement in order
 to take advantage of this optimization. If there is no architecture support, we
-simply fall back to a traditional, load, test, and jump sequence.
+simply fall back to a traditional load, test, and jump sequence. Also, the
+struct jump_entry table must be at least 4-byte aligned because the
+static_key->entries field makes use of the two least significant bits.
 
 * select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
 
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a0547c571800..680c98b2f41c 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -89,11 +89,17 @@ extern bool static_key_initialized;
 
 struct static_key {
 	atomic_t enabled;
-/* Set lsb bit to 1 if branch is default true, 0 ot */
-	struct jump_entry *entries;
-#ifdef CONFIG_MODULES
-	struct static_key_mod *next;
-#endif
+/*
+ * bit 0 => 1 if key is initially true
+ *	    0 if initially false
+ * bit 1 => 1 if points to struct static_key_mod
+ *	    0 if points to struct jump_entry
+ */
+	union {
+		unsigned long type;
+		struct jump_entry *entries;
+		struct static_key_mod *next;
+	};
 };
 
 #else
@@ -118,9 +124,10 @@ struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_TYPE_FALSE	0UL
-#define JUMP_TYPE_TRUE	1UL
-#define JUMP_TYPE_MASK	1UL
+#define JUMP_TYPE_FALSE		0UL
+#define JUMP_TYPE_TRUE		1UL
+#define JUMP_TYPE_LINKED	2UL
+#define JUMP_TYPE_MASK		3UL
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 93ad6c1fb9b6..953411f5ba7f 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -229,12 +229,28 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry
 
 static inline struct jump_entry *static_key_entries(struct static_key *key)
 {
-	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
+	WARN_ON_ONCE(key->type & JUMP_TYPE_LINKED);
+	return (struct jump_entry *)(key->type & ~JUMP_TYPE_MASK);
 }
 
 static inline bool static_key_type(struct static_key *key)
 {
-	return (unsigned long)key->entries & JUMP_TYPE_MASK;
+	return key->type & JUMP_TYPE_TRUE;
+}
+
+static inline bool static_key_linked(struct static_key *key)
+{
+	return key->type & JUMP_TYPE_LINKED;
+}
+
+static inline void static_key_clear_linked(struct static_key *key)
+{
+	key->type &= ~JUMP_TYPE_LINKED;
+}
+
+static inline void static_key_set_linked(struct static_key *key)
+{
+	key->type |= JUMP_TYPE_LINKED;
 }
 
 static inline struct static_key *jump_entry_key(struct jump_entry *entry)
@@ -247,6 +263,26 @@ static bool jump_entry_branch(struct jump_entry *entry)
 	return (unsigned long)entry->key & 1UL;
 }
 
+/***
+ * A 'struct static_key' uses a union such that it either points directly
+ * to a table of 'struct jump_entry' or to a linked list of modules which in
+ * turn point to 'struct jump_entry' tables.
+ *
+ * The two lower bits of the pointer are used to keep track of which pointer
+ * type is in use and to store the initial branch direction; we use accessor
+ * functions which preserve these bits.
+ */
+static void static_key_set_entries(struct static_key *key,
+				   struct jump_entry *entries)
+{
+	unsigned long type;
+
+	WARN_ON_ONCE((unsigned long)entries & JUMP_TYPE_MASK);
+	type = key->type & JUMP_TYPE_MASK;
+	key->entries = entries;
+	key->type |= type;
+}
+
 static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
 	struct static_key *key = jump_entry_key(entry);
@@ -306,13 +342,7 @@ void __init jump_label_init(void)
 			continue;
 
 		key = iterk;
-		/*
-		 * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
-		 */
-		*((unsigned long *)&key->entries) += (unsigned long)iter;
-#ifdef CONFIG_MODULES
-		key->next = NULL;
-#endif
+		static_key_set_entries(key, iter);
 	}
 	static_key_initialized = true;
 	jump_label_unlock();
@@ -336,6 +366,29 @@ struct static_key_mod {
 	struct module *mod;
 };
 
+static inline struct static_key_mod *static_key_mod(struct static_key *key)
+{
+	WARN_ON_ONCE(!(key->type & JUMP_TYPE_LINKED));
+	return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK);
+}
+
+/***
+ * key->type and key->next alias the same storage via the union.
+ * This sets key->next and preserves the type bits.
+ *
+ * See additional comments above static_key_set_entries().
+ */
+static void static_key_set_mod(struct static_key *key,
+			       struct static_key_mod *mod)
+{
+	unsigned long type;
+
+	WARN_ON_ONCE((unsigned long)mod & JUMP_TYPE_MASK);
+	type = key->type & JUMP_TYPE_MASK;
+	key->next = mod;
+	key->type |= type;
+}
+
 static int __jump_label_mod_text_reserved(void *start, void *end)
 {
 	struct module *mod;
@@ -358,11 +411,23 @@ static void __jump_label_mod_update(struct static_key *key)
 {
 	struct static_key_mod *mod;
 
-	for (mod = key->next; mod; mod = mod->next) {
-		struct module *m = mod->mod;
+	for (mod = static_key_mod(key); mod; mod = mod->next) {
+		struct jump_entry *stop;
+		struct module *m;
 
-		__jump_label_update(key, mod->entries,
-				    m->jump_entries + m->num_jump_entries);
+		/*
+		 * NULL if the static_key is defined in a module
+		 * that does not use it
+		 */
+		if (!mod->entries)
+			continue;
+
+		m = mod->mod;
+		if (!m)
+			stop = __stop___jump_table;
+		else
+			stop = m->jump_entries + m->num_jump_entries;
+		__jump_label_update(key, mod->entries, stop);
 	}
 }
 
@@ -397,7 +462,7 @@ static int jump_label_add_module(struct module *mod)
 	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
 	struct jump_entry *iter;
 	struct static_key *key = NULL;
-	struct static_key_mod *jlm;
+	struct static_key_mod *jlm, *jlm2;
 
 	/* if the module doesn't have jump label entries, just return */
 	if (iter_start == iter_stop)
@@ -414,20 +479,32 @@ static int jump_label_add_module(struct module *mod)
 
 		key = iterk;
 		if (within_module(iter->key, mod)) {
-			/*
-			 * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
-			 */
-			*((unsigned long *)&key->entries) += (unsigned long)iter;
-			key->next = NULL;
+			static_key_set_entries(key, iter);
 			continue;
 		}
 		jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL);
 		if (!jlm)
 			return -ENOMEM;
+		if (!static_key_linked(key)) {
+			jlm2 = kzalloc(sizeof(struct static_key_mod),
+				       GFP_KERNEL);
+			if (!jlm2) {
+				kfree(jlm);
+				return -ENOMEM;
+			}
+			preempt_disable();
+			jlm2->mod = __module_address((unsigned long)key);
+			preempt_enable();
+			jlm2->entries = static_key_entries(key);
+			jlm2->next = NULL;
+			static_key_set_mod(key, jlm2);
+			static_key_set_linked(key);
+		}
 		jlm->mod = mod;
 		jlm->entries = iter;
-		jlm->next = key->next;
-		key->next = jlm;
+		jlm->next = static_key_mod(key);
+		static_key_set_mod(key, jlm);
+		static_key_set_linked(key);
 
 		/* Only update if we've changed from our initial state */
 		if (jump_label_type(iter) != jump_label_init_type(iter))
@@ -454,16 +531,34 @@ static void jump_label_del_module(struct module *mod)
 		if (within_module(iter->key, mod))
 			continue;
 
+		/* No memory during module load */
+		if (WARN_ON(!static_key_linked(key)))
+			continue;
+
 		prev = &key->next;
-		jlm = key->next;
+		jlm = static_key_mod(key);
 
 		while (jlm && jlm->mod != mod) {
 			prev = &jlm->next;
 			jlm = jlm->next;
 		}
 
-		if (jlm) {
+		/* No memory during module load */
+		if (WARN_ON(!jlm))
+			continue;
+
+		if (prev == &key->next)
+			static_key_set_mod(key, jlm->next);
+		else
 			*prev = jlm->next;
+
+		kfree(jlm);
+
+		jlm = static_key_mod(key);
+		/* if only one entry is left, fold it back into the static_key */
+		if (jlm->next == NULL) {
+			static_key_set_entries(key, jlm->entries);
+			static_key_clear_linked(key);
 			kfree(jlm);
 		}
 	}
@@ -492,8 +587,10 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 	case MODULE_STATE_COMING:
 		jump_label_lock();
 		ret = jump_label_add_module(mod);
-		if (ret)
+		if (ret) {
+			WARN(1, "Failed to allocate memory: jump_label may not work properly.\n");
 			jump_label_del_module(mod);
+		}
 		jump_label_unlock();
 		break;
 	case MODULE_STATE_GOING:
@@ -554,11 +651,14 @@ int jump_label_text_reserved(void *start, void *end)
 static void jump_label_update(struct static_key *key)
 {
 	struct jump_entry *stop = __stop___jump_table;
-	struct jump_entry *entry = static_key_entries(key);
+	struct jump_entry *entry;
 #ifdef CONFIG_MODULES
 	struct module *mod;
 
-	__jump_label_mod_update(key);
+	if (static_key_linked(key)) {
+		__jump_label_mod_update(key);
+		return;
+	}
 
 	preempt_disable();
 	mod = __module_address((unsigned long)key);
@@ -566,6 +666,7 @@ static void jump_label_update(struct static_key *key)
 		stop = mod->jump_entries + mod->num_jump_entries;
 	preempt_enable();
 #endif
+	entry = static_key_entries(key);
 	/* if there are no users, entry can be NULL */
 	if (entry)
 		__jump_label_update(key, entry, stop);

From eb583cd484f9a76c3716c0539bd06340943eeff9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Jan 2017 13:24:45 +0100
Subject: [PATCH 22/25] tracing: Use modern function declaration

We get a lot of harmless warnings about this header file at W=1 level
because of an unusual function declaration:

kernel/trace/trace.h:766:1: error: 'inline' is not at beginning of declaration [-Werror=old-style-declaration]

This moves the inline keyword to where it normally belongs, avoiding the
warning.

Link: http://lkml.kernel.org/r/20170123122521.3389010-1-arnd@arndb.de

Fixes: 4046bf023b06 ("ftrace: Expose ftrace_hash_empty and ftrace_lookup_ip")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d2d068b36341..ae1cce91fead 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -763,7 +763,7 @@ struct ftrace_hash {
 struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip);
 
-static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
+static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
 {
 	return !hash || !hash->count;
 }

From 8f0994bb8cbde5452e58ce0cacdbf6cb58079d01 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 12 Jan 2017 13:55:02 +0000
Subject: [PATCH 23/25] tracing: Fix return value check in
 trace_benchmark_reg()

In case of error, the function kthread_run() returns ERR_PTR() and never
returns NULL. The NULL test in the return value check should be replaced
with IS_ERR().

Link: http://lkml.kernel.org/r/20170112135502.28556-1-weiyj.lk@gmail.com

Cc: stable@vger.kernel.org
Fixes: 81dc9f0e ("tracing: Add tracepoint benchmark tracepoint")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_benchmark.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index e3b488825ae3..e49fbe901cfc 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -175,9 +175,9 @@ int trace_benchmark_reg(void)
 
 	bm_event_thread = kthread_run(benchmark_event_kthread,
 				      NULL, "event_benchmark");
-	if (!bm_event_thread) {
+	if (IS_ERR(bm_event_thread)) {
 		pr_warning("trace benchmark failed to create kernel thread\n");
-		return -ENOMEM;
+		return PTR_ERR(bm_event_thread);
 	}
 
 	return 0;

From bef5da60595f5706232e2502876f8545743b9b41 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 10 Feb 2017 22:21:55 +0900
Subject: [PATCH 24/25] tracing/probes: Fix a warning message to show correct
 maximum length

Since tracing/*probe_events will accept a probe definition
up to 4096 - 2 ('\n' and '\0') bytes, it must show 4094 instead
of 4096 in the warning message.

Note that there is one possible case that exceeds 4094: if the
user prepares a 4096-byte null-terminated string and write(2)s
it with count == 4095, then it can be accepted. However, if the
user puts a '\n' after that, it must be rejected.
So IMHO, the warning message should indicate the shorter length,
since it is safer.
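
Worked out (assuming WRITE_BUFSIZE == 4096):

  4096 bytes - 1 ('\n') - 1 ('\0') = 4094 usable bytes per line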

Link: http://lkml.kernel.org/r/148673290462.2579.7966778294009665632.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_probe.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 11b63328d6fb..52478f033f88 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -679,8 +679,9 @@ ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer,
 				if (done + size < count) {
 					if (buf != kbuf)
 						break;
+					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
 					pr_warn("Line length is too long: Should be less than %d\n",
-						WRITE_BUFSIZE);
+						WRITE_BUFSIZE - 2);
 					ret = -EINVAL;
 					goto out;
 				}

From 67d04bb2bcbd3e99f4c4daa58599c90a83ad314a Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Thu, 16 Feb 2017 20:10:58 -0800
Subject: [PATCH 25/25] tracing: Remove outdated ring buffer comment

The comment about the ring buffer's organization is outdated, and the code
it once described now lives elsewhere; remove the comment.

Link: http://lkml.kernel.org/r/20170217041058.23904-1-joelaf@google.com

Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Joel Fernandes <joelaf@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4589b67168fc..54e3b8711aca 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -260,16 +260,8 @@ unsigned long long ns2usecs(u64 nsec)
 	TRACE_ITER_EVENT_FORK
 
 /*
- * The global_trace is the descriptor that holds the tracing
- * buffers for the live tracing. For each CPU, it contains
- * a link list of pages that will store trace entries. The
- * page descriptor of the pages in the memory is used to hold
- * the link list by linking the lru item in the page descriptor
- * to each of the pages in the buffer per CPU.
- *
- * For each active CPU there is a data field that holds the
- * pages for the buffer for that CPU. Each CPU has the same number
- * of pages allocated for its buffer.
+ * The global_trace is the descriptor that holds the top-level tracing
+ * buffers for the live tracing.
  */
 static struct trace_array global_trace = {
 	.trace_flags = TRACE_DEFAULT_FLAGS,