From e12d0271774fea9fddf1e2a7952a0bffb2ee8e8b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 10 May 2013 17:12:28 -0400
Subject: [PATCH 1/5] nohz: Warn if the machine can not perform nohz_full

If the user configures NO_HZ_FULL and defines nohz_full=XXX on the
kernel command line, or enables NO_HZ_FULL_ALL, but nohz fails
because the machine has an unstable sched clock, warn about it.

We do not want users thinking that they are getting the benefit
of nohz when their machine cannot support it.
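
As a worked example (the CPU range here is illustrative), booting
with something like:

	nohz_full=1-3

on a machine with an unstable sched clock will now trigger the
WARN_ONCE added below instead of silently falling back to the
periodic tick.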

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/time/tick-sched.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f4208138fbf4..d87d22cb9bf2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -178,6 +178,11 @@ static bool can_stop_full_tick(void)
 	 */
 	if (!sched_clock_stable) {
 		trace_tick_stop(0, "unstable sched clock\n");
+		/*
+		 * Don't allow the user to think they can get
+		 * full NO_HZ with this machine.
+		 */
+		WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
 		return false;
 	}
 #endif

From b8900bc0217fac8e68085997bee2f05e6db931a2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 6 Jun 2013 15:42:53 +0200
Subject: [PATCH 2/5] watchdog: Register / unregister watchdog kthreads on
 sysctl control

The user activation/deactivation of the watchdog through boot parameters
or sysctl is currently implemented with a dance involving the kthread park
and unpark methods: the threads are unconditionally registered on
boot and they park as soon as the user wants the watchdog to be disabled.

This method involves a few noisy details to handle, though: the watchdog
kthreads may be unparked at any time due to hotplug operations, after which
the watchdog internals have to park them again if the watchdog is user-disabled.

As a result, the setup() and unpark() methods need to be able to request
reparking. The kthread infrastructure does not currently support this,
so this piece of the watchdog code only works halfway.

Besides, unparking/reparking the watchdog kthreads consumes unnecessary
cputime on hotplug operations, when those operations could simply be
ignored in the first place.

As suggested by Srivatsa, let's instead register the watchdog
threads only when they are needed. This way we don't need to think about
hotplug operations and we don't burden CPU onlining when the watchdog
is simply disabled.
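
With this change, the existing kernel.watchdog sysctl knob maps
directly onto thread registration (a sketch; the thread names follow
the watchdog/%u comm set in the diff below):

	# echo 0 > /proc/sys/kernel/watchdog    # unregisters the watchdog/N threads
	# echo 1 > /proc/sys/kernel/watchdog    # registers them again

rather than parking and unparking threads that otherwise stay around
forever.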

Suggested-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Anish Singh <anish198519851985@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Don Zickus <dzickus@redhat.com>
---
 kernel/watchdog.c | 93 +++++++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 05039e348f07..52c9a9b91bdd 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,7 +31,7 @@
 
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
-static int __read_mostly watchdog_disabled;
+static int __read_mostly watchdog_disabled = 1;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu)
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = watchdog_timer_fn;
 
-	if (!watchdog_enabled) {
-		kthread_park(current);
-		return;
-	}
-
 	/* Enable the perf event */
 	watchdog_nmi_enable(cpu);
 
@@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu)
 	watchdog_nmi_disable(cpu);
 }
 
+static void watchdog_cleanup(unsigned int cpu, bool online)
+{
+	watchdog_disable(cpu);
+}
+
 static int watchdog_should_run(unsigned int cpu)
 {
 	return __this_cpu_read(hrtimer_interrupts) !=
@@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
 static void watchdog_nmi_disable(unsigned int cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
+static struct smp_hotplug_thread watchdog_threads = {
+	.store			= &softlockup_watchdog,
+	.thread_should_run	= watchdog_should_run,
+	.thread_fn		= watchdog,
+	.thread_comm		= "watchdog/%u",
+	.setup			= watchdog_enable,
+	.cleanup		= watchdog_cleanup,
+	.park			= watchdog_disable,
+	.unpark			= watchdog_enable,
+};
+
+static int watchdog_enable_all_cpus(void)
+{
+	int err = 0;
+
+	if (watchdog_disabled) {
+		err = smpboot_register_percpu_thread(&watchdog_threads);
+		if (err)
+			pr_err("Failed to create watchdog threads, disabled\n");
+		else
+			watchdog_disabled = 0;
+	}
+
+	return err;
+}
+
 /* prepare/enable/disable routines */
 /* sysctl functions */
 #ifdef CONFIG_SYSCTL
-static void watchdog_enable_all_cpus(void)
-{
-	unsigned int cpu;
-
-	if (watchdog_disabled) {
-		watchdog_disabled = 0;
-		for_each_online_cpu(cpu)
-			kthread_unpark(per_cpu(softlockup_watchdog, cpu));
-	}
-}
-
 static void watchdog_disable_all_cpus(void)
 {
-	unsigned int cpu;
-
 	if (!watchdog_disabled) {
 		watchdog_disabled = 1;
-		for_each_online_cpu(cpu)
-			kthread_park(per_cpu(softlockup_watchdog, cpu));
+		smpboot_unregister_percpu_thread(&watchdog_threads);
 	}
 }
 
@@ -507,14 +519,14 @@ static void watchdog_disable_all_cpus(void)
 int proc_dowatchdog(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int ret;
+	int err, old_thresh, old_enabled;
 
-	if (watchdog_disabled < 0)
-		return -ENODEV;
+	old_thresh = ACCESS_ONCE(watchdog_thresh);
+	old_enabled = ACCESS_ONCE(watchdog_enabled);
 
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-	if (ret || !write)
-		return ret;
+	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (err || !write)
+		return err;
 
 	set_sample_period();
 	/*
@@ -523,29 +535,24 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	 * watchdog_*_all_cpus() function takes care of this.
 	 */
 	if (watchdog_enabled && watchdog_thresh)
-		watchdog_enable_all_cpus();
+		err = watchdog_enable_all_cpus();
 	else
 		watchdog_disable_all_cpus();
 
-	return ret;
+	/* Restore old values on failure */
+	if (err) {
+		watchdog_thresh = old_thresh;
+		watchdog_enabled = old_enabled;
+	}
+
+	return err;
 }
 #endif /* CONFIG_SYSCTL */
 
-static struct smp_hotplug_thread watchdog_threads = {
-	.store			= &softlockup_watchdog,
-	.thread_should_run	= watchdog_should_run,
-	.thread_fn		= watchdog,
-	.thread_comm		= "watchdog/%u",
-	.setup			= watchdog_enable,
-	.park			= watchdog_disable,
-	.unpark			= watchdog_enable,
-};
-
 void __init lockup_detector_init(void)
 {
 	set_sample_period();
-	if (smpboot_register_percpu_thread(&watchdog_threads)) {
-		pr_err("Failed to create watchdog threads, disabled\n");
-		watchdog_disabled = -ENODEV;
-	}
+
+	if (watchdog_enabled)
+		watchdog_enable_all_cpus();
 }

From 3c00ea82c724fab0b98f15428a804cb45eb9ad38 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 19 May 2013 20:45:15 +0200
Subject: [PATCH 3/5] watchdog: Rename confusing state variable

We have two confusingly named state variables in the
watchdog code:

* watchdog_enabled: This one reflects the user interface. It's
set to 1 by default and can be overridden with boot options
or through the sysctl/procfs interface.

* watchdog_disabled: This is the internal toggle state that
tells whether the watchdog threads, timers and NMI events are
currently running or not. This state mostly depends on the user
settings. It's a convenient state latch.

Now we really need to find clearer names, because the current
ones are confusing enough to discourage careful review.

watchdog_enabled now becomes watchdog_user_enabled to reflect
its purpose as a user interface setting.

watchdog_disabled becomes watchdog_running to reflect its
role as purely internal state.
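
To summarize the rename (note the inverted polarity of the second
variable):

	watchdog_enabled   ->  watchdog_user_enabled   (what the user asked for)
	watchdog_disabled  ->  watchdog_running        (what is actually running)

so every watchdog_disabled check becomes a !watchdog_running check
in the diff below, and vice versa.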

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Anish Singh <anish198519851985@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Don Zickus <dzickus@redhat.com>
---
 include/linux/nmi.h |  2 +-
 kernel/sysctl.c     |  4 ++--
 kernel/watchdog.c   | 30 +++++++++++++++---------------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index db50840e6355..6a45fb583ff1 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -46,7 +46,7 @@ static inline bool trigger_all_cpu_backtrace(void)
 #ifdef CONFIG_LOCKUP_DETECTOR
 int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
-extern int watchdog_enabled;
+extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9edcf456e0fc..b0805652c4ff 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -801,7 +801,7 @@ static struct ctl_table kern_table[] = {
 #if defined(CONFIG_LOCKUP_DETECTOR)
 	{
 		.procname       = "watchdog",
-		.data           = &watchdog_enabled,
+		.data           = &watchdog_user_enabled,
 		.maxlen         = sizeof (int),
 		.mode           = 0644,
 		.proc_handler   = proc_dowatchdog,
@@ -828,7 +828,7 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.procname       = "nmi_watchdog",
-		.data           = &watchdog_enabled,
+		.data           = &watchdog_user_enabled,
 		.maxlen         = sizeof (int),
 		.mode           = 0644,
 		.proc_handler   = proc_dowatchdog,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 52c9a9b91bdd..51c4f34d258e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,9 +29,9 @@
 #include <linux/kvm_para.h>
 #include <linux/perf_event.h>
 
-int watchdog_enabled = 1;
+int watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
-static int __read_mostly watchdog_disabled = 1;
+static int __read_mostly watchdog_running;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str)
 	else if (!strncmp(str, "nopanic", 7))
 		hardlockup_panic = 0;
 	else if (!strncmp(str, "0", 1))
-		watchdog_enabled = 0;
+		watchdog_user_enabled = 0;
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
 
 static int __init nowatchdog_setup(char *str)
 {
-	watchdog_enabled = 0;
+	watchdog_user_enabled = 0;
 	return 1;
 }
 __setup("nowatchdog", nowatchdog_setup);
@@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup);
 /* deprecated */
 static int __init nosoftlockup_setup(char *str)
 {
-	watchdog_enabled = 0;
+	watchdog_user_enabled = 0;
 	return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);
@@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void)
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 void touch_nmi_watchdog(void)
 {
-	if (watchdog_enabled) {
+	if (watchdog_user_enabled) {
 		unsigned cpu;
 
 		for_each_present_cpu(cpu) {
@@ -490,12 +490,12 @@ static int watchdog_enable_all_cpus(void)
 {
 	int err = 0;
 
-	if (watchdog_disabled) {
+	if (!watchdog_running) {
 		err = smpboot_register_percpu_thread(&watchdog_threads);
 		if (err)
 			pr_err("Failed to create watchdog threads, disabled\n");
 		else
-			watchdog_disabled = 0;
+			watchdog_running = 1;
 	}
 
 	return err;
@@ -506,8 +506,8 @@ static int watchdog_enable_all_cpus(void)
 #ifdef CONFIG_SYSCTL
 static void watchdog_disable_all_cpus(void)
 {
-	if (!watchdog_disabled) {
-		watchdog_disabled = 1;
+	if (watchdog_running) {
+		watchdog_running = 0;
 		smpboot_unregister_percpu_thread(&watchdog_threads);
 	}
 }
@@ -522,7 +522,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	int err, old_thresh, old_enabled;
 
 	old_thresh = ACCESS_ONCE(watchdog_thresh);
-	old_enabled = ACCESS_ONCE(watchdog_enabled);
+	old_enabled = ACCESS_ONCE(watchdog_user_enabled);
 
 	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (err || !write)
@@ -531,10 +531,10 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	set_sample_period();
 	/*
 	 * Watchdog threads shouldn't be enabled if they are
-	 * disabled. The 'watchdog_disabled' variable check in
+	 * disabled. The 'watchdog_running' variable check in
 	 * watchdog_*_all_cpus() function takes care of this.
 	 */
-	if (watchdog_enabled && watchdog_thresh)
+	if (watchdog_user_enabled && watchdog_thresh)
 		err = watchdog_enable_all_cpus();
 	else
 		watchdog_disable_all_cpus();
@@ -542,7 +542,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	/* Restore old values on failure */
 	if (err) {
 		watchdog_thresh = old_thresh;
-		watchdog_enabled = old_enabled;
+		watchdog_user_enabled = old_enabled;
 	}
 
 	return err;
@@ -553,6 +553,6 @@ void __init lockup_detector_init(void)
 {
 	set_sample_period();
 
-	if (watchdog_enabled)
+	if (watchdog_user_enabled)
 		watchdog_enable_all_cpus();
 }

From 940be35ac0139530d7554aa2352a8388e3d4adca Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 7 Jun 2013 13:35:42 +0200
Subject: [PATCH 4/5] watchdog: Boot-disable by default on full dynticks

When the watchdog runs, it prevents the full dynticks
CPUs from stopping their tick because the hard lockup
detector uses perf events internally, which in turn
rely on the periodic tick.

Since this is a rather confusing behaviour that is not
easy to track down and identify for those who want to
test CONFIG_NO_HZ_FULL, let's disable the watchdog by
default at boot time when full dynticks is enabled.

The user can still enable it later at runtime using
procfs or sysctl.
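
For instance, on a CONFIG_NO_HZ_FULL kernel the lockup detectors
can be re-enabled at runtime with:

	# sysctl -w kernel.watchdog=1

which is exactly what the boot-time message added below tells the
user.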

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Anish Singh <anish198519851985@gmail.com>
---
 kernel/watchdog.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51c4f34d258e..1241d8c91d5e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -553,6 +553,14 @@ void __init lockup_detector_init(void)
 {
 	set_sample_period();
 
+#ifdef CONFIG_NO_HZ_FULL
+	if (watchdog_user_enabled) {
+		watchdog_user_enabled = 0;
+		pr_warning("Disabled lockup detectors by default for full dynticks\n");
+		pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n");
+	}
+#endif
+
 	if (watchdog_user_enabled)
 		watchdog_enable_all_cpus();
 }

From 5b8621a68fdcd2baf1d3b413726f913a5254d46a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 8 Jun 2013 13:47:31 +0200
Subject: [PATCH 5/5] nohz: Remove obsolete check for full dynticks CPUs to be
 RCU nocbs

Building full dynticks now implies that all CPUs are forced
into RCU nocb mode through CONFIG_RCU_NOCB_CPU_ALL.

The dynamic check at boot time has therefore become useless.
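
For reference, this is now enforced at build time by the Kconfig
selects (a sketch of the relevant kernel/time/Kconfig entry; the
surrounding dependencies are elided):

	config NO_HZ_FULL
		bool "Full dynticks system (tickless)"
		...
		select RCU_NOCB_CPU
		select RCU_NOCB_CPU_ALL

so every possible nohz_full CPU is an RCU nocb CPU by construction.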

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Borislav Petkov <bp@alien8.de>
---
 kernel/time/tick-sched.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d87d22cb9bf2..b15750139260 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -351,16 +351,6 @@ void __init tick_nohz_init(void)
 	}
 
 	cpu_notifier(tick_nohz_cpu_down_callback, 0);
-
-	/* Make sure full dynticks CPU are also RCU nocbs */
-	for_each_cpu(cpu, nohz_full_mask) {
-		if (!rcu_is_nocb_cpu(cpu)) {
-			pr_warning("NO_HZ: CPU %d is not RCU nocb: "
-				   "cleared from nohz_full range", cpu);
-			cpumask_clear_cpu(cpu, nohz_full_mask);
-		}
-	}
-
 	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
 	pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
 }