rcu: Introduce CONFIG_RCU_EXP_CPU_STALL_TIMEOUT

Currently both expedited and regular grace period stall warnings use
a single timeout value that has units of seconds.  However, recent
Android use cases require a sub-100-millisecond expedited RCU CPU
stall warning.  Given that expedited RCU grace periods normally complete
in far less than a single millisecond, especially for small systems,
this is not unreasonable.

Therefore introduce the CONFIG_RCU_EXP_CPU_STALL_TIMEOUT kernel
configuration that defaults to 20 msec on Android and remains the same
as that of the non-expedited stall warnings otherwise.  It can also be
changed at run time via: /sys/.../parameters/rcu_exp_cpu_stall_timeout.

[ paulmck: Default of zero to use CONFIG_RCU_STALL_TIMEOUT. ]

Signed-off-by: Uladzislau Rezki <uladzislau.rezki@sony.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
This commit is contained in:
Uladzislau Rezki 2022-02-16 14:52:09 +01:00 committed by Paul E. McKenney
parent 3123109284
commit 28b3ae4265
7 changed files with 80 additions and 2 deletions

View File

@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
Stall-warning messages may be enabled and disabled completely via Stall-warning messages may be enabled and disabled completely via
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress. /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
--------------------------------
Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
the expedited grace period. This parameter defines the period
of time that RCU will wait from the beginning of an expedited
grace period until it issues an RCU CPU stall warning. This time
period is normally 20 milliseconds on Android devices. A zero
value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
after conversion to milliseconds.
This configuration parameter may be changed at runtime via the
/sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
this parameter is checked only at the beginning of a cycle. If you
are in a current stall cycle, setting it to a new value will change
the timeout for the -next- stall.
Stall-warning messages may be enabled and disabled completely via
/sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
RCU_STALL_DELAY_DELTA RCU_STALL_DELAY_DELTA
--------------------- ---------------------

View File

@ -4893,6 +4893,18 @@
rcupdate.rcu_cpu_stall_timeout= [KNL] rcupdate.rcu_cpu_stall_timeout= [KNL]
Set timeout for RCU CPU stall warning messages. Set timeout for RCU CPU stall warning messages.
The value is in seconds and the maximum allowed
value is 300 seconds.
rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
Set timeout for expedited RCU CPU stall warning
messages. The value is in milliseconds
and the maximum allowed value is 21000
milliseconds. Please note that this value is
adjusted to an arch timer tick resolution.
Setting this to zero causes the value from
rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).
rcupdate.rcu_expedited= [KNL] rcupdate.rcu_expedited= [KNL]
Use expedited grace-period primitives, for Use expedited grace-period primitives, for

View File

@ -91,6 +91,20 @@ config RCU_CPU_STALL_TIMEOUT
RCU grace period persists, additional CPU stall warnings are RCU grace period persists, additional CPU stall warnings are
printed at more widely spaced intervals. printed at more widely spaced intervals.
config RCU_EXP_CPU_STALL_TIMEOUT
int "Expedited RCU CPU stall timeout in milliseconds"
depends on RCU_STALL_COMMON
range 0 21000
default 20 if ANDROID
default 0 if !ANDROID
help
If a given expedited RCU grace period extends more than the
specified number of milliseconds, a CPU stall warning is printed.
If the expedited RCU grace period persists, additional CPU stall
warnings are printed at more widely spaced intervals. A value of
zero says to use the RCU_CPU_STALL_TIMEOUT value converted from
seconds to milliseconds.
config RCU_TRACE config RCU_TRACE
bool "Enable tracing for RCU" bool "Enable tracing for RCU"
depends on DEBUG_KERNEL depends on DEBUG_KERNEL

View File

@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
extern int rcu_cpu_stall_ftrace_dump; extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress; extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout; extern int rcu_cpu_stall_timeout;
/* Expedited stall-warning timeout in milliseconds; zero selects rcu_cpu_stall_timeout. */
extern int rcu_exp_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void); int rcu_jiffies_till_stall_check(void);
/* Returns the expedited grace-period stall-warning timeout, in jiffies. */
int rcu_exp_jiffies_till_stall_check(void);
static inline bool rcu_stall_is_suppressed(void) static inline bool rcu_stall_is_suppressed(void)
{ {

View File

@ -496,7 +496,7 @@ static void synchronize_rcu_expedited_wait(void)
struct rcu_node *rnp_root = rcu_get_root(); struct rcu_node *rnp_root = rcu_get_root();
trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait")); trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
jiffies_stall = rcu_jiffies_till_stall_check(); jiffies_stall = rcu_exp_jiffies_till_stall_check();
jiffies_start = jiffies; jiffies_start = jiffies;
if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) { if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
if (synchronize_rcu_expedited_wait_once(1)) if (synchronize_rcu_expedited_wait_once(1))
@ -571,7 +571,7 @@ static void synchronize_rcu_expedited_wait(void)
dump_cpu_task(cpu); dump_cpu_task(cpu);
} }
} }
jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3; jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
} }
} }

View File

@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly;
#define RCU_STALL_MIGHT_DIV 8 #define RCU_STALL_MIGHT_DIV 8
#define RCU_STALL_MIGHT_MIN (2 * HZ) #define RCU_STALL_MIGHT_MIN (2 * HZ)
int rcu_exp_jiffies_till_stall_check(void)
{
int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
int exp_stall_delay_delta = 0;
int till_stall_check;
// Zero says to use rcu_cpu_stall_timeout, but in milliseconds.
if (!cpu_stall_timeout)
cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
// Limit check must be consistent with the Kconfig limits for
// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
// The minimum clamped value is "2UL", because at least one full
// tick has to be guaranteed.
till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
#ifdef CONFIG_PROVE_RCU
/* Add extra ~25% out of till_stall_check. */
exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
#endif
return till_stall_check + exp_stall_delay_delta;
}
EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
/* Limit-check stall timeouts specified at boottime and runtime. */ /* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void) int rcu_jiffies_till_stall_check(void)
{ {

View File

@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644); module_param(rcu_cpu_stall_suppress, int, 0644);
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644); module_param(rcu_cpu_stall_timeout, int, 0644);
// Expedited RCU CPU stall-warning timeout in milliseconds, initialized from
// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT and exposed as a module parameter with
// mode 0644.  Zero means use rcu_cpu_stall_timeout.
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
// Suppress boot-time RCU CPU stall warnings and rcutorture writer stall // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall