nohz: Track last do_timer() cpu
The previous patch, which limits the sleep time to the maximum deferment time of the timekeeping clocksource, has a limitation on SMP machines: if all CPUs are idle, the maximum sleep time is limited for every CPU, although only the CPU with the do_timer() duty needs to be limited.

Solve this by keeping track of which CPU had the do_timer() duty assigned last and limiting the sleep time only for that CPU.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Cc: Jon Hunter <jon-hunter@ti.com>
Cc: John Stultz <johnstul@us.ibm.com>
This commit is contained in:
parent
98962465ed
commit
27185016b8
@ -43,6 +43,7 @@ enum tick_nohz_mode {
|
|||||||
* @idle_exittime: Time when the idle state was left
|
* @idle_exittime: Time when the idle state was left
|
||||||
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
|
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
|
||||||
* @sleep_length: Duration of the current idle sleep
|
* @sleep_length: Duration of the current idle sleep
|
||||||
|
* @do_timer_last: CPU was the last one doing do_timer before going idle
|
||||||
*/
|
*/
|
||||||
struct tick_sched {
|
struct tick_sched {
|
||||||
struct hrtimer sched_timer;
|
struct hrtimer sched_timer;
|
||||||
@ -64,6 +65,7 @@ struct tick_sched {
|
|||||||
unsigned long last_jiffies;
|
unsigned long last_jiffies;
|
||||||
unsigned long next_jiffies;
|
unsigned long next_jiffies;
|
||||||
ktime_t idle_expires;
|
ktime_t idle_expires;
|
||||||
|
int do_timer_last;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void __init tick_init(void);
|
extern void __init tick_init(void);
|
||||||
|
@ -263,17 +263,7 @@ void tick_nohz_stop_sched_tick(int inidle)
|
|||||||
seq = read_seqbegin(&xtime_lock);
|
seq = read_seqbegin(&xtime_lock);
|
||||||
last_update = last_jiffies_update;
|
last_update = last_jiffies_update;
|
||||||
last_jiffies = jiffies;
|
last_jiffies = jiffies;
|
||||||
|
time_delta = timekeeping_max_deferment();
|
||||||
/*
|
|
||||||
* On SMP we really should only care for the CPU which
|
|
||||||
* has the do_timer duty assigned. All other CPUs can
|
|
||||||
* sleep as long as they want.
|
|
||||||
*/
|
|
||||||
if (cpu == tick_do_timer_cpu ||
|
|
||||||
tick_do_timer_cpu == TICK_DO_TIMER_NONE)
|
|
||||||
time_delta = timekeeping_max_deferment();
|
|
||||||
else
|
|
||||||
time_delta = KTIME_MAX;
|
|
||||||
} while (read_seqretry(&xtime_lock, seq));
|
} while (read_seqretry(&xtime_lock, seq));
|
||||||
|
|
||||||
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
|
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
|
||||||
@ -295,6 +285,29 @@ void tick_nohz_stop_sched_tick(int inidle)
|
|||||||
/* Schedule the tick, if we are at least one jiffie off */
|
/* Schedule the tick, if we are at least one jiffie off */
|
||||||
if ((long)delta_jiffies >= 1) {
|
if ((long)delta_jiffies >= 1) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this cpu is the one which updates jiffies, then
|
||||||
|
* give up the assignment and let it be taken by the
|
||||||
|
* cpu which runs the tick timer next, which might be
|
||||||
|
* this cpu as well. If we don't drop this here the
|
||||||
|
* jiffies might be stale and do_timer() never
|
||||||
|
* invoked. Keep track of the fact that it was the one
|
||||||
|
* which had the do_timer() duty last. If this cpu is
|
||||||
|
* the one which had the do_timer() duty last, we
|
||||||
|
* limit the sleep time to the timekeeping
|
||||||
|
* max_deferment value which we retrieved
|
||||||
|
* above. Otherwise we can sleep as long as we want.
|
||||||
|
*/
|
||||||
|
if (cpu == tick_do_timer_cpu) {
|
||||||
|
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
||||||
|
ts->do_timer_last = 1;
|
||||||
|
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
|
||||||
|
time_delta = KTIME_MAX;
|
||||||
|
ts->do_timer_last = 0;
|
||||||
|
} else if (!ts->do_timer_last) {
|
||||||
|
time_delta = KTIME_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* calculate the expiry time for the next timer wheel
|
* calculate the expiry time for the next timer wheel
|
||||||
* timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
|
* timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
|
||||||
@ -312,21 +325,12 @@ void tick_nohz_stop_sched_tick(int inidle)
|
|||||||
*/
|
*/
|
||||||
time_delta = min_t(u64, time_delta,
|
time_delta = min_t(u64, time_delta,
|
||||||
tick_period.tv64 * delta_jiffies);
|
tick_period.tv64 * delta_jiffies);
|
||||||
expires = ktime_add_ns(last_update, time_delta);
|
|
||||||
} else {
|
|
||||||
expires.tv64 = KTIME_MAX;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
if (time_delta < KTIME_MAX)
|
||||||
* If this cpu is the one which updates jiffies, then
|
expires = ktime_add_ns(last_update, time_delta);
|
||||||
* give up the assignment and let it be taken by the
|
else
|
||||||
* cpu which runs the tick timer next, which might be
|
expires.tv64 = KTIME_MAX;
|
||||||
* this cpu as well. If we don't drop this here the
|
|
||||||
* jiffies might be stale and do_timer() never
|
|
||||||
* invoked.
|
|
||||||
*/
|
|
||||||
if (cpu == tick_do_timer_cpu)
|
|
||||||
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
|
||||||
|
|
||||||
if (delta_jiffies > 1)
|
if (delta_jiffies > 1)
|
||||||
cpumask_set_cpu(cpu, nohz_cpu_mask);
|
cpumask_set_cpu(cpu, nohz_cpu_mask);
|
||||||
|
Loading…
Reference in New Issue
Block a user