From e8fcaa5c54e3b0371230e5d43a6f650c667da9c5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 7 Aug 2013 22:28:01 +0200
Subject: [PATCH 1/6] nohz: Convert a few places to use local per cpu accesses

A few functions use remote per CPU access APIs when they deal with
local values. Just do the right conversion to improve performance, code
readability and debug checks.

While at it, let's extend some of these function names with a
*_this_cpu() suffix in order to display their purpose more clearly.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Steven Rostedt
---
 include/linux/tick.h         |  6 +++---
 kernel/softirq.c             |  4 +---
 kernel/time/tick-broadcast.c |  6 +++---
 kernel/time/tick-internal.h  |  4 ++--
 kernel/time/tick-sched.c     | 39 +++++++++++++++---------------------
 5 files changed, 25 insertions(+), 34 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 5128d33bbb39..a004f66a6cf0 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -104,7 +104,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 extern void tick_clock_notify(void);
 extern int tick_check_oneshot_change(int allow_nohz);
 extern struct tick_sched *tick_get_tick_sched(int cpu);
-extern void tick_check_idle(int cpu);
+extern void tick_check_idle(void);
 extern int tick_oneshot_mode_active(void);
 # ifndef arch_needs_cpu
 # define arch_needs_cpu(cpu) (0)
@@ -112,7 +112,7 @@ extern int tick_oneshot_mode_active(void);
 # else
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_check_idle(int cpu) { }
+static inline void tick_check_idle(void) { }
 static inline int tick_oneshot_mode_active(void) { return 0; }
 # endif
 
@@ -121,7 +121,7 @@ static inline void tick_init(void) { }
 static inline void tick_cancel_sched_timer(int cpu) { }
 static inline void tick_clock_notify(void) { }
 static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_check_idle(int cpu) { }
+static inline void tick_check_idle(void) { }
 static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 11025ccc06dd..11348de09400 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -311,8 +311,6 @@ asmlinkage void do_softirq(void)
  */
 void irq_enter(void)
 {
-        int cpu = smp_processor_id();
-
         rcu_irq_enter();
         if (is_idle_task(current) && !in_interrupt()) {
                 /*
@@ -320,7 +318,7 @@ void irq_enter(void)
                  * here, as softirq will be serviced on return from interrupt.
                  */
                 local_bh_disable();
-                tick_check_idle(cpu);
+                tick_check_idle();
                 _local_bh_enable();
         }
 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9532690daaa9..43780ab5e279 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -538,10 +538,10 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  * Called from irq_enter() when idle was interrupted to reenable the
  * per cpu device.
  */
-void tick_check_oneshot_broadcast(int cpu)
+void tick_check_oneshot_broadcast_this_cpu(void)
 {
-        if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
-                struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
+                struct tick_device *td = &__get_cpu_var(tick_cpu_device);
 
                 /*
                  * We might be in the middle of switching over from
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 18e71f7fbc2a..e2bced59b6dd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -51,7 +51,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
-extern void tick_check_oneshot_broadcast(int cpu);
+extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
@@ -62,7 +62,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline void tick_check_oneshot_broadcast(int cpu) { }
+static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return true; }
 # endif /* !BROADCAST */
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3612fc77f834..2afd43fca93b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -391,11 +391,9 @@ __setup("nohz=", setup_tick_nohz);
  */
 static void tick_nohz_update_jiffies(ktime_t now)
 {
-        int cpu = smp_processor_id();
-        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
         unsigned long flags;
 
-        ts->idle_waketime = now;
+        __this_cpu_write(tick_cpu_sched.idle_waketime, now);
 
         local_irq_save(flags);
         tick_do_update_jiffies64(now);
@@ -426,17 +424,15 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
 
 }
 
-static void tick_nohz_stop_idle(int cpu, ktime_t now)
+static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 {
-        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-
-        update_ts_time_stats(cpu, ts, now, NULL);
+        update_ts_time_stats(smp_processor_id(), ts, now, NULL);
         ts->idle_active = 0;
 
         sched_clock_idle_wakeup_event(0);
 }
 
-static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
+static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
 {
         ktime_t now = ktime_get();
 
@@ -752,7 +748,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
         ktime_t now, expires;
         int cpu = smp_processor_id();
 
-        now = tick_nohz_start_idle(cpu, ts);
+        now = tick_nohz_start_idle(ts);
 
         if (can_stop_idle_tick(cpu, ts)) {
                 int was_stopped = ts->tick_stopped;
@@ -914,8 +910,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
  */
 void tick_nohz_idle_exit(void)
 {
-        int cpu = smp_processor_id();
-        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+        struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
         ktime_t now;
 
         local_irq_disable();
@@ -928,7 +923,7 @@ void tick_nohz_idle_exit(void)
         now = ktime_get();
 
         if (ts->idle_active)
-                tick_nohz_stop_idle(cpu, now);
+                tick_nohz_stop_idle(ts, now);
 
         if (ts->tick_stopped) {
                 tick_nohz_restart_sched_tick(ts, now);
@@ -1012,12 +1007,10 @@ static void tick_nohz_switch_to_nohz(void)
  * timer and do not touch the other magic bits which need to be done
  * when idle is left.
  */
-static void tick_nohz_kick_tick(int cpu, ktime_t now)
+static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)
 {
 #if 0
         /* Switch back to 2.6.27 behaviour */
-
-        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
         ktime_t delta;
 
         /*
@@ -1032,36 +1025,36 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now)
 #endif
 }
 
-static inline void tick_check_nohz(int cpu)
+static inline void tick_check_nohz_this_cpu(void)
 {
-        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+        struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
         ktime_t now;
 
         if (!ts->idle_active && !ts->tick_stopped)
                 return;
         now = ktime_get();
         if (ts->idle_active)
-                tick_nohz_stop_idle(cpu, now);
+                tick_nohz_stop_idle(ts, now);
         if (ts->tick_stopped) {
                 tick_nohz_update_jiffies(now);
-                tick_nohz_kick_tick(cpu, now);
+                tick_nohz_kick_tick(ts, now);
         }
 }
 
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
-static inline void tick_check_nohz(int cpu) { }
+static inline void tick_check_nohz_this_cpu(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from irq_enter to notify about the possible interruption of idle()
  */
-void tick_check_idle(int cpu)
+void tick_check_idle(void)
 {
-        tick_check_oneshot_broadcast(cpu);
-        tick_check_nohz(cpu);
+        tick_check_oneshot_broadcast_this_cpu();
+        tick_check_nohz_this_cpu();
 }
 
 /*

From 99c8b1ea0972be82ce1842d830e0173e70907065 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker
Date: Thu, 24 Oct 2013 10:07:47 -0400
Subject: [PATCH 2/6] trivial: fix spelling in CONTEXT_TRACKING_FORCE help text

Signed-off-by: Paul Gortmaker
Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Steven Rostedt
---
 init/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 79383d3aa5dc..12d61f82e5f7 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -532,7 +532,7 @@ config CONTEXT_TRACKING_FORCE
           dynticks subsystem by forcing the context tracking on all
           CPUs in the system.
 
-          Say Y only if you're working on the developpement of an
+          Say Y only if you're working on the development of an
           architecture backend for the context tracking.
 
           Say N otherwise, this option brings an overhead that you

From 58135f574f1b791c926622387780ed3d090116d6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 6 Nov 2013 14:45:57 +0100
Subject: [PATCH 3/6] context_tracking: Wrap static key check into more
 intuitive function name

Use a function with a meaningful name to check the global context
tracking state. static_key_false() is a bit confusing for reviewers.
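
For reference, the helper introduced below in context_tracking_state.h
is nothing more than a named wrapper around the static key test (quoted
here from the diff that follows):

        static inline bool context_tracking_is_enabled(void)
        {
                return static_key_false(&context_tracking_enabled);
        }

so that call sites read as a plain predicate instead of exposing the
static key machinery.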

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Steven Rostedt
---
 include/linux/context_tracking.h       | 10 +++++-----
 include/linux/context_tracking_state.h |  4 ++++
 include/linux/tick.h                   |  2 +-
 include/linux/vtime.h                  |  2 +-
 kernel/context_tracking.c              |  8 ++++----
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 158158704c30..37b81bd51ec0 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -17,13 +17,13 @@ extern void __context_tracking_task_switch(struct task_struct *prev,
 
 static inline void user_enter(void)
 {
-        if (static_key_false(&context_tracking_enabled))
+        if (context_tracking_is_enabled())
                 context_tracking_user_enter();
 }
 
 static inline void user_exit(void)
 {
-        if (static_key_false(&context_tracking_enabled))
+        if (context_tracking_is_enabled())
                 context_tracking_user_exit();
 }
 
@@ -31,7 +31,7 @@ static inline enum ctx_state exception_enter(void)
 {
         enum ctx_state prev_ctx;
 
-        if (!static_key_false(&context_tracking_enabled))
+        if (!context_tracking_is_enabled())
                 return 0;
 
         prev_ctx = this_cpu_read(context_tracking.state);
@@ -42,7 +42,7 @@ static inline enum ctx_state exception_enter(void)
 
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
-        if (static_key_false(&context_tracking_enabled)) {
+        if (context_tracking_is_enabled()) {
                 if (prev_ctx == IN_USER)
                         context_tracking_user_enter();
         }
@@ -51,7 +51,7 @@ static inline void exception_exit(enum ctx_state prev_ctx)
 static inline void context_tracking_task_switch(struct task_struct *prev,
                                                 struct task_struct *next)
 {
-        if (static_key_false(&context_tracking_enabled))
+        if (context_tracking_is_enabled())
                 __context_tracking_task_switch(prev, next);
 }
 #else
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 0f1979d0674f..0db535b79be7 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -22,6 +22,10 @@ struct context_tracking {
 extern struct static_key context_tracking_enabled;
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
 
+static inline bool context_tracking_is_enabled(void)
+{
+        return static_key_false(&context_tracking_enabled);
+}
 static inline bool context_tracking_in_user(void)
 {
         return __this_cpu_read(context_tracking.state) == IN_USER;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index a004f66a6cf0..0175d8663b6c 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -165,7 +165,7 @@ extern cpumask_var_t tick_nohz_full_mask;
 
 static inline bool tick_nohz_full_enabled(void)
 {
-        if (!static_key_false(&context_tracking_enabled))
+        if (!context_tracking_is_enabled())
                 return false;
 
         return tick_nohz_full_running;
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index f5b72b364bda..807c732cbf29 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -19,7 +19,7 @@ static inline bool vtime_accounting_enabled(void) { return true; }
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 static inline bool vtime_accounting_enabled(void)
 {
-        if (static_key_false(&context_tracking_enabled)) {
+        if (context_tracking_is_enabled()) {
                 if (context_tracking_active())
                         return true;
         }
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e5f3917aa05b..6cb20d2e7ee0 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -53,10 +53,10 @@ void context_tracking_user_enter(void)
         /*
          * Repeat the user_enter() check here because some archs may be calling
          * this from asm and if no CPU needs context tracking, they shouldn't
-         * go further. Repeat the check here until they support the static key
-         * check.
+         * go further. Repeat the check here until they support the inline static
+         * key check.
          */
-        if (!static_key_false(&context_tracking_enabled))
+        if (!context_tracking_is_enabled())
                 return;
 
         /*
@@ -160,7 +160,7 @@ void context_tracking_user_exit(void)
 {
         unsigned long flags;
 
-        if (!static_key_false(&context_tracking_enabled))
+        if (!context_tracking_is_enabled())
                 return;
 
         if (in_interrupt())

From d0df09ebfc126b23c1005f98ddecc9907f9c5d25 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 6 Nov 2013 15:11:57 +0100
Subject: [PATCH 4/6] context_tracking: Rename context_tracking_active() to
 context_tracking_cpu_is_enabled()

We currently have a confusing pair of API names: the existing
context_tracking_active() and context_tracking_is_enabled(). Let's keep
the latter, context_tracking_is_enabled(), for the global context
tracking state check, and use context_tracking_cpu_is_enabled() for the
local state check.

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Steven Rostedt
---
 include/linux/context_tracking_state.h | 11 ++++++-----
 include/linux/vtime.h                  |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 0db535b79be7..97a81225d037 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -26,15 +26,16 @@ static inline bool context_tracking_is_enabled(void)
 {
         return static_key_false(&context_tracking_enabled);
 }
+
+static inline bool context_tracking_cpu_is_enabled(void)
+{
+        return __this_cpu_read(context_tracking.active);
+}
+
 static inline bool context_tracking_in_user(void)
 {
         return __this_cpu_read(context_tracking.state) == IN_USER;
 }
-
-static inline bool context_tracking_active(void)
-{
-        return __this_cpu_read(context_tracking.active);
-}
 #else
 static inline bool context_tracking_in_user(void) { return false; }
 static inline bool context_tracking_active(void) { return false; }
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 807c732cbf29..c5165fd256f9 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -20,7 +20,7 @@ static inline bool vtime_accounting_enabled(void) { return true; }
 static inline bool vtime_accounting_enabled(void)
 {
         if (context_tracking_is_enabled()) {
-                if (context_tracking_active())
+                if (context_tracking_cpu_is_enabled())
                         return true;
         }
 

From d4283c654130c3d01b6842d3821dbdc3c15ceb46 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 6 Nov 2013 15:42:04 +0100
Subject: [PATCH 5/6] posix-timers: Spare workqueue if there is no full
 dynticks CPU to kick

After a posix cpu timer is set, a workqueue is scheduled in order to
kick the full dynticks CPUs and let them restart their tick if
necessary, in case the task they are running is concerned by the new
timer.

This kick is implemented by way of IPIs, which require interrupts to be
enabled, hence the need for a workqueue to raise them because the posix
cpu timer set path has interrupts disabled.

Now if there is no full dynticks CPU on the system, the workqueue is
still scheduled but it simply won't send any IPI and will return
immediately.

So let's spare that workqueue when it is not needed.
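
Concretely, the kick helper now only schedules the work when the
context tracking static key is enabled, which is also the condition
tick_nohz_full_enabled() relies on (quoted from the hunk below):

        static void posix_cpu_timer_kick_nohz(void)
        {
                if (context_tracking_is_enabled())
                        schedule_work(&nohz_kick_work);
        }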

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Steven Rostedt
---
 kernel/posix-cpu-timers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c7f31aa272f7..35509c5a3ffb 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -608,7 +608,8 @@ static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
  */
 static void posix_cpu_timer_kick_nohz(void)
 {
-        schedule_work(&nohz_kick_work);
+        if (context_tracking_is_enabled())
+                schedule_work(&nohz_kick_work);
 }
 
 bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)

From c925077c33fc9a546e7cf6c3be2adf4a2afe2608 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 6 Nov 2013 17:18:30 +0100
Subject: [PATCH 6/6] posix-timers: Fix full dynticks CPUs kick on timer
 rescheduling

A posix CPU timer can be rearmed while it is firing or after it is
notified with a signal. This can happen for example with timers that
were set with a non-zero interval in timer_settime().

This rearming can happen in two places:

1) On timer firing time, which happens on the target's tick. If the
   timer can't trigger a signal because it is ignored, it reschedules
   itself to honour the timer interval.

2) On signal handling from the timer's notification target. This one
   can be a different task than the timer's target itself. Once the
   signal is notified, the notification target rearms the timer, again
   to honour the timer interval.

When a timer is rearmed, we need to notify the full dynticks CPUs such
that they restart their tick in case they are running tasks that may
have a share in elapsing this timer.

Now the first case above handles full dynticks CPUs with a call to
posix_cpu_timer_kick_nohz() from the posix cpu timer firing code. But
the second case ignores the fact that some CPUs may run non-idle tasks
with their tick off. As a result, when a timer is rescheduled after its
signal notification, the full dynticks CPUs may completely ignore it
and not tick on the timer as expected.

This patch fixes this bug by handling both cases in one place: all we
need is to move the kick to the common rearming code in
posix_cpu_timer_schedule().
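
With this change, the rearming path kicks the full dynticks CPUs right
after the timer has been armed and the locks dropped (abridged excerpt
of the resulting posix_cpu_timer_schedule(), see the diff below):

        arm_timer(timer);
        spin_unlock(&p->sighand->siglock);
        read_unlock(&tasklist_lock);

        /* Kick full dynticks CPUs in case they need to tick on the new timer */
        posix_cpu_timer_kick_nohz();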

Signed-off-by: Frederic Weisbecker
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Peter Zijlstra
Cc: Oleg Nesterov
Cc: Steven Rostedt
Cc: Olivier Langlois
---
 kernel/posix-cpu-timers.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 35509c5a3ffb..79747b7d9420 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1091,7 +1091,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
                 put_task_struct(p);
                 timer->it.cpu.task = p = NULL;
                 timer->it.cpu.expires = 0;
-                goto out_unlock;
+                read_unlock(&tasklist_lock);
+                goto out;
         } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
                 /*
                  * We've noticed that the thread is dead, but
@@ -1100,7 +1101,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
                  */
                 cpu_timer_sample_group(timer->it_clock, p, &now);
                 clear_dead_task(timer, now);
-                goto out_unlock;
+                read_unlock(&tasklist_lock);
+                goto out;
         }
         spin_lock(&p->sighand->siglock);
         cpu_timer_sample_group(timer->it_clock, p, &now);
@@ -1114,10 +1116,11 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
         BUG_ON(!irqs_disabled());
         arm_timer(timer);
         spin_unlock(&p->sighand->siglock);
-
-out_unlock:
         read_unlock(&tasklist_lock);
 
+        /* Kick full dynticks CPUs in case they need to tick on the new timer */
+        posix_cpu_timer_kick_nohz();
+
 out:
         timer->it_overrun_last = timer->it_overrun;
         timer->it_overrun = -1;
@@ -1257,13 +1260,6 @@ void run_posix_cpu_timers(struct task_struct *tsk)
                 cpu_timer_fire(timer);
                 spin_unlock(&timer->it_lock);
         }
-
-        /*
-         * In case some timers were rescheduled after the queue got emptied,
-         * wake up full dynticks CPUs.
-         */
-        if (tsk->signal->cputimer.running)
-                posix_cpu_timer_kick_nohz();
 }
 
 /*