Scheduler changes in this cycle were:
- Updates to scheduler metrics: - PELT fixes & enhancements - PSI fixes & enhancements - Refactor cpu_util_without() - Updates to instrumentation/debugging: - Remove sched_trace_*() helper functions - can be done via debug info - Fix double update_rq_clock() warnings - Introduce & use "preemption model accessors" to simplify some of the Kconfig complexity. - Make softirq handling RT-safe. - Misc smaller fixes & cleanups. Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmKLvXYRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1hcXg//fJ1jAB9pQOg/Su9wwwbcOeaXNUpQA38e 970nXdK6i7w+YeAT2x1ikIQZq5S/px7k9S4Fzks8U9LMhnKPxhjdnG6J69h5XLuB z1BtRJBB6W8BAYWzAeq1M+R8whQylciOMZOBSjeTIEdpYBK7c9QA/R1DkDqPRlBA 7nW0mFbpYcK8Q1n1ItjP0wkpiHG4q8orp+BXiPG8rjiHdCa3GFt7g38hiqNls64H fOQ/Ka25tBSYrmeqQY3QsWKnKNHKQRLNareHAwI/x4V8F8d4tn/OmJzmWGDdtprn 6/gi/E99ej1j5Do8sgx/oTp/aVg4j8AsurrpGFd4/er+euoG4UyHr42UhX6zmFM6 /KIinp0Z/V2n9okgI9LUZ2x7mD682iXDilNDgiSAwu1bNDUvxBXPD30gThh+KasA HxeKxTzb4/dZV4ih4xUMsCOjUT4NFZT2rmiMorUystgyNRk28DtFCdBMtrs/zVtG qAktb7v5g76pKAmV4nQu4imZeSD+f+RJP2fuSUYQCJbCxQfthTZkn8GfCMYEdY7Y sDyBx4Te8Vu/dcnal9qMpY/m5EPruPQAkvC9zK4YvkvLUmGC742PG/xHfCdC9J2m Adbl/Cmn7tD9dOGYbHPsrViqwIiZUcjbnBlMN5DjJXQF6kWNOIXUEguZpBocminP 1CSy0+gyI6o= =GY8N -----END PGP SIGNATURE----- Merge tag 'sched-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler updates from Ingo Molnar: - Updates to scheduler metrics: - PELT fixes & enhancements - PSI fixes & enhancements - Refactor cpu_util_without() - Updates to instrumentation/debugging: - Remove sched_trace_*() helper functions - can be done via debug info - Fix double update_rq_clock() warnings - Introduce & use "preemption model accessors" to simplify some of the Kconfig complexity. - Make softirq handling RT-safe. - Misc smaller fixes & cleanups. * tag 'sched-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: topology: Remove unused cpu_cluster_mask() sched: Reverse sched_class layout sched/deadline: Remove superfluous rq clock update in push_dl_task() sched/core: Avoid obvious double update_rq_clock warning smp: Make softirq handling RT safe in flush_smp_call_function_queue() smp: Rename flush_smp_call_function_from_idle() sched: Fix missing prototype warnings sched/fair: Remove cfs_rq_tg_path() sched/fair: Remove sched_trace_*() helper functions sched/fair: Refactor cpu_util_without() sched/fair: Revise comment about lb decision matrix sched/psi: report zeroes for CPU full at the system level sched/fair: Delete useless condition in tg_unthrottle_up() sched/fair: Fix cfs_rq_clock_pelt() for throttled cfs_rq sched/fair: Move calculate of avg_load to a better location mailmap: Update my email address to @redhat.com MAINTAINERS: Add myself as scheduler topology reviewer psi: Fix trigger being fired unexpectedly at initial ftrace: Use preemption model accessors for trace header printout kcsan: Use preemption model accessors
This commit is contained in:
commit
6f3f04c190
1
.mailmap
1
.mailmap
@ -398,6 +398,7 @@ Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
|
||||
Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
|
||||
|
@ -37,11 +37,7 @@ Pressure interface
|
||||
Pressure information for each resource is exported through the
|
||||
respective file in /proc/pressure/ -- cpu, memory, and io.
|
||||
|
||||
The format for CPU is as such::
|
||||
|
||||
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
|
||||
and for memory and IO::
|
||||
The format is as such::
|
||||
|
||||
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
|
||||
@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is
|
||||
still doing productive work. As such, time spent in this subset of the
|
||||
stall state is tracked separately and exported in the "full" averages.
|
||||
|
||||
CPU full is undefined at the system level, but has been reported
|
||||
since 5.13, so it is set to zero for backward compatibility.
|
||||
|
||||
The ratios (in %) are tracked as recent trends over ten, sixty, and
|
||||
three hundred second windows, which gives insight into short term events
|
||||
as well as medium and long term trends. The total absolute stall time
|
||||
|
@ -17524,6 +17524,7 @@ R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
|
||||
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
|
||||
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
|
||||
R: Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
|
||||
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
|
||||
|
@ -126,13 +126,13 @@
|
||||
*/
|
||||
#define SCHED_DATA \
|
||||
STRUCT_ALIGN(); \
|
||||
__begin_sched_classes = .; \
|
||||
*(__idle_sched_class) \
|
||||
*(__fair_sched_class) \
|
||||
*(__rt_sched_class) \
|
||||
*(__dl_sched_class) \
|
||||
__sched_class_highest = .; \
|
||||
*(__stop_sched_class) \
|
||||
__end_sched_classes = .;
|
||||
*(__dl_sched_class) \
|
||||
*(__rt_sched_class) \
|
||||
*(__fair_sched_class) \
|
||||
*(__idle_sched_class) \
|
||||
__sched_class_lowest = .;
|
||||
|
||||
/* The actual configuration determine if the init/exit sections
|
||||
* are handled as text/data or they can be discarded (which
|
||||
|
@ -589,6 +589,15 @@ struct softirq_action
|
||||
asmlinkage void do_softirq(void);
|
||||
asmlinkage void __do_softirq(void);
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
extern void do_softirq_post_smp_call_flush(unsigned int was_pending);
|
||||
#else
|
||||
static inline void do_softirq_post_smp_call_flush(unsigned int unused)
|
||||
{
|
||||
do_softirq();
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void open_softirq(int nr, void (*action)(struct softirq_action *));
|
||||
extern void softirq_init(void);
|
||||
extern void __raise_softirq_irqoff(unsigned int nr);
|
||||
|
@ -2382,20 +2382,6 @@ static inline void rseq_syscall(struct pt_regs *regs)
|
||||
|
||||
#endif
|
||||
|
||||
const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
|
||||
char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
|
||||
int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
|
||||
|
||||
const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq);
|
||||
const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq);
|
||||
const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq);
|
||||
|
||||
int sched_trace_rq_cpu(struct rq *rq);
|
||||
int sched_trace_rq_cpu_capacity(struct rq *rq);
|
||||
int sched_trace_rq_nr_running(struct rq *rq);
|
||||
|
||||
const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
|
||||
|
||||
#ifdef CONFIG_SCHED_CORE
|
||||
extern void sched_core_free(struct task_struct *tsk);
|
||||
extern void sched_core_fork(struct task_struct *p);
|
||||
@ -2406,4 +2392,6 @@ static inline void sched_core_free(struct task_struct *tsk) { }
|
||||
static inline void sched_core_fork(struct task_struct *p) { }
|
||||
#endif
|
||||
|
||||
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
|
||||
|
||||
#endif
|
||||
|
@ -240,13 +240,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
|
||||
static inline const struct cpumask *cpu_cluster_mask(int cpu)
|
||||
{
|
||||
return topology_cluster_cpumask(cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline const struct cpumask *cpu_cpu_mask(int cpu)
|
||||
{
|
||||
return cpumask_of_node(cpu_to_node(cpu));
|
||||
|
@ -1380,13 +1380,14 @@ static const void *nthreads_gen_params(const void *prev, char *desc)
|
||||
else
|
||||
nthreads *= 2;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
|
||||
if (!preempt_model_preemptible() ||
|
||||
!IS_ENABLED(CONFIG_KCSAN_INTERRUPT_WATCHER)) {
|
||||
/*
|
||||
* Without any preemption, keep 2 CPUs free for other tasks, one
|
||||
* of which is the main test case function checking for
|
||||
* completion or failure.
|
||||
*/
|
||||
const long min_unused_cpus = IS_ENABLED(CONFIG_PREEMPT_NONE) ? 2 : 0;
|
||||
const long min_unused_cpus = preempt_model_none() ? 2 : 0;
|
||||
const long min_required_cpus = 2 + min_unused_cpus;
|
||||
|
||||
if (num_online_cpus() < min_required_cpus) {
|
||||
|
@ -15,6 +15,7 @@
|
||||
/* Headers: */
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/sched/hotplug.h>
|
||||
#include <linux/sched/posix-timers.h>
|
||||
#include <linux/sched/rt.h>
|
||||
|
||||
@ -31,6 +32,7 @@
|
||||
#include <uapi/linux/sched/types.h>
|
||||
|
||||
#include "sched.h"
|
||||
#include "smp.h"
|
||||
|
||||
#include "autogroup.h"
|
||||
#include "stats.h"
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/sched/loadavg.h>
|
||||
#include <linux/sched/nohz.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sched/rseq_api.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
@ -26,7 +26,10 @@
|
||||
#include <linux/topology.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/cond_resched.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/hotplug.h>
|
||||
#include <linux/sched/init.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/sched/loadavg.h>
|
||||
#include <linux/sched/mm.h>
|
||||
@ -610,10 +613,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
|
||||
swap(rq1, rq2);
|
||||
|
||||
raw_spin_rq_lock(rq1);
|
||||
if (__rq_lockp(rq1) == __rq_lockp(rq2))
|
||||
return;
|
||||
if (__rq_lockp(rq1) != __rq_lockp(rq2))
|
||||
raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
|
||||
|
||||
raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
|
||||
double_rq_clock_clear_update(rq1, rq2);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2190,7 +2193,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
if (p->sched_class == rq->curr->sched_class)
|
||||
rq->curr->sched_class->check_preempt_curr(rq, p, flags);
|
||||
else if (p->sched_class > rq->curr->sched_class)
|
||||
else if (sched_class_above(p->sched_class, rq->curr->sched_class))
|
||||
resched_curr(rq);
|
||||
|
||||
/*
|
||||
@ -2408,7 +2411,7 @@ static int migration_cpu_stop(void *data)
|
||||
* __migrate_task() such that we will not miss enforcing cpus_ptr
|
||||
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
|
||||
*/
|
||||
flush_smp_call_function_from_idle();
|
||||
flush_smp_call_function_queue();
|
||||
|
||||
raw_spin_lock(&p->pi_lock);
|
||||
rq_lock(rq, &rf);
|
||||
@ -5689,7 +5692,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
|
||||
* higher scheduling class, because otherwise those lose the
|
||||
* opportunity to pull in more work from other CPUs.
|
||||
*/
|
||||
if (likely(prev->sched_class <= &fair_sched_class &&
|
||||
if (likely(!sched_class_above(prev->sched_class, &fair_sched_class) &&
|
||||
rq->nr_running == rq->cfs.h_nr_running)) {
|
||||
|
||||
p = pick_next_task_fair(rq, prev, rf);
|
||||
@ -9469,11 +9472,11 @@ void __init sched_init(void)
|
||||
int i;
|
||||
|
||||
/* Make sure the linker didn't screw up */
|
||||
BUG_ON(&idle_sched_class + 1 != &fair_sched_class ||
|
||||
&fair_sched_class + 1 != &rt_sched_class ||
|
||||
&rt_sched_class + 1 != &dl_sched_class);
|
||||
BUG_ON(&idle_sched_class != &fair_sched_class + 1 ||
|
||||
&fair_sched_class != &rt_sched_class + 1 ||
|
||||
&rt_sched_class != &dl_sched_class + 1);
|
||||
#ifdef CONFIG_SMP
|
||||
BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
|
||||
BUG_ON(&dl_sched_class != &stop_sched_class + 1);
|
||||
#endif
|
||||
|
||||
wait_bit_init();
|
||||
|
@ -1220,8 +1220,6 @@ int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
|
||||
return (dl_se->runtime <= 0);
|
||||
}
|
||||
|
||||
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
|
||||
|
||||
/*
|
||||
* This function implements the GRUB accounting rule:
|
||||
* according to the GRUB reclaiming algorithm, the runtime is
|
||||
@ -1832,6 +1830,7 @@ out:
|
||||
|
||||
static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
|
||||
{
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
if (READ_ONCE(p->__state) != TASK_WAKING)
|
||||
@ -1843,7 +1842,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
|
||||
* from try_to_wake_up(). Hence, p->pi_lock is locked, but
|
||||
* rq->lock is not... So, lock it
|
||||
*/
|
||||
raw_spin_rq_lock(rq);
|
||||
rq_lock(rq, &rf);
|
||||
if (p->dl.dl_non_contending) {
|
||||
update_rq_clock(rq);
|
||||
sub_running_bw(&p->dl, &rq->dl);
|
||||
@ -1859,7 +1858,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
|
||||
put_task_struct(p);
|
||||
}
|
||||
sub_rq_bw(&p->dl, &rq->dl);
|
||||
raw_spin_rq_unlock(rq);
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
|
||||
@ -2319,13 +2318,7 @@ retry:
|
||||
|
||||
deactivate_task(rq, next_task, 0);
|
||||
set_task_cpu(next_task, later_rq->cpu);
|
||||
|
||||
/*
|
||||
* Update the later_rq clock here, because the clock is used
|
||||
* by the cpufreq_update_util() inside __add_running_bw().
|
||||
*/
|
||||
update_rq_clock(later_rq);
|
||||
activate_task(later_rq, next_task, ENQUEUE_NOCLOCK);
|
||||
activate_task(later_rq, next_task, 0);
|
||||
ret = 1;
|
||||
|
||||
resched_curr(later_rq);
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <linux/sched/cond_resched.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/sched/nohz.h>
|
||||
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/interrupt.h>
|
||||
@ -313,19 +314,6 @@ const struct sched_class fair_sched_class;
|
||||
#define for_each_sched_entity(se) \
|
||||
for (; se; se = se->parent)
|
||||
|
||||
static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
|
||||
{
|
||||
if (!path)
|
||||
return;
|
||||
|
||||
if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
|
||||
autogroup_path(cfs_rq->tg, path, len);
|
||||
else if (cfs_rq && cfs_rq->tg->css.cgroup)
|
||||
cgroup_path(cfs_rq->tg->css.cgroup, path, len);
|
||||
else
|
||||
strlcpy(path, "(null)", len);
|
||||
}
|
||||
|
||||
static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
@ -493,12 +481,6 @@ static int se_is_idle(struct sched_entity *se)
|
||||
#define for_each_sched_entity(se) \
|
||||
for (; se; se = NULL)
|
||||
|
||||
static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
|
||||
{
|
||||
if (path)
|
||||
strlcpy(path, "(null)", len);
|
||||
}
|
||||
|
||||
static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return true;
|
||||
@ -4846,11 +4828,11 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
|
||||
|
||||
cfs_rq->throttle_count--;
|
||||
if (!cfs_rq->throttle_count) {
|
||||
cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
|
||||
cfs_rq->throttled_clock_task;
|
||||
cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
|
||||
cfs_rq->throttled_clock_pelt;
|
||||
|
||||
/* Add cfs_rq with load or one or more already running entities to the list */
|
||||
if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
|
||||
if (!cfs_rq_is_decayed(cfs_rq))
|
||||
list_add_leaf_cfs_rq(cfs_rq);
|
||||
}
|
||||
|
||||
@ -4864,7 +4846,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
|
||||
|
||||
/* group is entering throttled state, stop time */
|
||||
if (!cfs_rq->throttle_count) {
|
||||
cfs_rq->throttled_clock_task = rq_clock_task(rq);
|
||||
cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
|
||||
list_del_leaf_cfs_rq(cfs_rq);
|
||||
}
|
||||
cfs_rq->throttle_count++;
|
||||
@ -5308,7 +5290,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
|
||||
pcfs_rq = tg->parent->cfs_rq[cpu];
|
||||
|
||||
cfs_rq->throttle_count = pcfs_rq->throttle_count;
|
||||
cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
|
||||
cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu));
|
||||
}
|
||||
|
||||
/* conditionally throttle active cfs_rq's from put_prev_entity() */
|
||||
@ -6543,6 +6525,68 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
return target;
|
||||
}
|
||||
|
||||
/*
|
||||
* Predicts what cpu_util(@cpu) would return if @p was removed from @cpu
|
||||
* (@dst_cpu = -1) or migrated to @dst_cpu.
|
||||
*/
|
||||
static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
|
||||
unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
|
||||
|
||||
/*
|
||||
* If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
|
||||
* contribution. If @p migrates from another CPU to @cpu add its
|
||||
* contribution. In all the other cases @cpu is not impacted by the
|
||||
* migration so its util_avg is already correct.
|
||||
*/
|
||||
if (task_cpu(p) == cpu && dst_cpu != cpu)
|
||||
lsub_positive(&util, task_util(p));
|
||||
else if (task_cpu(p) != cpu && dst_cpu == cpu)
|
||||
util += task_util(p);
|
||||
|
||||
if (sched_feat(UTIL_EST)) {
|
||||
unsigned long util_est;
|
||||
|
||||
util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
|
||||
|
||||
/*
|
||||
* During wake-up @p isn't enqueued yet and doesn't contribute
|
||||
* to any cpu_rq(cpu)->cfs.avg.util_est.enqueued.
|
||||
* If @dst_cpu == @cpu add it to "simulate" cpu_util after @p
|
||||
* has been enqueued.
|
||||
*
|
||||
* During exec (@dst_cpu = -1) @p is enqueued and does
|
||||
* contribute to cpu_rq(cpu)->cfs.util_est.enqueued.
|
||||
* Remove it to "simulate" cpu_util without @p's contribution.
|
||||
*
|
||||
* Despite the task_on_rq_queued(@p) check there is still a
|
||||
* small window for a possible race when an exec
|
||||
* select_task_rq_fair() races with LB's detach_task().
|
||||
*
|
||||
* detach_task()
|
||||
* deactivate_task()
|
||||
* p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
* -------------------------------- A
|
||||
* dequeue_task() \
|
||||
* dequeue_task_fair() + Race Time
|
||||
* util_est_dequeue() /
|
||||
* -------------------------------- B
|
||||
*
|
||||
* The additional check "current == p" is required to further
|
||||
* reduce the race window.
|
||||
*/
|
||||
if (dst_cpu == cpu)
|
||||
util_est += _task_util_est(p);
|
||||
else if (unlikely(task_on_rq_queued(p) || current == p))
|
||||
lsub_positive(&util_est, _task_util_est(p));
|
||||
|
||||
util = max(util, util_est);
|
||||
}
|
||||
|
||||
return min(util, capacity_orig_of(cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu_util_without: compute cpu utilization without any contributions from *p
|
||||
* @cpu: the CPU which utilization is requested
|
||||
@ -6558,116 +6602,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
*/
|
||||
static unsigned long cpu_util_without(int cpu, struct task_struct *p)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
unsigned int util;
|
||||
|
||||
/* Task has no contribution or is new */
|
||||
if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
|
||||
return cpu_util_cfs(cpu);
|
||||
|
||||
cfs_rq = &cpu_rq(cpu)->cfs;
|
||||
util = READ_ONCE(cfs_rq->avg.util_avg);
|
||||
|
||||
/* Discount task's util from CPU's util */
|
||||
lsub_positive(&util, task_util(p));
|
||||
|
||||
/*
|
||||
* Covered cases:
|
||||
*
|
||||
* a) if *p is the only task sleeping on this CPU, then:
|
||||
* cpu_util (== task_util) > util_est (== 0)
|
||||
* and thus we return:
|
||||
* cpu_util_without = (cpu_util - task_util) = 0
|
||||
*
|
||||
* b) if other tasks are SLEEPING on this CPU, which is now exiting
|
||||
* IDLE, then:
|
||||
* cpu_util >= task_util
|
||||
* cpu_util > util_est (== 0)
|
||||
* and thus we discount *p's blocked utilization to return:
|
||||
* cpu_util_without = (cpu_util - task_util) >= 0
|
||||
*
|
||||
* c) if other tasks are RUNNABLE on that CPU and
|
||||
* util_est > cpu_util
|
||||
* then we use util_est since it returns a more restrictive
|
||||
* estimation of the spare capacity on that CPU, by just
|
||||
* considering the expected utilization of tasks already
|
||||
* runnable on that CPU.
|
||||
*
|
||||
* Cases a) and b) are covered by the above code, while case c) is
|
||||
* covered by the following code when estimated utilization is
|
||||
* enabled.
|
||||
*/
|
||||
if (sched_feat(UTIL_EST)) {
|
||||
unsigned int estimated =
|
||||
READ_ONCE(cfs_rq->avg.util_est.enqueued);
|
||||
|
||||
/*
|
||||
* Despite the following checks we still have a small window
|
||||
* for a possible race, when an execl's select_task_rq_fair()
|
||||
* races with LB's detach_task():
|
||||
*
|
||||
* detach_task()
|
||||
* p->on_rq = TASK_ON_RQ_MIGRATING;
|
||||
* ---------------------------------- A
|
||||
* deactivate_task() \
|
||||
* dequeue_task() + RaceTime
|
||||
* util_est_dequeue() /
|
||||
* ---------------------------------- B
|
||||
*
|
||||
* The additional check on "current == p" it's required to
|
||||
* properly fix the execl regression and it helps in further
|
||||
* reducing the chances for the above race.
|
||||
*/
|
||||
if (unlikely(task_on_rq_queued(p) || current == p))
|
||||
lsub_positive(&estimated, _task_util_est(p));
|
||||
|
||||
util = max(util, estimated);
|
||||
}
|
||||
|
||||
/*
|
||||
* Utilization (estimated) can exceed the CPU capacity, thus let's
|
||||
* clamp to the maximum CPU capacity to ensure consistency with
|
||||
* cpu_util.
|
||||
*/
|
||||
return min_t(unsigned long, util, capacity_orig_of(cpu));
|
||||
}
|
||||
|
||||
/*
|
||||
* Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
|
||||
* to @dst_cpu.
|
||||
*/
|
||||
static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
|
||||
unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
|
||||
|
||||
/*
|
||||
* If @p migrates from @cpu to another, remove its contribution. Or,
|
||||
* if @p migrates from another CPU to @cpu, add its contribution. In
|
||||
* the other cases, @cpu is not impacted by the migration, so the
|
||||
* util_avg should already be correct.
|
||||
*/
|
||||
if (task_cpu(p) == cpu && dst_cpu != cpu)
|
||||
lsub_positive(&util, task_util(p));
|
||||
else if (task_cpu(p) != cpu && dst_cpu == cpu)
|
||||
util += task_util(p);
|
||||
|
||||
if (sched_feat(UTIL_EST)) {
|
||||
util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
|
||||
|
||||
/*
|
||||
* During wake-up, the task isn't enqueued yet and doesn't
|
||||
* appear in the cfs_rq->avg.util_est.enqueued of any rq,
|
||||
* so just add it (if needed) to "simulate" what will be
|
||||
* cpu_util after the task has been enqueued.
|
||||
*/
|
||||
if (dst_cpu == cpu)
|
||||
util_est += _task_util_est(p);
|
||||
|
||||
util = max(util, util_est);
|
||||
}
|
||||
|
||||
return min(util, capacity_orig_of(cpu));
|
||||
return cpu_util_next(cpu, p, -1);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -9460,8 +9399,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) /
|
||||
local->group_capacity;
|
||||
|
||||
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
|
||||
sds->total_capacity;
|
||||
/*
|
||||
* If the local group is more loaded than the selected
|
||||
* busiest group don't try to pull any tasks.
|
||||
@ -9470,6 +9407,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
env->imbalance = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
|
||||
sds->total_capacity;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -9495,7 +9435,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
* busiest \ local has_spare fully_busy misfit asym imbalanced overloaded
|
||||
* has_spare nr_idle balanced N/A N/A balanced balanced
|
||||
* fully_busy nr_idle nr_idle N/A N/A balanced balanced
|
||||
* misfit_task force N/A N/A N/A force force
|
||||
* misfit_task force N/A N/A N/A N/A N/A
|
||||
* asym_packing force force N/A N/A force force
|
||||
* imbalanced force force N/A N/A force force
|
||||
* overloaded force force N/A N/A force avg_load
|
||||
@ -11881,101 +11821,3 @@ __init void init_sched_fair_class(void)
|
||||
#endif /* SMP */
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper functions to facilitate extracting info from tracepoints.
|
||||
*/
|
||||
|
||||
const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
return cfs_rq ? &cfs_rq->avg : NULL;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_avg);
|
||||
|
||||
char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len)
|
||||
{
|
||||
if (!cfs_rq) {
|
||||
if (str)
|
||||
strlcpy(str, "(null)", len);
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cfs_rq_tg_path(cfs_rq, str, len);
|
||||
return str;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_path);
|
||||
|
||||
int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return cfs_rq ? cpu_of(rq_of(cfs_rq)) : -1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_cpu);
|
||||
|
||||
const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
return rq ? &rq->avg_rt : NULL;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rq_avg_rt);
|
||||
|
||||
const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
return rq ? &rq->avg_dl : NULL;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rq_avg_dl);
|
||||
|
||||
const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq)
|
||||
{
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_HAVE_SCHED_AVG_IRQ)
|
||||
return rq ? &rq->avg_irq : NULL;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rq_avg_irq);
|
||||
|
||||
int sched_trace_rq_cpu(struct rq *rq)
|
||||
{
|
||||
return rq ? cpu_of(rq) : -1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rq_cpu);
|
||||
|
||||
int sched_trace_rq_cpu_capacity(struct rq *rq)
|
||||
{
|
||||
return rq ?
|
||||
#ifdef CONFIG_SMP
|
||||
rq->cpu_capacity
|
||||
#else
|
||||
SCHED_CAPACITY_SCALE
|
||||
#endif
|
||||
: -1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rq_cpu_capacity);
|
||||
|
||||
const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
return rd ? rd->span : NULL;
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rd_span);
|
||||
|
||||
int sched_trace_rq_nr_running(struct rq *rq)
|
||||
{
|
||||
return rq ? rq->nr_running : -1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running);
|
||||
|
@ -327,7 +327,7 @@ static void do_idle(void)
|
||||
* RCU relies on this call to be done outside of an RCU read-side
|
||||
* critical section.
|
||||
*/
|
||||
flush_smp_call_function_from_idle();
|
||||
flush_smp_call_function_queue();
|
||||
schedule_idle();
|
||||
|
||||
if (unlikely(klp_patch_pending(current)))
|
||||
|
@ -145,9 +145,9 @@ static inline u64 rq_clock_pelt(struct rq *rq)
|
||||
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
if (unlikely(cfs_rq->throttle_count))
|
||||
return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
|
||||
return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time;
|
||||
|
||||
return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
|
||||
return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
|
||||
}
|
||||
#else
|
||||
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
|
||||
|
@ -1060,14 +1060,17 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
|
||||
mutex_unlock(&group->avgs_lock);
|
||||
|
||||
for (full = 0; full < 2; full++) {
|
||||
unsigned long avg[3];
|
||||
u64 total;
|
||||
unsigned long avg[3] = { 0, };
|
||||
u64 total = 0;
|
||||
int w;
|
||||
|
||||
for (w = 0; w < 3; w++)
|
||||
avg[w] = group->avg[res * 2 + full][w];
|
||||
total = div_u64(group->total[PSI_AVGS][res * 2 + full],
|
||||
NSEC_PER_USEC);
|
||||
/* CPU FULL is undefined at the system level */
|
||||
if (!(group == &psi_system && res == PSI_CPU && full)) {
|
||||
for (w = 0; w < 3; w++)
|
||||
avg[w] = group->avg[res * 2 + full][w];
|
||||
total = div_u64(group->total[PSI_AVGS][res * 2 + full],
|
||||
NSEC_PER_USEC);
|
||||
}
|
||||
|
||||
seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
|
||||
full ? "full" : "some",
|
||||
@ -1117,7 +1120,8 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||
t->state = state;
|
||||
t->threshold = threshold_us * NSEC_PER_USEC;
|
||||
t->win.size = window_us * NSEC_PER_USEC;
|
||||
window_reset(&t->win, 0, 0, 0);
|
||||
window_reset(&t->win, sched_clock(),
|
||||
group->total[PSI_POLL][t->state], 0);
|
||||
|
||||
t->event = 0;
|
||||
t->last_event_time = 0;
|
||||
|
@ -871,6 +871,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
|
||||
int enqueue = 0;
|
||||
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
struct rq_flags rf;
|
||||
int skip;
|
||||
|
||||
/*
|
||||
@ -885,7 +886,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
|
||||
if (skip)
|
||||
continue;
|
||||
|
||||
raw_spin_rq_lock(rq);
|
||||
rq_lock(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
|
||||
if (rt_rq->rt_time) {
|
||||
@ -923,7 +924,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
|
||||
|
||||
if (enqueue)
|
||||
sched_rt_rq_enqueue(rt_rq);
|
||||
raw_spin_rq_unlock(rq);
|
||||
rq_unlock(rq, &rf);
|
||||
}
|
||||
|
||||
if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
|
||||
|
@ -603,8 +603,8 @@ struct cfs_rq {
|
||||
s64 runtime_remaining;
|
||||
|
||||
u64 throttled_clock;
|
||||
u64 throttled_clock_task;
|
||||
u64 throttled_clock_task_time;
|
||||
u64 throttled_clock_pelt;
|
||||
u64 throttled_clock_pelt_time;
|
||||
int throttled;
|
||||
int throttle_count;
|
||||
struct list_head throttled_list;
|
||||
@ -1827,12 +1827,7 @@ static inline void dirty_sched_domain_sysctl(int cpu)
|
||||
#endif
|
||||
|
||||
extern int sched_update_scaling(void);
|
||||
|
||||
extern void flush_smp_call_function_from_idle(void);
|
||||
|
||||
#else /* !CONFIG_SMP: */
|
||||
static inline void flush_smp_call_function_from_idle(void) { }
|
||||
#endif
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#include "stats.h"
|
||||
|
||||
@ -2182,6 +2177,8 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
|
||||
*
|
||||
* include/asm-generic/vmlinux.lds.h
|
||||
*
|
||||
* *CAREFUL* they are laid out in *REVERSE* order!!!
|
||||
*
|
||||
* Also enforce alignment on the instance, not the type, to guarantee layout.
|
||||
*/
|
||||
#define DEFINE_SCHED_CLASS(name) \
|
||||
@ -2190,17 +2187,16 @@ const struct sched_class name##_sched_class \
|
||||
__section("__" #name "_sched_class")
|
||||
|
||||
/* Defined in include/asm-generic/vmlinux.lds.h */
|
||||
extern struct sched_class __begin_sched_classes[];
|
||||
extern struct sched_class __end_sched_classes[];
|
||||
|
||||
#define sched_class_highest (__end_sched_classes - 1)
|
||||
#define sched_class_lowest (__begin_sched_classes - 1)
|
||||
extern struct sched_class __sched_class_highest[];
|
||||
extern struct sched_class __sched_class_lowest[];
|
||||
|
||||
#define for_class_range(class, _from, _to) \
|
||||
for (class = (_from); class != (_to); class--)
|
||||
for (class = (_from); class < (_to); class++)
|
||||
|
||||
#define for_each_class(class) \
|
||||
for_class_range(class, sched_class_highest, sched_class_lowest)
|
||||
for_class_range(class, __sched_class_highest, __sched_class_lowest)
|
||||
|
||||
#define sched_class_above(_a, _b) ((_a) < (_b))
|
||||
|
||||
extern const struct sched_class stop_sched_class;
|
||||
extern const struct sched_class dl_sched_class;
|
||||
@ -2309,6 +2305,7 @@ extern void resched_cpu(int cpu);
|
||||
|
||||
extern struct rt_bandwidth def_rt_bandwidth;
|
||||
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
|
||||
extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
|
||||
|
||||
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
|
||||
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
|
||||
@ -2478,6 +2475,24 @@ unsigned long arch_scale_freq_capacity(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
/*
|
||||
* In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
|
||||
* acquire rq lock instead of rq_lock(). So at the end of these two functions
|
||||
* we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
|
||||
* rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
|
||||
*/
|
||||
static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
|
||||
{
|
||||
rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
|
||||
/* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
|
||||
#ifdef CONFIG_SMP
|
||||
rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
@ -2543,14 +2558,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
|
||||
__acquires(busiest->lock)
|
||||
__acquires(this_rq->lock)
|
||||
{
|
||||
if (__rq_lockp(this_rq) == __rq_lockp(busiest))
|
||||
return 0;
|
||||
|
||||
if (likely(raw_spin_rq_trylock(busiest)))
|
||||
if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
|
||||
likely(raw_spin_rq_trylock(busiest))) {
|
||||
double_rq_clock_clear_update(this_rq, busiest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (rq_order_less(this_rq, busiest)) {
|
||||
raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
|
||||
double_rq_clock_clear_update(this_rq, busiest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2644,6 +2660,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
|
||||
BUG_ON(rq1 != rq2);
|
||||
raw_spin_rq_lock(rq1);
|
||||
__acquire(rq2->lock); /* Fake it out ;) */
|
||||
double_rq_clock_clear_update(rq1, rq2);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -7,3 +7,9 @@
|
||||
extern void sched_ttwu_pending(void *arg);
|
||||
|
||||
extern void send_call_function_single_ipi(int cpu);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
extern void flush_smp_call_function_queue(void);
|
||||
#else
|
||||
static inline void flush_smp_call_function_queue(void) { }
|
||||
#endif
|
||||
|
32
kernel/smp.c
32
kernel/smp.c
@ -96,7 +96,7 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
|
||||
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline);
|
||||
static void __flush_smp_call_function_queue(bool warn_cpu_offline);
|
||||
|
||||
int smpcfd_prepare_cpu(unsigned int cpu)
|
||||
{
|
||||
@ -141,7 +141,7 @@ int smpcfd_dying_cpu(unsigned int cpu)
|
||||
* ensure that the outgoing CPU doesn't go offline with work
|
||||
* still pending.
|
||||
*/
|
||||
flush_smp_call_function_queue(false);
|
||||
__flush_smp_call_function_queue(false);
|
||||
irq_work_run();
|
||||
return 0;
|
||||
}
|
||||
@ -544,11 +544,11 @@ void generic_smp_call_function_single_interrupt(void)
|
||||
{
|
||||
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU,
|
||||
smp_processor_id(), CFD_SEQ_GOTIPI);
|
||||
flush_smp_call_function_queue(true);
|
||||
__flush_smp_call_function_queue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
|
||||
* __flush_smp_call_function_queue - Flush pending smp-call-function callbacks
|
||||
*
|
||||
* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
|
||||
* offline CPU. Skip this check if set to 'false'.
|
||||
@ -561,7 +561,7 @@ void generic_smp_call_function_single_interrupt(void)
|
||||
* Loop through the call_single_queue and run all the queued callbacks.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
||||
static void __flush_smp_call_function_queue(bool warn_cpu_offline)
|
||||
{
|
||||
call_single_data_t *csd, *csd_next;
|
||||
struct llist_node *entry, *prev;
|
||||
@ -684,8 +684,22 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
||||
smp_processor_id(), CFD_SEQ_HDLEND);
|
||||
}
|
||||
|
||||
void flush_smp_call_function_from_idle(void)
|
||||
|
||||
/**
|
||||
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
|
||||
* from task context (idle, migration thread)
|
||||
*
|
||||
* When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it
|
||||
* set, then remote CPUs can avoid sending IPIs and wake the idle CPU by
|
||||
* setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to
|
||||
* handle queued SMP function calls before scheduling.
|
||||
*
|
||||
* The migration thread has to ensure that an eventually pending wakeup has
|
||||
* been handled before it migrates a task.
|
||||
*/
|
||||
void flush_smp_call_function_queue(void)
|
||||
{
|
||||
unsigned int was_pending;
|
||||
unsigned long flags;
|
||||
|
||||
if (llist_empty(this_cpu_ptr(&call_single_queue)))
|
||||
@ -694,9 +708,11 @@ void flush_smp_call_function_from_idle(void)
|
||||
cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
|
||||
smp_processor_id(), CFD_SEQ_IDLE);
|
||||
local_irq_save(flags);
|
||||
flush_smp_call_function_queue(true);
|
||||
/* Get the already pending soft interrupts for RT enabled kernels */
|
||||
was_pending = local_softirq_pending();
|
||||
__flush_smp_call_function_queue(true);
|
||||
if (local_softirq_pending())
|
||||
do_softirq();
|
||||
do_softirq_post_smp_call_flush(was_pending);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
@ -294,6 +294,19 @@ static inline void invoke_softirq(void)
|
||||
wakeup_softirqd();
|
||||
}
|
||||
|
||||
/*
|
||||
* flush_smp_call_function_queue() can raise a soft interrupt in a function
|
||||
* call. On RT kernels this is undesired and the only known functionality
|
||||
* in the block layer which does this is disabled on RT. If soft interrupts
|
||||
* get raised which haven't been raised before the flush, warn so it can be
|
||||
* investigated.
|
||||
*/
|
||||
void do_softirq_post_smp_call_flush(unsigned int was_pending)
|
||||
{
|
||||
if (WARN_ON_ONCE(was_pending != local_softirq_pending()))
|
||||
invoke_softirq();
|
||||
}
|
||||
|
||||
#else /* CONFIG_PREEMPT_RT */
|
||||
|
||||
/*
|
||||
|
@ -535,8 +535,6 @@ void stop_machine_park(int cpu)
|
||||
kthread_park(stopper->thread);
|
||||
}
|
||||
|
||||
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
|
||||
|
||||
static void cpu_stop_create(unsigned int cpu)
|
||||
{
|
||||
sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
|
||||
|
@ -4289,17 +4289,11 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
|
||||
entries,
|
||||
total,
|
||||
buf->cpu,
|
||||
#if defined(CONFIG_PREEMPT_NONE)
|
||||
"server",
|
||||
#elif defined(CONFIG_PREEMPT_VOLUNTARY)
|
||||
"desktop",
|
||||
#elif defined(CONFIG_PREEMPT)
|
||||
"preempt",
|
||||
#elif defined(CONFIG_PREEMPT_RT)
|
||||
"preempt_rt",
|
||||
#else
|
||||
preempt_model_none() ? "server" :
|
||||
preempt_model_voluntary() ? "desktop" :
|
||||
preempt_model_full() ? "preempt" :
|
||||
preempt_model_rt() ? "preempt_rt" :
|
||||
"unknown",
|
||||
#endif
|
||||
/* These are reserved for later use */
|
||||
0, 0, 0, 0);
|
||||
#ifdef CONFIG_SMP
|
||||
|
Loading…
Reference in New Issue
Block a user