sched: Add core wide task selection and scheduling
Instead of only selecting a local task, select a task for all SMT
siblings for every reschedule on the core (irrespective of which
logical CPU does the reschedule).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.557559654@infradead.org
commit 539f65125d
parent 8a311c740b
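Editorial note: the invariant this patch enforces is that, at any instant, every SMT sibling of a core runs either the idle task or a task whose core_cookie matches that of every other non-idle sibling. A minimal, userspace-only sketch of that constraint is below; struct pick, picks_compatible() and core_selection_valid() are invented names for illustration, not part of the patch.

#include <stdbool.h>
#include <stdio.h>

struct pick {
        bool idle;              /* sibling was (force-)idled              */
        unsigned long cookie;   /* 0 means an untagged task               */
};

/* Mirrors cookie_match(): idle is a wildcard, otherwise cookies must be equal. */
static bool picks_compatible(const struct pick *a, const struct pick *b)
{
        if (a->idle || b->idle)
                return true;
        return a->cookie == b->cookie;
}

/* A core-wide selection is valid when all siblings are pairwise compatible. */
static bool core_selection_valid(const struct pick *picks, int nr_siblings)
{
        for (int i = 0; i < nr_siblings; i++)
                for (int j = i + 1; j < nr_siblings; j++)
                        if (!picks_compatible(&picks[i], &picks[j]))
                                return false;
        return true;
}

int main(void)
{
        struct pick ok[2]  = { { false, 42 }, { true,  0 } };  /* tagged + forced idle: valid    */
        struct pick bad[2] = { { false, 42 }, { false, 7 } };  /* two different cookies: invalid */

        printf("%d %d\n", core_selection_valid(ok, 2), core_selection_valid(bad, 2));
        return 0;
}

The diff below implements this by doing one selection for the whole core and force-idling any sibling that has no compatible runnable task.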
@@ -5282,7 +5282,7 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 * Pick up the highest-prio task:
 */
static inline struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
__pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
        const struct sched_class *class;
        struct task_struct *p;
@@ -5323,6 +5323,294 @@ restart:
}

#ifdef CONFIG_SCHED_CORE
static inline bool is_task_rq_idle(struct task_struct *t)
{
        return (task_rq(t)->idle == t);
}

static inline bool cookie_equals(struct task_struct *a, unsigned long cookie)
{
        return is_task_rq_idle(a) || (a->core_cookie == cookie);
}

static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
{
        if (is_task_rq_idle(a) || is_task_rq_idle(b))
                return true;

        return a->core_cookie == b->core_cookie;
}

// XXX fairness/fwd progress conditions
/*
 * Returns
 * - NULL if there is no runnable task for this class.
 * - the highest priority task for this runqueue if it matches
 *   rq->core->core_cookie or its priority is greater than max.
 * - Else returns idle_task.
 */
static struct task_struct *
pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max)
{
        struct task_struct *class_pick, *cookie_pick;
        unsigned long cookie = rq->core->core_cookie;

        class_pick = class->pick_task(rq);
        if (!class_pick)
                return NULL;

        if (!cookie) {
                /*
                 * If class_pick is tagged, return it only if it has
                 * higher priority than max.
                 */
                if (max && class_pick->core_cookie &&
                    prio_less(class_pick, max))
                        return idle_sched_class.pick_task(rq);

                return class_pick;
        }

        /*
         * If class_pick is idle or matches cookie, return early.
         */
        if (cookie_equals(class_pick, cookie))
                return class_pick;

        cookie_pick = sched_core_find(rq, cookie);

        /*
         * If class > max && class > cookie, it is the highest priority task on
         * the core (so far) and it must be selected, otherwise we must go with
         * the cookie pick in order to satisfy the constraint.
         */
        if (prio_less(cookie_pick, class_pick) &&
            (!max || prio_less(max, class_pick)))
                return class_pick;

        return cookie_pick;
}
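The decision tree above can be condensed into a standalone model. This is only a sketch of the control flow under simplifying assumptions: integer priorities replace prio_less() (a larger value means higher priority here), struct mtask, model_pick_task() and the sample tasks are invented names, and the case of a NULL cookie_pick is ignored.

#include <stdio.h>

struct mtask {
        int prio;               /* higher value == higher priority (model only) */
        unsigned long cookie;   /* 0 == untagged                                */
        int is_idle;
};

static struct mtask idle_task = { .prio = -1, .cookie = 0, .is_idle = 1 };

struct mtask *model_pick_task(struct mtask *class_pick, struct mtask *cookie_pick,
                              struct mtask *max, unsigned long core_cookie)
{
        if (!class_pick)
                return NULL;

        if (!core_cookie) {
                /* Untagged core: a tagged pick must not lose to a higher-prio max. */
                if (max && class_pick->cookie && class_pick->prio < max->prio)
                        return &idle_task;
                return class_pick;
        }

        /* Idle or matching cookie: nothing to arbitrate. */
        if (class_pick->is_idle || class_pick->cookie == core_cookie)
                return class_pick;

        /* A non-matching pick only wins if it beats both the cookie pick and max. */
        if (class_pick->prio > cookie_pick->prio &&
            (!max || class_pick->prio > max->prio))
                return class_pick;

        return cookie_pick;
}

int main(void)
{
        struct mtask vm_a = { .prio = 2, .cookie = 0x1 };
        struct mtask vm_b = { .prio = 3, .cookie = 0x2 };

        /* Core cookie is 0x2 and vm_b outranks vm_a, so the cookie pick wins. */
        printf("picked prio %d\n", model_pick_task(&vm_a, &vm_b, NULL, 0x2)->prio);
        return 0;
}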

static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
        struct task_struct *next, *max = NULL;
        const struct sched_class *class;
        const struct cpumask *smt_mask;
        bool need_sync;
        int i, j, cpu;

        if (!sched_core_enabled(rq))
                return __pick_next_task(rq, prev, rf);

        cpu = cpu_of(rq);

        /* Stopper task is switching into idle, no need for core-wide selection. */
        if (cpu_is_offline(cpu)) {
                /*
                 * Reset core_pick so that we don't enter the fastpath when
                 * coming online. core_pick would already be migrated to
                 * another cpu during offline.
                 */
                rq->core_pick = NULL;
                return __pick_next_task(rq, prev, rf);
        }

        /*
         * If there were no {en,de}queues since we picked (IOW, the task
         * pointers are all still valid), and we haven't scheduled the last
         * pick yet, do so now.
         *
         * rq->core_pick can be NULL if no selection was made for a CPU because
         * it was either offline or went offline during a sibling's core-wide
         * selection. In this case, do a core-wide selection.
         */
        if (rq->core->core_pick_seq == rq->core->core_task_seq &&
            rq->core->core_pick_seq != rq->core_sched_seq &&
            rq->core_pick) {
                WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);

                next = rq->core_pick;
                if (next != prev) {
                        put_prev_task(rq, prev);
                        set_next_task(rq, next);
                }

                rq->core_pick = NULL;
                return next;
        }

        put_prev_task_balance(rq, prev, rf);

        smt_mask = cpu_smt_mask(cpu);

        /*
         * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
         *
         * @task_seq guards the task state ({en,de}queues)
         * @pick_seq is the @task_seq we did a selection on
         * @sched_seq is the @pick_seq we scheduled
         *
         * However, preemptions can cause multiple picks on the same task set.
         * 'Fix' this by also increasing @task_seq for every pick.
         */
        rq->core->core_task_seq++;
        need_sync = !!rq->core->core_cookie;

        /* reset state */
        rq->core->core_cookie = 0UL;
        for_each_cpu(i, smt_mask) {
                struct rq *rq_i = cpu_rq(i);

                rq_i->core_pick = NULL;

                if (rq_i->core_forceidle) {
                        need_sync = true;
                        rq_i->core_forceidle = false;
                }

                if (i != cpu)
                        update_rq_clock(rq_i);
        }

        /*
         * Try and select tasks for each sibling in descending sched_class
         * order.
         */
        for_each_class(class) {
again:
                for_each_cpu_wrap(i, smt_mask, cpu) {
                        struct rq *rq_i = cpu_rq(i);
                        struct task_struct *p;

                        if (rq_i->core_pick)
                                continue;

                        /*
                         * If this sibling doesn't yet have a suitable task to
                         * run; ask for the most eligible task, given the
                         * highest priority task already selected for this
                         * core.
                         */
                        p = pick_task(rq_i, class, max);
                        if (!p) {
                                /*
                                 * If there weren't any cookies; we don't need to
                                 * bother with the other siblings.
                                 * If the rest of the core is not running a tagged
                                 * task, i.e. need_sync == 0, and the current CPU
                                 * which called into the schedule() loop does not
                                 * have any tasks for this class, skip selecting for
                                 * other siblings since there's no point. We don't skip
                                 * for RT/DL because that could make CFS force-idle RT.
                                 */
                                if (i == cpu && !need_sync && class == &fair_sched_class)
                                        goto next_class;

                                continue;
                        }

                        /*
                         * Optimize the 'normal' case where there aren't any
                         * cookies and we don't need to sync up.
                         */
                        if (i == cpu && !need_sync && !p->core_cookie) {
                                next = p;
                                goto done;
                        }

                        rq_i->core_pick = p;

                        /*
                         * If this new candidate is of higher priority than the
                         * previous; and they're incompatible; we need to wipe
                         * the slate and start over. pick_task makes sure that
                         * p's priority is more than max if it doesn't match
                         * max's cookie.
                         *
                         * NOTE: this is a linear max-filter and is thus bounded
                         * in execution time.
                         */
                        if (!max || !cookie_match(max, p)) {
                                struct task_struct *old_max = max;

                                rq->core->core_cookie = p->core_cookie;
                                max = p;

                                if (old_max) {
                                        for_each_cpu(j, smt_mask) {
                                                if (j == i)
                                                        continue;

                                                cpu_rq(j)->core_pick = NULL;
                                        }
                                        goto again;
                                } else {
                                        /*
                                         * Once we select a task for a cpu, we
                                         * should not be doing an unconstrained
                                         * pick because it might starve a task
                                         * on a forced idle cpu.
                                         */
                                        need_sync = true;
                                }

                        }
                }
next_class:;
        }

        rq->core->core_pick_seq = rq->core->core_task_seq;
        next = rq->core_pick;
        rq->core_sched_seq = rq->core->core_pick_seq;

        /* Something should have been selected for current CPU */
        WARN_ON_ONCE(!next);

        /*
         * Reschedule siblings
         *
         * NOTE: L1TF -- at this point we're no longer running the old task and
         * sending an IPI (below) ensures the sibling will no longer be running
         * their task. This ensures there is no inter-sibling overlap between
         * non-matching user state.
         */
        for_each_cpu(i, smt_mask) {
                struct rq *rq_i = cpu_rq(i);

                /*
                 * An online sibling might have gone offline before a task
                 * could be picked for it, or it might be offline but later
                 * happen to come online, but it's too late and nothing was
                 * picked for it. That's OK - it will pick tasks for itself,
                 * so ignore it.
                 */
                if (!rq_i->core_pick)
                        continue;

                if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running)
                        rq_i->core_forceidle = true;

                if (i == cpu) {
                        rq_i->core_pick = NULL;
                        continue;
                }

                /* Did we break L1TF mitigation requirements? */
                WARN_ON_ONCE(!cookie_match(next, rq_i->core_pick));

                if (rq_i->curr == rq_i->core_pick) {
                        rq_i->core_pick = NULL;
                        continue;
                }

                resched_curr(rq_i);
        }

done:
        set_next_task(rq, next);
        return next;
}
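To see how the per-sibling loop and the "linear max-filter" restart interact, here is a self-contained userspace model. It is illustrative only: struct mtask, candidate(), cookies_match(), the runqueues and the priorities are invented, candidate() only approximates pick_task(), and there is no locking or sched_class iteration. With the sample data it settles after one restart on "sibling 0 runs idle / sibling 1 runs vm2-high", i.e. the lower-priority tagged task force-idles its sibling.

#include <stddef.h>
#include <stdio.h>

#define NR_SIBLINGS 2

struct mtask {
        const char *name;
        int prio;               /* higher value == higher priority (model only) */
        unsigned long cookie;   /* 0 == untagged                                */
};

static struct mtask vm1_low  = { "vm1-low",  2, 0x1 };
static struct mtask vm2_high = { "vm2-high", 5, 0x2 };
static struct mtask untagged = { "untagged", 1, 0   };
static struct mtask idle     = { "idle",    -1, 0   };

/* Per-sibling runqueues, highest-priority first (hypothetical test data). */
static struct mtask *runq[NR_SIBLINGS][2] = {
        { &vm1_low,  &untagged },
        { &vm2_high, NULL      },
};

static int cookies_match(struct mtask *a, struct mtask *b)
{
        if (a == &idle || b == &idle)
                return 1;
        return a->cookie == b->cookie;
}

/* Rough stand-in for pick_task(): best task on this sibling that either
 * matches max's cookie or outranks max; otherwise idle. */
static struct mtask *candidate(int cpu, struct mtask *max)
{
        for (int k = 0; k < 2 && runq[cpu][k]; k++) {
                struct mtask *p = runq[cpu][k];

                if (!max || cookies_match(max, p) || p->prio > max->prio)
                        return p;
        }
        return &idle;
}

int main(void)
{
        struct mtask *pick[NR_SIBLINGS] = { NULL };
        struct mtask *max = NULL;

again:
        for (int i = 0; i < NR_SIBLINGS; i++) {
                struct mtask *p;

                if (pick[i])
                        continue;

                p = candidate(i, max);
                pick[i] = p;

                /*
                 * Linear max-filter: an incompatible candidate is, by
                 * construction, higher priority than max, so it becomes the
                 * new max and all earlier picks are redone under it.
                 */
                if (!max || !cookies_match(max, p)) {
                        struct mtask *old_max = max;

                        max = p;
                        if (old_max) {
                                for (int j = 0; j < NR_SIBLINGS; j++)
                                        if (j != i)
                                                pick[j] = NULL;
                                goto again;
                        }
                }
        }

        for (int i = 0; i < NR_SIBLINGS; i++)
                printf("sibling %d runs %s\n", i, pick[i]->name);
        return 0;
}

Each restart strictly raises max, and a given max can be displaced at most once per sibling, which is why the kernel comment can claim the filter is bounded in execution time.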

static inline void sched_core_cpu_starting(unsigned int cpu)
{
@@ -5354,6 +5642,12 @@ static inline void sched_core_cpu_starting(unsigned int cpu)

static inline void sched_core_cpu_starting(unsigned int cpu) {}

static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
        return __pick_next_task(rq, prev, rf);
}

#endif /* CONFIG_SCHED_CORE */

/*
@@ -8609,7 +8903,12 @@ void __init sched_init(void)

#ifdef CONFIG_SCHED_CORE
                rq->core = NULL;
                rq->core_pick = NULL;
                rq->core_enabled = 0;
                rq->core_tree = RB_ROOT;
                rq->core_forceidle = false;

                rq->core_cookie = 0UL;
#endif
        }

@@ -1079,11 +1079,16 @@ struct rq {
#ifdef CONFIG_SCHED_CORE
        /* per rq */
        struct rq *core;
        struct task_struct *core_pick;
        unsigned int core_enabled;
        unsigned int core_sched_seq;
        struct rb_root core_tree;
        unsigned char core_forceidle;

        /* shared state */
        unsigned int core_task_seq;
        unsigned int core_pick_seq;
        unsigned long core_cookie;
#endif
};
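The three sequence counters declared above cooperate as described in the comment inside pick_next_task(): core_task_seq guards enqueue/dequeue (and is also bumped on every pick), core_pick_seq records the task_seq a core-wide selection was made against, and core_sched_seq records the pick_seq a CPU has actually scheduled. A toy model of the fastpath test follows; the struct and helper names are invented, only the comparison logic mirrors the kernel code.

#include <stdbool.h>
#include <stdio.h>

struct core_state {
        unsigned int task_seq;   /* bumped on every enqueue/dequeue and pick     */
        unsigned int pick_seq;   /* task_seq a core-wide selection was made for  */
};

struct cpu_state {
        unsigned int sched_seq;  /* pick_seq this CPU has already scheduled      */
        bool have_pick;          /* stand-in for rq->core_pick != NULL           */
};

/* Can this CPU simply consume the existing core-wide pick? */
static bool can_use_fastpath(const struct core_state *core, const struct cpu_state *cpu)
{
        return core->pick_seq == core->task_seq &&   /* no {en,de}queue since the pick */
               core->pick_seq != cpu->sched_seq &&   /* and we haven't scheduled it yet */
               cpu->have_pick;
}

int main(void)
{
        struct core_state core    = { .task_seq = 7, .pick_seq = 7 };
        struct cpu_state  sibling = { .sched_seq = 6, .have_pick = true };

        printf("fastpath: %d\n", can_use_fastpath(&core, &sibling));  /* 1 */

        core.task_seq++;   /* a wakeup or dequeue invalidates the selection */
        printf("fastpath: %d\n", can_use_fastpath(&core, &sibling));  /* 0 */
        return 0;
}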

@@ -2060,7 +2065,6 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev)

static inline void set_next_task(struct rq *rq, struct task_struct *next)
{
        WARN_ON_ONCE(rq->curr != next);
        next->sched_class->set_next_task(rq, next, false);
}