sched/fair: Improve PELT stuff some more

Vincent noted that the update_tg_load_avg() usage in commit:

  3d30544f02 ("sched/fair: Apply more PELT fixes")

isn't entirely sufficient. We need to call this function every time
cfs_rq->avg.load changes; this includes when update_cfs_rq_load_avg()
returns true, but {attach,detach}_entity_load_avg() themselves also
change it. This means we need to unconditionally call
update_tg_load_avg().

Also, add more comments.
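
For illustration, the before/after call pattern at the attach site, condensed
from the hunks below (a sketch, not the full functions):

	/* Before: only propagate when update_cfs_rq_load_avg() saw a change. */
	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
	attach_entity_load_avg(cfs_rq, se);	/* also changes cfs_rq->avg.load */
	if (tg_update)
		update_tg_load_avg(cfs_rq, false);

	/* After: the attach changed cfs_rq->avg.load too, so always propagate. */
	update_cfs_rq_load_avg(now, cfs_rq, false);
	attach_entity_load_avg(cfs_rq, se);
	update_tg_load_avg(cfs_rq, false);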

Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 7c3edd2c30 (parent: a1fd46565b)
Author:    Peter Zijlstra, 2016-07-13 10:56:25 +02:00
Committer: Ingo Molnar


@@ -726,7 +726,6 @@ void post_init_entity_util_avg(struct sched_entity *se)
 	struct sched_avg *sa = &se->avg;
 	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int tg_update;
 
 	if (cap > 0) {
 		if (cfs_rq->avg.util_avg != 0) {
@@ -759,10 +758,9 @@ void post_init_entity_util_avg(struct sched_entity *se)
 		}
 	}
 
-	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+	update_cfs_rq_load_avg(now, cfs_rq, false);
 	attach_entity_load_avg(cfs_rq, se);
-	if (tg_update)
-		update_tg_load_avg(cfs_rq, false);
+	update_tg_load_avg(cfs_rq, false);
 }
 
 #else /* !CONFIG_SMP */
@@ -2803,9 +2801,21 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * Updating tg's load_avg is necessary before update_cfs_share (which is done)
- * and effective_load (which is not done because it is too costly).
+/**
+ * update_tg_load_avg - update the tg's load avg
+ * @cfs_rq: the cfs_rq whose avg changed
+ * @force: update regardless of how small the difference
+ *
+ * This function 'ensures': tg->load_avg := \Sum tg->cfs_rq[]->avg.load.
+ * However, because tg->load_avg is a global value there are performance
+ * considerations.
+ *
+ * In order to avoid having to look at the other cfs_rq's, we use a
+ * differential update where we store the last value we propagated. This in
+ * turn allows skipping updates if the differential is 'small'.
+ *
+ * Updating tg's load_avg is necessary before update_cfs_share() (which is
+ * done) and effective_load() (which is not done because it is too costly).
  */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 {
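
For reference, the differential update the new kerneldoc describes is the
existing body of update_tg_load_avg(); roughly as follows. This is a sketch of
the mainline code of this era, not part of the hunk above, so details such as
the 1/64 threshold should be treated as illustrative:

	static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
	{
		long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;

		/* Only propagate once the differential is no longer 'small'. */
		if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
			atomic_long_add(delta, &cfs_rq->tg->load_avg);
			cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
		}
	}
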
@@ -2931,10 +2941,10 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
  *
  * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
  *
- * Returns true if the load decayed or we removed utilization. It is expected
- * that one calls update_tg_load_avg() on this condition, but after you've
- * modified the cfs_rq avg (attach/detach), such that we propagate the new
- * avg up.
+ * Returns true if the load decayed or we removed load.
+ *
+ * Since both these conditions indicate a changed cfs_rq->avg.load we should
+ * call update_tg_load_avg() when this function returns true.
  */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
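
The return value still matters in paths that only decay the averages and do
not attach or detach anything in between, e.g. the periodic blocked-load
update. A sketch of that caller pattern (assuming the update_blocked_averages()
loop of this era; not part of this patch):

	for_each_leaf_cfs_rq(rq, cfs_rq) {
		/* throttled entities do not contribute to load */
		if (throttled_hierarchy(cfs_rq))
			continue;

		/* Nothing else changed cfs_rq->avg.load, so propagate only on true. */
		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
			update_tg_load_avg(cfs_rq, false);
	}
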
@@ -8442,7 +8452,6 @@ static void detach_task_cfs_rq(struct task_struct *p)
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int tg_update;
 
 	if (!vruntime_normalized(p)) {
 		/*
@@ -8454,10 +8463,9 @@ static void detach_task_cfs_rq(struct task_struct *p)
 	}
 
 	/* Catch up with the cfs_rq and remove our load when we leave */
-	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+	update_cfs_rq_load_avg(now, cfs_rq, false);
 	detach_entity_load_avg(cfs_rq, se);
-	if (tg_update)
-		update_tg_load_avg(cfs_rq, false);
+	update_tg_load_avg(cfs_rq, false);
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
@@ -8465,7 +8473,6 @@ static void attach_task_cfs_rq(struct task_struct *p)
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int tg_update;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
@@ -8476,10 +8483,9 @@ static void attach_task_cfs_rq(struct task_struct *p)
 #endif
 
 	/* Synchronize task with its cfs_rq */
-	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+	update_cfs_rq_load_avg(now, cfs_rq, false);
 	attach_entity_load_avg(cfs_rq, se);
-	if (tg_update)
-		update_tg_load_avg(cfs_rq, false);
+	update_tg_load_avg(cfs_rq, false);
 
 	if (!vruntime_normalized(p))
 		se->vruntime += cfs_rq->min_vruntime;