/*
 * Summary of the hunks that follow (text ahead of the first "diff --git"
 * header is not part of the patch itself):
 *
 *  - include/linux/sched.h: adds a u64 last_min_vruntime member to
 *    struct sched_entity.
 *  - kernel/sched.c: __sched_fork() zeroes the new member, and
 *    init_cfs_rq() sets cfs_rq->min_vruntime to (u64)(-(1LL << 20)),
 *    which is 2^20 below the value at which a u64 wraps to zero.
 *    NOTE(review): presumably chosen so that wraparound handling gets
 *    exercised shortly after start-up; confirm with the author.
 *  - kernel/sched_fair.c: adds __sched_vslice(nr_running), which returns
 *    __sched_period(nr_running) divided by nr_running via do_div().
 *    The helper has no zero check of its own, so callers have to pass a
 *    non-zero nr_running.
 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c33227b0f82..d74830cc51eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -908,6 +908,7 @@ struct sched_entity {
 	u64			sum_exec_runtime;
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
+	u64			last_min_vruntime;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
diff --git a/kernel/sched.c b/kernel/sched.c
index 5004dff91850..fe1165b226a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1615,6 +1615,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.last_min_vruntime		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
@@ -6495,6 +6496,7 @@ static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	cfs_rq->rq = rq;
 #endif
+	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
 }
 
 void __init sched_init(void)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5db7bd18e818..87acc5cedd2d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -243,6 +243,15 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return period;
 }
 
+static u64 __sched_vslice(unsigned long nr_running)
+{
+	u64 period = __sched_period(nr_running);
+
+	do_div(period, nr_running);
+
+	return period;
+}
+
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -441,32 +450,33 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) { - u64 min_runtime, latency; + u64 vruntime; - min_runtime = cfs_rq->min_vruntime; + vruntime = cfs_rq->min_vruntime; if (sched_feat(USE_TREE_AVG)) { struct sched_entity *last = __pick_last_entity(cfs_rq); if (last) { - min_runtime = __pick_next_entity(cfs_rq)->vruntime; - min_runtime += last->vruntime; - min_runtime >>= 1; + vruntime += last->vruntime; + vruntime >>= 1; } - } else if (sched_feat(APPROX_AVG)) - min_runtime += sysctl_sched_latency/2; + } else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running) + vruntime += __sched_vslice(cfs_rq->nr_running)/2; if (initial && sched_feat(START_DEBIT)) - min_runtime += sched_slice(cfs_rq, se); + vruntime += __sched_vslice(cfs_rq->nr_running + 1); if (!initial && sched_feat(NEW_FAIR_SLEEPERS)) { - latency = sysctl_sched_latency; - if (min_runtime > latency) - min_runtime -= latency; + s64 latency = cfs_rq->min_vruntime - se->last_min_vruntime; + if (latency < 0 || !cfs_rq->nr_running) + latency = 0; else - min_runtime = 0; + latency = min_t(s64, latency, sysctl_sched_latency); + vruntime -= latency; } - se->vruntime = max(se->vruntime, min_runtime); + se->vruntime = vruntime; + } static void @@ -478,6 +488,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) update_curr(cfs_rq); if (wakeup) { + /* se->vruntime += cfs_rq->min_vruntime; */ place_entity(cfs_rq, se, 0); enqueue_sleeper(cfs_rq, se); } @@ -492,8 +503,8 @@ static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) { update_stats_dequeue(cfs_rq, se); -#ifdef CONFIG_SCHEDSTATS if (sleep) { +#ifdef CONFIG_SCHEDSTATS if (entity_is_task(se)) { struct task_struct *tsk = task_of(se); @@ -502,8 +513,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) if (tsk->state & TASK_UNINTERRUPTIBLE) se->block_start = 
rq_of(cfs_rq)->clock; } - } #endif + /* se->vruntime = entity_key(cfs_rq, se); */ + se->last_min_vruntime = cfs_rq->min_vruntime; + } + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se);