Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Thomas Gleixner: "From the scheduler department: - a bunch of sched deadline related fixes which deal with various buglets and corner cases. - two fixes for the loadavg spikes which are caused by the delayed NOHZ accounting" * 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/deadline: Use deadline instead of period when calculating overflow sched/deadline: Throttle a constrained deadline task activated after the deadline sched/deadline: Make sure the replenishment timer fires in the next period sched/loadavg: Use {READ,WRITE}_ONCE() for sample window sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting sched/deadline: Add missing update_rq_clock() in dl_task_timer()
This commit is contained in:
commit
cd21debe53
@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
|
||||
*
|
||||
* This function returns true if:
|
||||
*
|
||||
* runtime / (deadline - t) > dl_runtime / dl_period ,
|
||||
* runtime / (deadline - t) > dl_runtime / dl_deadline ,
|
||||
*
|
||||
* IOW we can't recycle current parameters.
|
||||
*
|
||||
* Notice that the bandwidth check is done against the period. For
|
||||
* Notice that the bandwidth check is done against the deadline. For
|
||||
* task with deadline equal to period this is the same as using
|
||||
* dl_deadline instead of dl_period in the equation above.
|
||||
* dl_period instead of dl_deadline in the equation above.
|
||||
*/
|
||||
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
|
||||
struct sched_dl_entity *pi_se, u64 t)
|
||||
@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
|
||||
* of anything below microseconds resolution is actually fiction
|
||||
* (but still we want to give the user that illusion >;).
|
||||
*/
|
||||
left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
|
||||
left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
|
||||
right = ((dl_se->deadline - t) >> DL_SCALE) *
|
||||
(pi_se->dl_runtime >> DL_SCALE);
|
||||
|
||||
@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
|
||||
}
|
||||
}
|
||||
|
||||
static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the entity depleted all its runtime, and if we want it to sleep
|
||||
* while waiting for some new execution time to become available, we
|
||||
* set the bandwidth enforcement timer to the replenishment instant
|
||||
* set the bandwidth replenishment timer to the replenishment instant
|
||||
* and try to activate it.
|
||||
*
|
||||
* Notice that it is important for the caller to know if the timer
|
||||
@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
|
||||
* that it is actually coming from rq->clock and not from
|
||||
* hrtimer's time base reading.
|
||||
*/
|
||||
act = ns_to_ktime(dl_se->deadline);
|
||||
act = ns_to_ktime(dl_next_period(dl_se));
|
||||
now = hrtimer_cb_get_time(timer);
|
||||
delta = ktime_to_ns(now) - rq_clock(rq);
|
||||
act = ktime_add_ns(act, delta);
|
||||
@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
||||
lockdep_unpin_lock(&rq->lock, rf.cookie);
|
||||
rq = dl_task_offline_migration(rq, p);
|
||||
rf.cookie = lockdep_pin_lock(&rq->lock);
|
||||
update_rq_clock(rq);
|
||||
|
||||
/*
|
||||
* Now that the task has been migrated to the new RQ and we
|
||||
@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
|
||||
timer->function = dl_task_timer;
|
||||
}
|
||||
|
||||
/*
|
||||
* During the activation, CBS checks if it can reuse the current task's
|
||||
* runtime and period. If the deadline of the task is in the past, CBS
|
||||
* cannot use the runtime, and so it replenishes the task. This rule
|
||||
* works fine for implicit deadline tasks (deadline == period), and the
|
||||
* CBS was designed for implicit deadline tasks. However, a task with
|
||||
* constrained deadline (deadine < period) might be awakened after the
|
||||
* deadline, but before the next period. In this case, replenishing the
|
||||
* task would allow it to run for runtime / deadline. As in this case
|
||||
* deadline < period, CBS enables a task to run for more than the
|
||||
* runtime / period. In a very loaded system, this can cause a domino
|
||||
* effect, making other tasks miss their deadlines.
|
||||
*
|
||||
* To avoid this problem, in the activation of a constrained deadline
|
||||
* task after the deadline but before the next period, throttle the
|
||||
* task and set the replenishing timer to the begin of the next period,
|
||||
* unless it is boosted.
|
||||
*/
|
||||
static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
struct task_struct *p = dl_task_of(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
|
||||
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
|
||||
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
|
||||
if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
|
||||
return;
|
||||
dl_se->dl_throttled = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
|
||||
__dequeue_dl_entity(dl_se);
|
||||
}
|
||||
|
||||
static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
|
||||
{
|
||||
return dl_se->dl_deadline < dl_se->dl_period;
|
||||
}
|
||||
|
||||
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
struct task_struct *pi_task = rt_mutex_get_top_task(p);
|
||||
@ -947,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if a constrained deadline task was activated
|
||||
* after the deadline but before the next period.
|
||||
* If that is the case, the task will be throttled and
|
||||
* the replenishment timer will be set to the next period.
|
||||
*/
|
||||
if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
|
||||
dl_check_constrained_dl(&p->dl);
|
||||
|
||||
/*
|
||||
* If p is throttled, we do nothing. In fact, if it exhausted
|
||||
* its budget it needs a replenishment and, since it now is on
|
||||
|
@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
|
||||
* If the folding window started, make sure we start writing in the
|
||||
* next idle-delta.
|
||||
*/
|
||||
if (!time_before(jiffies, calc_load_update))
|
||||
if (!time_before(jiffies, READ_ONCE(calc_load_update)))
|
||||
idx++;
|
||||
|
||||
return idx & 1;
|
||||
@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
|
||||
struct rq *this_rq = this_rq();
|
||||
|
||||
/*
|
||||
* If we're still before the sample window, we're done.
|
||||
* If we're still before the pending sample window, we're done.
|
||||
*/
|
||||
this_rq->calc_load_update = READ_ONCE(calc_load_update);
|
||||
if (time_before(jiffies, this_rq->calc_load_update))
|
||||
return;
|
||||
|
||||
@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
|
||||
* accounted through the nohz accounting, so skip the entire deal and
|
||||
* sync up for the next window.
|
||||
*/
|
||||
this_rq->calc_load_update = calc_load_update;
|
||||
if (time_before(jiffies, this_rq->calc_load_update + 10))
|
||||
this_rq->calc_load_update += LOAD_FREQ;
|
||||
}
|
||||
@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
|
||||
*/
|
||||
static void calc_global_nohz(void)
|
||||
{
|
||||
unsigned long sample_window;
|
||||
long delta, active, n;
|
||||
|
||||
if (!time_before(jiffies, calc_load_update + 10)) {
|
||||
sample_window = READ_ONCE(calc_load_update);
|
||||
if (!time_before(jiffies, sample_window + 10)) {
|
||||
/*
|
||||
* Catch-up, fold however many we are behind still
|
||||
*/
|
||||
delta = jiffies - calc_load_update - 10;
|
||||
delta = jiffies - sample_window - 10;
|
||||
n = 1 + (delta / LOAD_FREQ);
|
||||
|
||||
active = atomic_long_read(&calc_load_tasks);
|
||||
@ -324,7 +326,7 @@ static void calc_global_nohz(void)
|
||||
avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
||||
avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
||||
|
||||
calc_load_update += n * LOAD_FREQ;
|
||||
WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
|
||||
*/
|
||||
void calc_global_load(unsigned long ticks)
|
||||
{
|
||||
unsigned long sample_window;
|
||||
long active, delta;
|
||||
|
||||
if (time_before(jiffies, calc_load_update + 10))
|
||||
sample_window = READ_ONCE(calc_load_update);
|
||||
if (time_before(jiffies, sample_window + 10))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
|
||||
avenrun[1] = calc_load(avenrun[1], EXP_5, active);
|
||||
avenrun[2] = calc_load(avenrun[2], EXP_15, active);
|
||||
|
||||
calc_load_update += LOAD_FREQ;
|
||||
WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
|
||||
|
||||
/*
|
||||
* In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.
|
||||
|
Loading…
Reference in New Issue
Block a user