linux/kernel/sched_idletask.c
Peter Williams a4ac01c36e sched: fix bug in balance_tasks()
There are two problems with balance_tasks() and how it used:

1. The variables best_prio and best_prio_seen (inherited from the old
move_tasks()) were only required to handle problems caused by the
active/expired arrays, the order in which they were processed and the
possibility that the task with the highest priority could be on either.
  These issues are no longer present and the extra overhead associated
with their use is unnecessary (and possibly wrong).

2. In the absence of CONFIG_FAIR_GROUP_SCHED being set, the same
this_best_prio variable needs to be used by all scheduling classes or
there is a risk of moving too much load.  E.g. if the highest priority
task on this at the beginning is a fairly low priority task and the rt
class migrates a task (during its turn) then that moved task becomes the
new highest priority task on this_rq but when the sched_fair class
initializes its copy of this_best_prio it will get the priority of the
original highest priority task as, due to the run queue locks being
held, the reschedule triggered by pull_task() will not have taken place.
  This could result in inappropriate overriding of skip_for_load and
excessive load being moved.

The attached patch addresses these problems by deleting all reference to
best_prio and best_prio_seen and making this_best_prio a reference
parameter to the various functions involved.

load_balance_fair() has also been modified so that this_best_prio is
only reset (in the loop) if CONFIG_FAIR_GROUP_SCHED is set.  This should
preserve the effect of helping spread groups' higher priority tasks
around the available CPUs while improving system performance when
CONFIG_FAIR_GROUP_SCHED isn't set.

Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2007-08-09 11:16:46 +02:00

72 lines
1.7 KiB
C

/*
* idle-task scheduling class.
*
* (NOTE: these are not related to SCHED_IDLE tasks which are
* handled in sched_fair.c)
*/
/*
* Idle tasks are unconditionally rescheduled:
*/
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
{
resched_task(rq->idle);
}
static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
{
schedstat_inc(rq, sched_goidle);
return rq->idle;
}
/*
* It is not legal to sleep in the idle task - print a warning
* message if some code attempts to do it:
*/
static void
dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
{
spin_unlock_irq(&rq->lock);
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
dump_stack();
spin_lock_irq(&rq->lock);
}
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
{
}
static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio)
{
return 0;
}
static void task_tick_idle(struct rq *rq, struct task_struct *curr)
{
}
/*
* Simple, special scheduling class for the per-CPU idle tasks:
*/
static struct sched_class idle_sched_class __read_mostly = {
/* no enqueue/yield_task for idle tasks */
/* dequeue is not valid, we print a debug message there: */
.dequeue_task = dequeue_task_idle,
.check_preempt_curr = check_preempt_curr_idle,
.pick_next_task = pick_next_task_idle,
.put_prev_task = put_prev_task_idle,
.load_balance = load_balance_idle,
.task_tick = task_tick_idle,
/* no .task_new for idle tasks */
};