cpuidle: coupled: abort idle if pokes are pending

Joseph Lo <josephl@nvidia.com> reported a lockup on Tegra20 caused
by a race condition in coupled cpuidle.  When two or more cpus
enter idle at the same time, the first cpus to arrive may go to the
ready loop without processing pending pokes from the last cpu to
arrive.

This patch adds a check for pending pokes once all cpus have been
synchronized in the ready loop and resets the coupled state and
retries if any cpus failed to handle their pending poke.

Retrying on all cpus may trigger the same issue again, so this patch
also adds a check to ensure that each cpu has received at least one
poke between when it enters the waiting loop and when it moves on to
the ready loop.

Reported-and-tested-by: Joseph Lo <josephl@nvidia.com>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Colin Cross <ccross@android.com>
Cc: 3.6+ <stable@vger.kernel.org> # 3.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
Colin Cross 2013-08-28 18:41:47 -07:00 committed by Rafael J. Wysocki
parent 59e9985611
commit f983827bcb

View File

@ -106,6 +106,7 @@ struct cpuidle_coupled {
cpumask_t coupled_cpus; cpumask_t coupled_cpus;
int requested_state[NR_CPUS]; int requested_state[NR_CPUS];
atomic_t ready_waiting_counts; atomic_t ready_waiting_counts;
atomic_t abort_barrier;
int online_count; int online_count;
int refcnt; int refcnt;
int prevent; int prevent;
@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock);
static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
/* /*
* The cpuidle_coupled_poked_mask mask is used to avoid calling * The cpuidle_coupled_poke_pending mask is used to avoid calling
* __smp_call_function_single with the per cpu call_single_data struct already * __smp_call_function_single with the per cpu call_single_data struct already
* in use. This prevents a deadlock where two cpus are waiting for each others * in use. This prevents a deadlock where two cpus are waiting for each others
* call_single_data struct to be available * call_single_data struct to be available
*/ */
static cpumask_t cpuidle_coupled_poked_mask; static cpumask_t cpuidle_coupled_poke_pending;
/*
* The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked
* once to minimize entering the ready loop with a poke pending, which would
* require aborting and retrying.
*/
static cpumask_t cpuidle_coupled_poked;
/** /**
* cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus
@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
return state; return state;
} }
static void cpuidle_coupled_poked(void *info) static void cpuidle_coupled_handle_poke(void *info)
{ {
int cpu = (unsigned long)info; int cpu = (unsigned long)info;
cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); cpumask_set_cpu(cpu, &cpuidle_coupled_poked);
cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending);
} }
/** /**
@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu)
{ {
struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
__smp_call_function_single(cpu, csd, 0); __smp_call_function_single(cpu, csd, 0);
} }
@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu,
* @coupled: the struct coupled that contains the current cpu * @coupled: the struct coupled that contains the current cpu
* @next_state: the index in drv->states of the requested state for this cpu * @next_state: the index in drv->states of the requested state for this cpu
* *
* Updates the requested idle state for the specified cpuidle device, * Updates the requested idle state for the specified cpuidle device.
* poking all coupled cpus out of idle if necessary to let them see the new * Returns the number of waiting cpus.
* state.
*/ */
static void cpuidle_coupled_set_waiting(int cpu, static int cpuidle_coupled_set_waiting(int cpu,
struct cpuidle_coupled *coupled, int next_state) struct cpuidle_coupled *coupled, int next_state)
{ {
int w;
coupled->requested_state[cpu] = next_state; coupled->requested_state[cpu] = next_state;
/* /*
* If this is the last cpu to enter the waiting state, poke
* all the other cpus out of their waiting state so they can
* enter a deeper state. This can race with one of the cpus
* exiting the waiting state due to an interrupt and
* decrementing waiting_count, see comment below.
*
* The atomic_inc_return provides a write barrier to order the write * The atomic_inc_return provides a write barrier to order the write
* to requested_state with the later write that increments ready_count. * to requested_state with the later write that increments ready_count.
*/ */
w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
if (w == coupled->online_count)
cpuidle_coupled_poke_others(cpu, coupled);
} }
/** /**
@ -418,13 +416,24 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
static int cpuidle_coupled_clear_pokes(int cpu) static int cpuidle_coupled_clear_pokes(int cpu)
{ {
local_irq_enable(); local_irq_enable();
while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
cpu_relax(); cpu_relax();
local_irq_disable(); local_irq_disable();
return need_resched() ? -EINTR : 0; return need_resched() ? -EINTR : 0;
} }
static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled)
{
cpumask_t cpus;
int ret;
cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus);
return ret;
}
/** /**
* cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus
* @dev: struct cpuidle_device for the current cpu * @dev: struct cpuidle_device for the current cpu
@ -449,6 +458,7 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
{ {
int entered_state = -1; int entered_state = -1;
struct cpuidle_coupled *coupled = dev->coupled; struct cpuidle_coupled *coupled = dev->coupled;
int w;
if (!coupled) if (!coupled)
return -EINVAL; return -EINVAL;
@ -466,14 +476,33 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
/* Read barrier ensures online_count is read after prevent is cleared */ /* Read barrier ensures online_count is read after prevent is cleared */
smp_rmb(); smp_rmb();
cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); reset:
cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked);
w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
/*
* If this is the last cpu to enter the waiting state, poke
* all the other cpus out of their waiting state so they can
* enter a deeper state. This can race with one of the cpus
* exiting the waiting state due to an interrupt and
* decrementing waiting_count, see comment below.
*/
if (w == coupled->online_count) {
cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked);
cpuidle_coupled_poke_others(dev->cpu, coupled);
}
retry: retry:
/* /*
* Wait for all coupled cpus to be idle, using the deepest state * Wait for all coupled cpus to be idle, using the deepest state
* allowed for a single cpu. * allowed for a single cpu. If this was not the poking cpu, wait
* for at least one poke before leaving to avoid a race where
* two cpus could arrive at the waiting loop at the same time,
* but the first of the two to arrive could skip the loop without
* processing the pokes from the last to arrive.
*/ */
while (!cpuidle_coupled_cpus_waiting(coupled)) { while (!cpuidle_coupled_cpus_waiting(coupled) ||
!cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) {
if (cpuidle_coupled_clear_pokes(dev->cpu)) { if (cpuidle_coupled_clear_pokes(dev->cpu)) {
cpuidle_coupled_set_not_waiting(dev->cpu, coupled); cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
goto out; goto out;
@ -494,6 +523,12 @@ retry:
goto out; goto out;
} }
/*
* Make sure final poke status for this cpu is visible before setting
* cpu as ready.
*/
smp_wmb();
/* /*
* All coupled cpus are probably idle. There is a small chance that * All coupled cpus are probably idle. There is a small chance that
* one of the other cpus just became active. Increment the ready count, * one of the other cpus just became active. Increment the ready count,
@ -513,6 +548,28 @@ retry:
cpu_relax(); cpu_relax();
} }
/*
* Make sure read of all cpus ready is done before reading pending pokes
*/
smp_rmb();
/*
* There is a small chance that a cpu left and reentered idle after this
* cpu saw that all cpus were waiting. The cpu that reentered idle will
* have sent this cpu a poke, which will still be pending after the
* ready loop. The pending interrupt may be lost by the interrupt
* controller when entering the deep idle state. It's not possible to
* clear a pending interrupt without turning interrupts on and handling
* it, and it's too late to turn on interrupts here, so reset the
* coupled idle state of all cpus and retry.
*/
if (cpuidle_coupled_any_pokes_pending(coupled)) {
cpuidle_coupled_set_done(dev->cpu, coupled);
/* Wait for all cpus to see the pending pokes */
cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier);
goto reset;
}
/* all cpus have acked the coupled state */ /* all cpus have acked the coupled state */
next_state = cpuidle_coupled_get_state(dev, coupled); next_state = cpuidle_coupled_get_state(dev, coupled);
@ -598,7 +655,7 @@ have_coupled:
coupled->refcnt++; coupled->refcnt++;
csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
csd->func = cpuidle_coupled_poked; csd->func = cpuidle_coupled_handle_poke;
csd->info = (void *)(unsigned long)dev->cpu; csd->info = (void *)(unsigned long)dev->cpu;
return 0; return 0;