mirror of
https://github.com/torvalds/linux.git
synced 2024-11-15 00:21:59 +00:00
bc9560155f
Commit de30ec4730
"Remove unnecessary ->wait.lock serialization when reading completion state" was not correct: without lock/unlock, code like stop_machine_from_inactive_cpu() while (!completion_done()) cpu_relax(); can return before complete() finishes its spin_unlock() which writes to this memory. Add spin_unlock_wait(). While at it, change try_wait_for_completion() to use READ_ONCE(). Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reported-by: Davidlohr Bueso <dave@stgolabs.net> Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> [ Added a comment with the barrier. ] Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Nicholas Mc Guire <der.herr@hofr.at> Cc: raghavendra.kt@linux.vnet.ibm.com Cc: waiman.long@hp.com Fixes: de30ec4730
("sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state") Link: http://lkml.kernel.org/r/20150212195913.GA30430@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
318 lines
9.6 KiB
C
318 lines
9.6 KiB
C
/*
|
|
* Generic wait-for-completion handler;
|
|
*
|
|
* It differs from semaphores in that their default case is the opposite,
|
|
* wait_for_completion default blocks whereas semaphore default non-block. The
|
|
* interface also makes it easy to 'complete' multiple waiting threads,
|
|
* something which isn't entirely natural for semaphores.
|
|
*
|
|
* But more importantly, the primitive documents the usage. Semaphores would
|
|
* typically be used for exclusion which gives rise to priority inversion.
|
|
* Waiting for completion is typically a sync point, but not an exclusion point.
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/completion.h>
|
|
|
|
/**
|
|
* complete: - signals a single thread waiting on this completion
|
|
* @x: holds the state of this particular completion
|
|
*
|
|
* This will wake up a single thread waiting on this completion. Threads will be
|
|
* awakened in the same order in which they were queued.
|
|
*
|
|
* See also complete_all(), wait_for_completion() and related routines.
|
|
*
|
|
* It may be assumed that this function implies a write memory barrier before
|
|
* changing the task state if and only if any tasks are woken up.
|
|
*/
|
|
void complete(struct completion *x)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&x->wait.lock, flags);
|
|
x->done++;
|
|
__wake_up_locked(&x->wait, TASK_NORMAL, 1);
|
|
spin_unlock_irqrestore(&x->wait.lock, flags);
|
|
}
|
|
EXPORT_SYMBOL(complete);
|
|
|
|
/**
|
|
* complete_all: - signals all threads waiting on this completion
|
|
* @x: holds the state of this particular completion
|
|
*
|
|
* This will wake up all threads waiting on this particular completion event.
|
|
*
|
|
* It may be assumed that this function implies a write memory barrier before
|
|
* changing the task state if and only if any tasks are woken up.
|
|
*/
|
|
void complete_all(struct completion *x)
|
|
{
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&x->wait.lock, flags);
|
|
x->done += UINT_MAX/2;
|
|
__wake_up_locked(&x->wait, TASK_NORMAL, 0);
|
|
spin_unlock_irqrestore(&x->wait.lock, flags);
|
|
}
|
|
EXPORT_SYMBOL(complete_all);
|
|
|
|
static inline long __sched
|
|
do_wait_for_common(struct completion *x,
|
|
long (*action)(long), long timeout, int state)
|
|
{
|
|
if (!x->done) {
|
|
DECLARE_WAITQUEUE(wait, current);
|
|
|
|
__add_wait_queue_tail_exclusive(&x->wait, &wait);
|
|
do {
|
|
if (signal_pending_state(state, current)) {
|
|
timeout = -ERESTARTSYS;
|
|
break;
|
|
}
|
|
__set_current_state(state);
|
|
spin_unlock_irq(&x->wait.lock);
|
|
timeout = action(timeout);
|
|
spin_lock_irq(&x->wait.lock);
|
|
} while (!x->done && timeout);
|
|
__remove_wait_queue(&x->wait, &wait);
|
|
if (!x->done)
|
|
return timeout;
|
|
}
|
|
x->done--;
|
|
return timeout ?: 1;
|
|
}
|
|
|
|
static inline long __sched
|
|
__wait_for_common(struct completion *x,
|
|
long (*action)(long), long timeout, int state)
|
|
{
|
|
might_sleep();
|
|
|
|
spin_lock_irq(&x->wait.lock);
|
|
timeout = do_wait_for_common(x, action, timeout, state);
|
|
spin_unlock_irq(&x->wait.lock);
|
|
return timeout;
|
|
}
|
|
|
|
static long __sched
|
|
wait_for_common(struct completion *x, long timeout, int state)
|
|
{
|
|
return __wait_for_common(x, schedule_timeout, timeout, state);
|
|
}
|
|
|
|
static long __sched
|
|
wait_for_common_io(struct completion *x, long timeout, int state)
|
|
{
|
|
return __wait_for_common(x, io_schedule_timeout, timeout, state);
|
|
}
|
|
|
|
/**
|
|
* wait_for_completion: - waits for completion of a task
|
|
* @x: holds the state of this particular completion
|
|
*
|
|
* This waits to be signaled for completion of a specific task. It is NOT
|
|
* interruptible and there is no timeout.
|
|
*
|
|
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
|
|
* and interrupt capability. Also see complete().
|
|
*/
|
|
void __sched wait_for_completion(struct completion *x)
|
|
{
|
|
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion);
|
|
|
|
/**
|
|
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
|
|
* @x: holds the state of this particular completion
|
|
* @timeout: timeout value in jiffies
|
|
*
|
|
* This waits for either a completion of a specific task to be signaled or for a
|
|
* specified timeout to expire. The timeout is in jiffies. It is not
|
|
* interruptible.
|
|
*
|
|
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
|
|
* till timeout) if completed.
|
|
*/
|
|
unsigned long __sched
|
|
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
|
{
|
|
return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_timeout);
|
|
|
|
/**
|
|
* wait_for_completion_io: - waits for completion of a task
|
|
* @x: holds the state of this particular completion
|
|
*
|
|
* This waits to be signaled for completion of a specific task. It is NOT
|
|
* interruptible and there is no timeout. The caller is accounted as waiting
|
|
* for IO (which traditionally means blkio only).
|
|
*/
|
|
void __sched wait_for_completion_io(struct completion *x)
|
|
{
|
|
wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_io);
|
|
|
|
/**
|
|
* wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
|
|
* @x: holds the state of this particular completion
|
|
* @timeout: timeout value in jiffies
|
|
*
|
|
* This waits for either a completion of a specific task to be signaled or for a
|
|
* specified timeout to expire. The timeout is in jiffies. It is not
|
|
* interruptible. The caller is accounted as waiting for IO (which traditionally
|
|
* means blkio only).
|
|
*
|
|
* Return: 0 if timed out, and positive (at least 1, or number of jiffies left
|
|
* till timeout) if completed.
|
|
*/
|
|
unsigned long __sched
|
|
wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
|
|
{
|
|
return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_io_timeout);
|
|
|
|
/**
|
|
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
|
|
* @x: holds the state of this particular completion
|
|
*
|
|
* This waits for completion of a specific task to be signaled. It is
|
|
* interruptible.
|
|
*
|
|
* Return: -ERESTARTSYS if interrupted, 0 if completed.
|
|
*/
|
|
int __sched wait_for_completion_interruptible(struct completion *x)
|
|
{
|
|
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
|
|
if (t == -ERESTARTSYS)
|
|
return t;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_interruptible);
|
|
|
|
/**
|
|
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
|
|
* @x: holds the state of this particular completion
|
|
* @timeout: timeout value in jiffies
|
|
*
|
|
* This waits for either a completion of a specific task to be signaled or for a
|
|
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
|
|
*
|
|
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
|
|
* or number of jiffies left till timeout) if completed.
|
|
*/
|
|
long __sched
|
|
wait_for_completion_interruptible_timeout(struct completion *x,
|
|
unsigned long timeout)
|
|
{
|
|
return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
|
|
|
|
/**
|
|
* wait_for_completion_killable: - waits for completion of a task (killable)
|
|
* @x: holds the state of this particular completion
|
|
*
|
|
* This waits to be signaled for completion of a specific task. It can be
|
|
* interrupted by a kill signal.
|
|
*
|
|
* Return: -ERESTARTSYS if interrupted, 0 if completed.
|
|
*/
|
|
int __sched wait_for_completion_killable(struct completion *x)
|
|
{
|
|
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
|
|
if (t == -ERESTARTSYS)
|
|
return t;
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_killable);
|
|
|
|
/**
|
|
* wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
|
|
* @x: holds the state of this particular completion
|
|
* @timeout: timeout value in jiffies
|
|
*
|
|
* This waits for either a completion of a specific task to be
|
|
* signaled or for a specified timeout to expire. It can be
|
|
* interrupted by a kill signal. The timeout is in jiffies.
|
|
*
|
|
* Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
|
|
* or number of jiffies left till timeout) if completed.
|
|
*/
|
|
long __sched
|
|
wait_for_completion_killable_timeout(struct completion *x,
|
|
unsigned long timeout)
|
|
{
|
|
return wait_for_common(x, timeout, TASK_KILLABLE);
|
|
}
|
|
EXPORT_SYMBOL(wait_for_completion_killable_timeout);
|
|
|
|
/**
|
|
* try_wait_for_completion - try to decrement a completion without blocking
|
|
* @x: completion structure
|
|
*
|
|
* Return: 0 if a decrement cannot be done without blocking
|
|
* 1 if a decrement succeeded.
|
|
*
|
|
* If a completion is being used as a counting completion,
|
|
* attempt to decrement the counter without blocking. This
|
|
* enables us to avoid waiting if the resource the completion
|
|
* is protecting is not available.
|
|
*/
|
|
bool try_wait_for_completion(struct completion *x)
|
|
{
|
|
unsigned long flags;
|
|
int ret = 1;
|
|
|
|
/*
|
|
* Since x->done will need to be locked only
|
|
* in the non-blocking case, we check x->done
|
|
* first without taking the lock so we can
|
|
* return early in the blocking case.
|
|
*/
|
|
if (!READ_ONCE(x->done))
|
|
return 0;
|
|
|
|
spin_lock_irqsave(&x->wait.lock, flags);
|
|
if (!x->done)
|
|
ret = 0;
|
|
else
|
|
x->done--;
|
|
spin_unlock_irqrestore(&x->wait.lock, flags);
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(try_wait_for_completion);
|
|
|
|
/**
|
|
* completion_done - Test to see if a completion has any waiters
|
|
* @x: completion structure
|
|
*
|
|
* Return: 0 if there are waiters (wait_for_completion() in progress)
|
|
* 1 if there are no waiters.
|
|
*
|
|
*/
|
|
bool completion_done(struct completion *x)
|
|
{
|
|
if (!READ_ONCE(x->done))
|
|
return false;
|
|
|
|
/*
|
|
* If ->done, we need to wait for complete() to release ->wait.lock
|
|
* otherwise we can end up freeing the completion before complete()
|
|
* is done referencing it.
|
|
*
|
|
* The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
|
|
* the loads of ->done and ->wait.lock such that we cannot observe
|
|
* the lock before complete() acquires it while observing the ->done
|
|
* after it's acquired the lock.
|
|
*/
|
|
smp_rmb();
|
|
spin_unlock_wait(&x->wait.lock);
|
|
return true;
|
|
}
|
|
EXPORT_SYMBOL(completion_done);
|