c1e2f0eaf0
Julia reported futex state corruption in the following scenario:

  waiter                                  waker                                   stealer (prio > waiter)

  futex(WAIT_REQUEUE_PI, uaddr, uaddr2,
        timeout=[N ms])
     futex_wait_requeue_pi()
        futex_wait_queue_me()
           freezable_schedule()
           <scheduled out>
                                          futex(LOCK_PI, uaddr2)
                                          futex(CMP_REQUEUE_PI, uaddr,
                                                uaddr2, 1, 0)
                                             /* requeues waiter to uaddr2 */
                                          futex(UNLOCK_PI, uaddr2)
                                                wake_futex_pi()
                                                   cmp_futex_value_locked(uaddr2, waiter)
                                                   wake_up_q()
           <woken by waker>
           <hrtimer_wakeup() fires,
            clears sleeper->task>
                                                                                  futex(LOCK_PI, uaddr2)
                                                                                     __rt_mutex_start_proxy_lock()
                                                                                        try_to_take_rt_mutex() /* steals lock */
                                                                                           rt_mutex_set_owner(lock, stealer)
                                                                                     <preempted>
        <scheduled in>
        rt_mutex_wait_proxy_lock()
           __rt_mutex_slowlock()
              try_to_take_rt_mutex() /* fails, lock held by stealer */
              if (timeout && !timeout->task)
                 return -ETIMEDOUT;
           fixup_owner()
              /* lock wasn't acquired, so,
                 fixup_pi_state_owner skipped */

  return -ETIMEDOUT;

  /* At this point, we've returned -ETIMEDOUT to userspace, but the
   * futex word shows waiter to be the owner, and the pi_mutex has
   * stealer as the owner */

  futex_lock(LOCK_PI, uaddr2)
    -> bails with EDEADLK, futex word says we're owner.

And suggested that what commit:

  73d786bd04 ("futex: Rework inconsistent rt_mutex/futex_q state")

removes from fixup_owner() looks to be just what is needed. And indeed
it is -- I completely missed that requeue_pi could also result in this
case. So we need to restore that, except that subsequent patches, like
commit:

  16ffa12d74 ("futex: Pull rt_mutex_futex_unlock() out from under hb->lock")

changed all the locking rules. Even without that, the sequence:

-               if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) {
-                       locked = 1;
-                       goto out;
-               }

-               raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
-               owner = rt_mutex_owner(&q->pi_state->pi_mutex);
-               if (!owner)
-                       owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-               raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
-               ret = fixup_pi_state_owner(uaddr, q, owner);

already suggests there were races; otherwise we'd never have to look
at next_owner.

So instead of doing 3 consecutive wait_lock sections with who knows
what races, we do it all in a single section. Additionally, the usage
of pi_state->owner in fixup_owner() was only safe because only the
rt_mutex owner would modify it, which this additional case wrecks.

Luckily the values can only change away and not to the value we're
testing, this means we can do a speculative test and double check once
we have the wait_lock.

Fixes: 73d786bd04 ("futex: Rework inconsistent rt_mutex/futex_q state")
Reported-by: Julia Cartwright <julia@ni.com>
Reported-by: Gratian Crisan <gratian.crisan@ni.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Julia Cartwright <julia@ni.com>
Tested-by: Gratian Crisan <gratian.crisan@ni.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20171208124939.7livp7no2ov65rrc@hirez.programming.kicks-ass.net
166 lines
4.2 KiB
C
166 lines
4.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* RT Mutexes: blocking mutual exclusion locks with PI support
|
|
*
|
|
* started by Ingo Molnar and Thomas Gleixner:
|
|
*
|
|
* Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
|
|
* Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
|
|
*
|
|
* This file contains the private data structure and API definitions.
|
|
*/
|
|
|
|
#ifndef __KERNEL_RTMUTEX_COMMON_H
|
|
#define __KERNEL_RTMUTEX_COMMON_H
|
|
|
|
#include <linux/rtmutex.h>
|
|
#include <linux/sched/wake_q.h>
|
|
|
|
/*
|
|
* This is the control structure for tasks blocked on a rt_mutex,
|
|
* which is allocated on the kernel stack on of the blocked task.
|
|
*
|
|
* @tree_entry: pi node to enqueue into the mutex waiters tree
|
|
* @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
|
|
* @task: task reference to the blocked task
|
|
*/
|
|
struct rt_mutex_waiter {
|
|
struct rb_node tree_entry;
|
|
struct rb_node pi_tree_entry;
|
|
struct task_struct *task;
|
|
struct rt_mutex *lock;
|
|
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
|
unsigned long ip;
|
|
struct pid *deadlock_task_pid;
|
|
struct rt_mutex *deadlock_lock;
|
|
#endif
|
|
int prio;
|
|
u64 deadline;
|
|
};
|
|
|
|
/*
|
|
* Various helpers to access the waiters-tree:
|
|
*/
|
|
|
|
#ifdef CONFIG_RT_MUTEXES
|
|
|
|
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
|
|
{
|
|
return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
|
|
}
|
|
|
|
static inline struct rt_mutex_waiter *
|
|
rt_mutex_top_waiter(struct rt_mutex *lock)
|
|
{
|
|
struct rt_mutex_waiter *w;
|
|
|
|
w = rb_entry(lock->waiters.rb_leftmost,
|
|
struct rt_mutex_waiter, tree_entry);
|
|
BUG_ON(w->lock != lock);
|
|
|
|
return w;
|
|
}
|
|
|
|
static inline int task_has_pi_waiters(struct task_struct *p)
|
|
{
|
|
return !RB_EMPTY_ROOT(&p->pi_waiters.rb_root);
|
|
}
|
|
|
|
static inline struct rt_mutex_waiter *
|
|
task_top_pi_waiter(struct task_struct *p)
|
|
{
|
|
return rb_entry(p->pi_waiters.rb_leftmost,
|
|
struct rt_mutex_waiter, pi_tree_entry);
|
|
}
|
|
|
|
#else
|
|
|
|
/*
 * Stub for builds without CONFIG_RT_MUTEXES: @lock is ignored and the
 * answer is always "no waiters" (0).
 */
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
{
	return false;
}
|
|
|
|
/*
 * Stub for builds without CONFIG_RT_MUTEXES: @lock is ignored and there
 * is never a top waiter, so NULL is returned.
 */
static inline struct rt_mutex_waiter *
rt_mutex_top_waiter(struct rt_mutex *lock)
{
	return NULL;
}
|
|
|
|
/*
 * Stub for builds without CONFIG_RT_MUTEXES: @p is ignored and the
 * answer is always "no PI waiters" (0).
 */
static inline int task_has_pi_waiters(struct task_struct *p)
{
	return false;
}
|
|
|
|
/*
 * Stub for builds without CONFIG_RT_MUTEXES: @p is ignored and there is
 * never a top PI waiter, so NULL is returned.
 */
static inline struct rt_mutex_waiter *
task_top_pi_waiter(struct task_struct *p)
{
	return NULL;
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* lock->owner state tracking:
|
|
*/
|
|
/*
 * Flag stored in the least significant bit of lock->owner;
 * rt_mutex_owner() masks it off to recover the owner task pointer.
 * NOTE(review): presumably set while the lock has waiters - confirm
 * against the code that writes lock->owner.
 */
#define RT_MUTEX_HAS_WAITERS 1UL
|
|
|
|
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
|
|
{
|
|
unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
|
|
|
|
return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
|
|
}
|
|
|
|
/*
 * Constants for rt mutex functions which have a selectable deadlock
 * detection.
 */
enum rtmutex_chainwalk {
	/*
	 * Stops the lock chain walk when there are no further PI
	 * adjustments to be made.
	 */
	RT_MUTEX_MIN_CHAINWALK,

	/* Invoke deadlock detection with a full walk of the lock chain. */
	RT_MUTEX_FULL_CHAINWALK,
};
|
|
|
|
/*
 * PI-futex support (proxy locking functions, etc.).
 *
 * Prototypes only; none of these functions is defined in this header.
 */

/* Owner lookup and proxy-owner handling. */
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				       struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
				  struct task_struct *proxy_owner);

/* Waiter initialisation and the start / wait / cleanup proxy-lock calls. */
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				       struct rt_mutex_waiter *waiter,
				       struct task_struct *task);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
				     struct rt_mutex_waiter *waiter,
				     struct task_struct *task);
extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
				    struct hrtimer_sleeper *to,
				    struct rt_mutex_waiter *waiter);
extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
					struct rt_mutex_waiter *waiter);

/* Trylock entry points for the futex code. */
extern int rt_mutex_futex_trylock(struct rt_mutex *l);
extern int __rt_mutex_futex_trylock(struct rt_mutex *l);

/* Unlock entry points for the futex code; the __ variant takes a wake queue. */
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
				    struct wake_q_head *wqh);

/* Post-unlock step operating on a wake queue. */
extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
|
|
|
|
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
|
# include "rtmutex-debug.h"
|
|
#else
|
|
# include "rtmutex.h"
|
|
#endif
|
|
|
|
#endif
|