sched: fix exit_mm vs membarrier (v4)
exit_mm should issue memory barriers after user-space memory accesses,
before clearing current->mm, to order user-space memory accesses
performed prior to exit_mm before clearing tsk->mm, which has the
effect of skipping the membarrier private expedited IPIs.

exit_mm should also update the runqueue's membarrier_state so
membarrier global expedited IPIs are not sent when they are not
needed.

The membarrier system call can be issued concurrently with do_exit
if we have thread groups created with CLONE_VM but not CLONE_THREAD.

Here is the scenario I have in mind:

Two thread groups are created, A and B. Thread group B is created by
issuing clone from group A with flag CLONE_VM set, but not CLONE_THREAD.
Let's assume we have a single thread within each thread group
(Thread A and Thread B). AFAIU, we can have:

Userspace variables:

int x = 0, y = 0;

 CPU 0                           CPU 1
 Thread A                        Thread B
 (in thread group A)             (in thread group B)

 x = 1
 barrier()
 y = 1
 exit()
 exit_mm()
   current->mm = NULL;
                                 r1 = load y
                                 membarrier()
                                   skips CPU 0 (no IPI) because its
                                   current mm is NULL
                                 r2 = load x
                                 BUG_ON(r1 == 1 && r2 == 0)

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201020134715.13909-2-mathieu.desnoyers@efficios.com
parent 45da7a2b0a
commit 5bc7850232
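For illustration only (not part of the commit): a minimal userspace sketch of the setup the scenario above describes, assuming a Linux system exposing the clone(2) and membarrier(2) syscalls. The file name, stack size, and helper names below are made up; the program only shows the shape of the race (two thread groups sharing one mm via CLONE_VM without CLONE_THREAD), it is not a reliable reproducer.

/*
 * membarrier-race.c - illustrative sketch of the commit-message scenario.
 * Hypothetical build: gcc -O2 -o membarrier-race membarrier-race.c
 */
#define _GNU_SOURCE
#include <linux/membarrier.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

static int x, y;                /* the userspace variables from the scenario */

static int sys_membarrier(int cmd, unsigned int flags)
{
        return syscall(__NR_membarrier, cmd, flags, 0);
}

/* "Thread A": store x, then y, then exit; do_exit() -> exit_mm() clears ->mm. */
static int thread_a(void *arg)
{
        x = 1;
        __asm__ __volatile__("" ::: "memory");  /* barrier() */
        y = 1;
        return 0;
}

int main(void)
{
        char *stack = malloc(1 << 16);
        int r1, r2;

        /* "Thread B" (this thread group) registers for private expedited. */
        if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
                perror("membarrier register");

        /* Create the other thread group: shares the mm (CLONE_VM), but is
         * not in our thread group (no CLONE_THREAD). */
        if (clone(thread_a, stack + (1 << 16), CLONE_VM | SIGCHLD, NULL) == -1)
                perror("clone");

        r1 = y;
        /* Pre-fix, this can skip the CPU running thread A once exit_mm()
         * has cleared current->mm, losing the ordering against x = 1. */
        sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
        r2 = x;

        if (r1 == 1 && r2 == 0)
                fprintf(stderr, "r1 == 1 && r2 == 0: ordering violation\n");

        wait(NULL);
        free(stack);
        return 0;
}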
include/linux/sched/mm.h
@@ -347,6 +347,8 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 
 extern void membarrier_exec_mmap(struct mm_struct *mm);
 
+extern void membarrier_update_current_mm(struct mm_struct *next_mm);
+
 #else
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -361,6 +363,9 @@ static inline void membarrier_exec_mmap(struct mm_struct *mm)
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 {
 }
+static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
+{
+}
 #endif
 
 #endif /* _LINUX_SCHED_MM_H */
kernel/exit.c
@@ -475,10 +475,24 @@ static void exit_mm(void)
 	BUG_ON(mm != current->active_mm);
 	/* more a memory barrier than a real lock */
 	task_lock(current);
+	/*
+	 * When a thread stops operating on an address space, the loop
+	 * in membarrier_private_expedited() may not observe that
+	 * tsk->mm, and the loop in membarrier_global_expedited() may
+	 * not observe a MEMBARRIER_STATE_GLOBAL_EXPEDITED
+	 * rq->membarrier_state, so those would not issue an IPI.
+	 * Membarrier requires a memory barrier after accessing
+	 * user-space memory, before clearing tsk->mm or the
+	 * rq->membarrier_state.
+	 */
+	smp_mb__after_spinlock();
+	local_irq_disable();
 	current->mm = NULL;
-	mmap_read_unlock(mm);
+	membarrier_update_current_mm(NULL);
 	enter_lazy_tlb(mm, current);
+	local_irq_enable();
 	task_unlock(current);
+	mmap_read_unlock(mm);
 	mm_update_next_owner(mm);
 	mmput(mm);
 	if (test_thread_flag(TIF_MEMDIE))
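A purely illustrative C11 analogue of the pairing the new comment describes (not kernel code; every name below is a stand-in): the exiting side needs a full barrier between its last user-space accesses and clearing ->mm, because a membarrier caller that observes the cleared mm skips the IPI and relies only on its own barrier paired with this one.

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

static int the_mm;                            /* stand-in for the shared mm          */
static int user_data;                         /* stand-in for user-space memory      */
static _Atomic(int *) cpu_curr_mm = &the_mm;  /* stand-in for cpu_rq(cpu)->curr->mm  */

/* Exiting side, as in exit_mm(): user-space access, full barrier, clear ->mm. */
static void *exiting_side(void *arg)
{
        user_data = 1;                                  /* last user-space store      */
        atomic_thread_fence(memory_order_seq_cst);      /* smp_mb__after_spinlock()   */
        atomic_store_explicit(&cpu_curr_mm, NULL, memory_order_relaxed);
        return NULL;
}

/* membarrier side: a CPU observed with a NULL (foreign) mm is skipped, so the
 * caller's ordering guarantee rests on the fence pair, not on an IPI. */
static void *membarrier_side(void *arg)
{
        if (atomic_load_explicit(&cpu_curr_mm, memory_order_relaxed) == NULL) {
                atomic_thread_fence(memory_order_seq_cst);  /* caller-side smp_mb()   */
                assert(user_data == 1);  /* holds only thanks to the exiting-side fence */
        }
        /* else: an IPI would be sent and its handler provides the barrier. */
        return NULL;
}

int main(void)
{
        pthread_t a, b;
        pthread_create(&a, NULL, exiting_side, NULL);
        pthread_create(&b, NULL, membarrier_side, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}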
kernel/sched/membarrier.c
@@ -76,6 +76,18 @@ void membarrier_exec_mmap(struct mm_struct *mm)
 	this_cpu_write(runqueues.membarrier_state, 0);
 }
 
+void membarrier_update_current_mm(struct mm_struct *next_mm)
+{
+	struct rq *rq = this_rq();
+	int membarrier_state = 0;
+
+	if (next_mm)
+		membarrier_state = atomic_read(&next_mm->membarrier_state);
+	if (READ_ONCE(rq->membarrier_state) == membarrier_state)
+		return;
+	WRITE_ONCE(rq->membarrier_state, membarrier_state);
+}
+
 static int membarrier_global_expedited(void)
 {
 	int cpu;