mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
mm: remove the now-unnecessary mmget_still_valid() hack
The preceding patches have ensured that core dumping properly takes the mmap_lock. Thanks to that, we can now remove mmget_still_valid() and all its users. Signed-off-by: Jann Horn <jannh@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Hugh Dickins <hughd@google.com> Link: http://lkml.kernel.org/r/20200827114932.3572699-8-jannh@google.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
7f3bfab52c
commit
4d45e75a99
@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
|
|||||||
* will only be one mm, so no big deal.
|
* will only be one mm, so no big deal.
|
||||||
*/
|
*/
|
||||||
mmap_read_lock(mm);
|
mmap_read_lock(mm);
|
||||||
if (!mmget_still_valid(mm))
|
|
||||||
goto skip_mm;
|
|
||||||
mutex_lock(&ufile->umap_lock);
|
mutex_lock(&ufile->umap_lock);
|
||||||
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
|
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
|
||||||
list) {
|
list) {
|
||||||
@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutex_unlock(&ufile->umap_lock);
|
mutex_unlock(&ufile->umap_lock);
|
||||||
skip_mm:
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
mmput(mm);
|
mmput(mm);
|
||||||
}
|
}
|
||||||
|
@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
|
|||||||
} else {
|
} else {
|
||||||
mmap_read_lock(mm);
|
mmap_read_lock(mm);
|
||||||
}
|
}
|
||||||
if (mmget_still_valid(mm)) {
|
if (try) {
|
||||||
if (try) {
|
if (!mutex_trylock(&vdev->vma_lock)) {
|
||||||
if (!mutex_trylock(&vdev->vma_lock)) {
|
mmap_read_unlock(mm);
|
||||||
mmap_read_unlock(mm);
|
mmput(mm);
|
||||||
mmput(mm);
|
return 0;
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
mutex_lock(&vdev->vma_lock);
|
|
||||||
}
|
}
|
||||||
list_for_each_entry_safe(mmap_vma, tmp,
|
} else {
|
||||||
&vdev->vma_list, vma_next) {
|
mutex_lock(&vdev->vma_lock);
|
||||||
struct vm_area_struct *vma = mmap_vma->vma;
|
|
||||||
|
|
||||||
if (vma->vm_mm != mm)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
list_del(&mmap_vma->vma_next);
|
|
||||||
kfree(mmap_vma);
|
|
||||||
|
|
||||||
zap_vma_ptes(vma, vma->vm_start,
|
|
||||||
vma->vm_end - vma->vm_start);
|
|
||||||
}
|
|
||||||
mutex_unlock(&vdev->vma_lock);
|
|
||||||
}
|
}
|
||||||
|
list_for_each_entry_safe(mmap_vma, tmp,
|
||||||
|
&vdev->vma_list, vma_next) {
|
||||||
|
struct vm_area_struct *vma = mmap_vma->vma;
|
||||||
|
|
||||||
|
if (vma->vm_mm != mm)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
list_del(&mmap_vma->vma_next);
|
||||||
|
kfree(mmap_vma);
|
||||||
|
|
||||||
|
zap_vma_ptes(vma, vma->vm_start,
|
||||||
|
vma->vm_end - vma->vm_start);
|
||||||
|
}
|
||||||
|
mutex_unlock(&vdev->vma_lock);
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
mmput(mm);
|
mmput(mm);
|
||||||
}
|
}
|
||||||
|
@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
|||||||
count = -EINTR;
|
count = -EINTR;
|
||||||
goto out_mm;
|
goto out_mm;
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* Avoid to modify vma->vm_flags
|
|
||||||
* without locked ops while the
|
|
||||||
* coredump reads the vm_flags.
|
|
||||||
*/
|
|
||||||
if (!mmget_still_valid(mm)) {
|
|
||||||
/*
|
|
||||||
* Silently return "count"
|
|
||||||
* like if get_task_mm()
|
|
||||||
* failed. FIXME: should this
|
|
||||||
* function have returned
|
|
||||||
* -ESRCH if get_task_mm()
|
|
||||||
* failed like if
|
|
||||||
* get_proc_task() fails?
|
|
||||||
*/
|
|
||||||
mmap_write_unlock(mm);
|
|
||||||
goto out_mm;
|
|
||||||
}
|
|
||||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||||
vma->vm_flags &= ~VM_SOFTDIRTY;
|
vma->vm_flags &= ~VM_SOFTDIRTY;
|
||||||
vma_set_page_prot(vma);
|
vma_set_page_prot(vma);
|
||||||
|
@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
|
|||||||
|
|
||||||
/* the various vma->vm_userfaultfd_ctx still points to it */
|
/* the various vma->vm_userfaultfd_ctx still points to it */
|
||||||
mmap_write_lock(mm);
|
mmap_write_lock(mm);
|
||||||
/* no task can run (and in turn coredump) yet */
|
|
||||||
VM_WARN_ON(!mmget_still_valid(mm));
|
|
||||||
for (vma = mm->mmap; vma; vma = vma->vm_next)
|
for (vma = mm->mmap; vma; vma = vma->vm_next)
|
||||||
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
|
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
|
||||||
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
|
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
|
||||||
@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
|||||||
/* len == 0 means wake all */
|
/* len == 0 means wake all */
|
||||||
struct userfaultfd_wake_range range = { .len = 0, };
|
struct userfaultfd_wake_range range = { .len = 0, };
|
||||||
unsigned long new_flags;
|
unsigned long new_flags;
|
||||||
bool still_valid;
|
|
||||||
|
|
||||||
WRITE_ONCE(ctx->released, true);
|
WRITE_ONCE(ctx->released, true);
|
||||||
|
|
||||||
@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
|||||||
* taking the mmap_lock for writing.
|
* taking the mmap_lock for writing.
|
||||||
*/
|
*/
|
||||||
mmap_write_lock(mm);
|
mmap_write_lock(mm);
|
||||||
still_valid = mmget_still_valid(mm);
|
|
||||||
prev = NULL;
|
prev = NULL;
|
||||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||||
cond_resched();
|
cond_resched();
|
||||||
@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
|
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
|
||||||
if (still_valid) {
|
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
|
||||||
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
|
new_flags, vma->anon_vma,
|
||||||
new_flags, vma->anon_vma,
|
vma->vm_file, vma->vm_pgoff,
|
||||||
vma->vm_file, vma->vm_pgoff,
|
vma_policy(vma),
|
||||||
vma_policy(vma),
|
NULL_VM_UFFD_CTX);
|
||||||
NULL_VM_UFFD_CTX);
|
if (prev)
|
||||||
if (prev)
|
vma = prev;
|
||||||
vma = prev;
|
else
|
||||||
else
|
prev = vma;
|
||||||
prev = vma;
|
|
||||||
}
|
|
||||||
vma->vm_flags = new_flags;
|
vma->vm_flags = new_flags;
|
||||||
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
|
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
|
||||||
}
|
}
|
||||||
@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
mmap_write_lock(mm);
|
mmap_write_lock(mm);
|
||||||
if (!mmget_still_valid(mm))
|
|
||||||
goto out_unlock;
|
|
||||||
vma = find_vma_prev(mm, start, &prev);
|
vma = find_vma_prev(mm, start, &prev);
|
||||||
if (!vma)
|
if (!vma)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
mmap_write_lock(mm);
|
mmap_write_lock(mm);
|
||||||
if (!mmget_still_valid(mm))
|
|
||||||
goto out_unlock;
|
|
||||||
vma = find_vma_prev(mm, start, &prev);
|
vma = find_vma_prev(mm, start, &prev);
|
||||||
if (!vma)
|
if (!vma)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
|
|||||||
__mmdrop(mm);
|
__mmdrop(mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This has to be called after a get_task_mm()/mmget_not_zero()
|
|
||||||
* followed by taking the mmap_lock for writing before modifying the
|
|
||||||
* vmas or anything the coredump pretends not to change from under it.
|
|
||||||
*
|
|
||||||
* It also has to be called when mmgrab() is used in the context of
|
|
||||||
* the process, but then the mm_count refcount is transferred outside
|
|
||||||
* the context of the process to run down_write() on that pinned mm.
|
|
||||||
*
|
|
||||||
* NOTE: find_extend_vma() called from GUP context is the only place
|
|
||||||
* that can modify the "mm" (notably the vm_start/end) under mmap_lock
|
|
||||||
* for reading and outside the context of the process, so it is also
|
|
||||||
* the only case that holds the mmap_lock for reading that must call
|
|
||||||
* this function. Generally if the mmap_lock is hold for reading
|
|
||||||
* there's no need of this check after get_task_mm()/mmget_not_zero().
|
|
||||||
*
|
|
||||||
* This function can be obsoleted and the check can be removed, after
|
|
||||||
* the coredump code will hold the mmap_lock for writing before
|
|
||||||
* invoking the ->core_dump methods.
|
|
||||||
*/
|
|
||||||
static inline bool mmget_still_valid(struct mm_struct *mm)
|
|
||||||
{
|
|
||||||
return likely(!mm->core_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mmget() - Pin the address space associated with a &struct mm_struct.
|
* mmget() - Pin the address space associated with a &struct mm_struct.
|
||||||
* @mm: The address space to pin.
|
* @mm: The address space to pin.
|
||||||
|
@ -434,7 +434,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
|
|||||||
|
|
||||||
static inline int khugepaged_test_exit(struct mm_struct *mm)
|
static inline int khugepaged_test_exit(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
|
return atomic_read(&mm->mm_users) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hugepage_vma_check(struct vm_area_struct *vma,
|
static bool hugepage_vma_check(struct vm_area_struct *vma,
|
||||||
|
17
mm/madvise.c
17
mm/madvise.c
@ -1085,23 +1085,6 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
|
|||||||
if (write) {
|
if (write) {
|
||||||
if (mmap_write_lock_killable(current->mm))
|
if (mmap_write_lock_killable(current->mm))
|
||||||
return -EINTR;
|
return -EINTR;
|
||||||
|
|
||||||
/*
|
|
||||||
* We may have stolen the mm from another process
|
|
||||||
* that is undergoing core dumping.
|
|
||||||
*
|
|
||||||
* Right now that's io_ring, in the future it may
|
|
||||||
* be remote process management and not "current"
|
|
||||||
* at all.
|
|
||||||
*
|
|
||||||
* We need to fix core dumping to not do this,
|
|
||||||
* but for now we have the mmget_still_valid()
|
|
||||||
* model.
|
|
||||||
*/
|
|
||||||
if (!mmget_still_valid(current->mm)) {
|
|
||||||
mmap_write_unlock(current->mm);
|
|
||||||
return -EINTR;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
mmap_read_lock(current->mm);
|
mmap_read_lock(current->mm);
|
||||||
}
|
}
|
||||||
|
@ -2562,7 +2562,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
|
|||||||
if (vma && (vma->vm_start <= addr))
|
if (vma && (vma->vm_start <= addr))
|
||||||
return vma;
|
return vma;
|
||||||
/* don't alter vm_end if the coredump is running */
|
/* don't alter vm_end if the coredump is running */
|
||||||
if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
|
if (!prev || expand_stack(prev, addr))
|
||||||
return NULL;
|
return NULL;
|
||||||
if (prev->vm_flags & VM_LOCKED)
|
if (prev->vm_flags & VM_LOCKED)
|
||||||
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
|
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
|
||||||
@ -2588,9 +2588,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
|
|||||||
return vma;
|
return vma;
|
||||||
if (!(vma->vm_flags & VM_GROWSDOWN))
|
if (!(vma->vm_flags & VM_GROWSDOWN))
|
||||||
return NULL;
|
return NULL;
|
||||||
/* don't alter vm_start if the coredump is running */
|
|
||||||
if (!mmget_still_valid(mm))
|
|
||||||
return NULL;
|
|
||||||
start = vma->vm_start;
|
start = vma->vm_start;
|
||||||
if (expand_stack(vma, addr))
|
if (expand_stack(vma, addr))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
Loading…
Reference in New Issue
Block a user