thp: fix deadlock in split_huge_pmd()

split_huge_pmd() tries to munlock the page with munlock_vma_page().  That
requires the page to be locked.

If the page is locked by the caller, we would get a deadlock:

	Unable to find swap-space signature
	INFO: task trinity-c85:1907 blocked for more than 120 seconds.
	      Not tainted 4.4.0-00032-gf19d0bdced41-dirty #1606
	"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
	trinity-c85     D ffff88084d997608     0  1907    309 0x00000000
	Call Trace:
	  schedule+0x9f/0x1c0
	  schedule_timeout+0x48e/0x600
	  io_schedule_timeout+0x1c3/0x390
	  bit_wait_io+0x29/0xd0
	  __wait_on_bit_lock+0x94/0x140
	  __lock_page+0x1d4/0x280
	  __split_huge_pmd+0x5a8/0x10f0
	  split_huge_pmd_address+0x1d9/0x230
	  try_to_unmap_one+0x540/0xc70
	  rmap_walk_anon+0x284/0x810
	  rmap_walk_locked+0x11e/0x190
	  try_to_unmap+0x1b1/0x4b0
	  split_huge_page_to_list+0x49d/0x18a0
	  follow_page_mask+0xa36/0xea0
	  SyS_move_pages+0xaf3/0x1570
	  entry_SYSCALL_64_fastpath+0x12/0x6b
	2 locks held by trinity-c85/1907:
	 #0:  (&mm->mmap_sem){++++++}, at:  SyS_move_pages+0x933/0x1570
	 #1:  (&anon_vma->rwsem){++++..}, at:  split_huge_page_to_list+0x402/0x18a0

I don't think the deadlock is triggerable without the split_huge_page()
simplification patchset.

But munlock_vma_page() here is wrong: we want to munlock the page
unconditionally, with no need for the rmap lookup that munlock_vma_page() does.

Let's use clear_page_mlock() instead.  It can be called under ptl.

Fixes: e90309c9f7 ("thp: allow mlocked THP again")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Kirill A. Shutemov 2016-03-17 14:20:13 -07:00 committed by Linus Torvalds
parent fec89c109f
commit 5f7377147c

View File

@@ -2981,29 +2981,20 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	spinlock_t *ptl;
 	struct mm_struct *mm = vma->vm_mm;
-	struct page *page = NULL;
 	unsigned long haddr = address & HPAGE_PMD_MASK;

 	mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
 	ptl = pmd_lock(mm, pmd);
 	if (pmd_trans_huge(*pmd)) {
-		page = pmd_page(*pmd);
-		if (PageMlocked(page))
-			get_page(page);
-		else
-			page = NULL;
+		struct page *page = pmd_page(*pmd);
+		if (PageMlocked(page))
+			clear_page_mlock(page);
 	} else if (!pmd_devmap(*pmd))
 		goto out;
 	__split_huge_pmd_locked(vma, pmd, haddr, freeze);
 out:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
-	if (page) {
-		lock_page(page);
-		munlock_vma_page(page);
-		unlock_page(page);
-		put_page(page);
-	}
 }

 void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,