mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
c7cdff0e86
fill_balloon doing memory allocations under balloon_lock can cause a deadlock when leak_balloon is called from virtballoon_oom_notify and tries to take the same lock.

To fix, split page allocation and enqueue and do allocations outside the lock.

Here's a detailed analysis of the deadlock by Tetsuo Handa:

In leak_balloon(), mutex_lock(&vb->balloon_lock) is called in order to serialize against fill_balloon(). But in fill_balloon(), alloc_page(GFP_HIGHUSER[_MOVABLE] | __GFP_NOMEMALLOC | __GFP_NORETRY) is called with the vb->balloon_lock mutex held. Since GFP_HIGHUSER[_MOVABLE] implies __GFP_DIRECT_RECLAIM | __GFP_IO | __GFP_FS, even though __GFP_NORETRY is specified, this allocation attempt might indirectly depend on somebody else's __GFP_DIRECT_RECLAIM memory allocation. And such an indirect __GFP_DIRECT_RECLAIM memory allocation might call leak_balloon() via virtballoon_oom_notify() via the blocking_notifier_call_chain() callback via out_of_memory() when it has reached __alloc_pages_may_oom() and holds the oom_lock mutex. Since the vb->balloon_lock mutex is already held by fill_balloon(), it will cause an OOM lockup.

  Thread1                                    Thread2
    fill_balloon()
      takes a balloon_lock
      balloon_page_enqueue()
        alloc_page(GFP_HIGHUSER_MOVABLE)
          direct reclaim (__GFP_FS context)    takes a fs lock
            waits for that fs lock               alloc_page(GFP_NOFS)
                                                   __alloc_pages_may_oom()
                                                     takes the oom_lock
                                                     out_of_memory()
                                                       blocking_notifier_call_chain()
                                                         leak_balloon()
                                                           tries to take that balloon_lock and deadlocks

Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
178 lines
5.5 KiB
C
178 lines
5.5 KiB
C
/*
|
|
* mm/balloon_compaction.c
|
|
*
|
|
* Common interface for making balloon pages movable by compaction.
|
|
*
|
|
* Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
|
|
*/
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/export.h>
|
|
#include <linux/balloon_compaction.h>
|
|
|
|
/*
|
|
* balloon_page_alloc - allocates a new page for insertion into the balloon
|
|
* page list.
|
|
*
|
|
* Driver must call it to properly allocate a new enlisted balloon page.
|
|
* Driver must call balloon_page_enqueue before definitively removing it from
|
|
* the guest system. This function returns the page address for the recently
|
|
* allocated page or NULL in the case we fail to allocate a new page this turn.
|
|
*/
|
|
struct page *balloon_page_alloc(void)
|
|
{
|
|
struct page *page = alloc_page(balloon_mapping_gfp_mask() |
|
|
__GFP_NOMEMALLOC | __GFP_NORETRY);
|
|
return page;
|
|
}
|
|
EXPORT_SYMBOL_GPL(balloon_page_alloc);
|
|
|
|
/*
|
|
* balloon_page_enqueue - inserts a previously allocated page into the balloon
|
|
* page list.
|
|
* @b_dev_info: balloon device descriptor where we will insert a new page to
|
|
* @page: new page to enqueue - allocated using balloon_page_alloc.
|
|
*
|
|
* Driver must call it to properly enqueue a new allocated balloon page
|
|
* before definitively removing it from the guest system.
|
|
* This function returns nothing: the page is supplied by the caller, so no
* allocation takes place here and there is no allocation failure to report.
|
|
*/
|
|
void balloon_page_enqueue(struct balloon_dev_info *b_dev_info,
|
|
struct page *page)
|
|
{
|
|
unsigned long flags;
|
|
|
|
/*
|
|
* Block others from accessing the 'page' when we get around to
|
|
* establishing additional references. We should be the only one
|
|
* holding a reference to the 'page' at this point.
|
|
*/
|
|
BUG_ON(!trylock_page(page));
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
balloon_page_insert(b_dev_info, page);
|
|
__count_vm_event(BALLOON_INFLATE);
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
unlock_page(page);
|
|
}
|
|
EXPORT_SYMBOL_GPL(balloon_page_enqueue);
|
|
|
|
/*
|
|
* balloon_page_dequeue - removes a page from balloon's page list and returns
|
|
* its address to allow the driver to release the page.
|
|
* @b_dev_info: balloon device descriptor where we will grab a page from.
|
|
*
|
|
* Driver must call it to properly de-allocate a previously enlisted balloon page
|
|
* before definitively releasing it back to the guest system.
|
|
* This function returns the page address for the recently dequeued page or
|
|
* NULL in the case we find balloon's page list temporarily empty due to
|
|
* compaction isolated pages.
|
|
*/
|
|
struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
|
|
{
|
|
struct page *page, *tmp;
|
|
unsigned long flags;
|
|
bool dequeued_page;
|
|
|
|
dequeued_page = false;
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
|
|
/*
|
|
* Block others from accessing the 'page' while we get around
|
|
* establishing additional references and preparing the 'page'
|
|
* to be released by the balloon driver.
|
|
*/
|
|
if (trylock_page(page)) {
|
|
#ifdef CONFIG_BALLOON_COMPACTION
|
|
if (PageIsolated(page)) {
|
|
/* raced with isolation */
|
|
unlock_page(page);
|
|
continue;
|
|
}
|
|
#endif
|
|
balloon_page_delete(page);
|
|
__count_vm_event(BALLOON_DEFLATE);
|
|
unlock_page(page);
|
|
dequeued_page = true;
|
|
break;
|
|
}
|
|
}
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
|
|
if (!dequeued_page) {
|
|
/*
|
|
* If we are unable to dequeue a balloon page because the page
|
|
* list is empty and there is no isolated pages, then something
|
|
* went out of track and some balloon pages are lost.
|
|
* BUG() here, otherwise the balloon driver may get stuck into
|
|
* an infinite loop while attempting to release all its pages.
|
|
*/
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
if (unlikely(list_empty(&b_dev_info->pages) &&
|
|
!b_dev_info->isolated_pages))
|
|
BUG();
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
page = NULL;
|
|
}
|
|
return page;
|
|
}
|
|
EXPORT_SYMBOL_GPL(balloon_page_dequeue);
|
|
|
|
#ifdef CONFIG_BALLOON_COMPACTION
|
|
|
|
/*
 * balloon_page_isolate - unlink a balloon page from its device's page list.
 * @page: balloon page to isolate.
 * @mode: isolation mode; not examined by this implementation.
 *
 * With pages_lock held, the page is removed from the list it is linked on
 * through page->lru and the device's isolated_pages counter is incremented.
 * Always returns true.
 */
bool balloon_page_isolate(struct page *page, isolate_mode_t mode)
{
	struct balloon_dev_info *dev;
	unsigned long irq_flags;

	dev = balloon_page_device(page);

	spin_lock_irqsave(&dev->pages_lock, irq_flags);
	list_del(&page->lru);
	dev->isolated_pages++;
	spin_unlock_irqrestore(&dev->pages_lock, irq_flags);

	return true;
}
|
|
|
|
void balloon_page_putback(struct page *page)
|
|
{
|
|
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
list_add(&page->lru, &b_dev_info->pages);
|
|
b_dev_info->isolated_pages--;
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
}
|
|
|
|
|
|
/* move_to_new_page() counterpart for a ballooned page */
|
|
int balloon_page_migrate(struct address_space *mapping,
|
|
struct page *newpage, struct page *page,
|
|
enum migrate_mode mode)
|
|
{
|
|
struct balloon_dev_info *balloon = balloon_page_device(page);
|
|
|
|
/*
|
|
* We can not easily support the no copy case here so ignore it as it
|
|
* is unlikely to be use with ballon pages. See include/linux/hmm.h for
|
|
* user of the MIGRATE_SYNC_NO_COPY mode.
|
|
*/
|
|
if (mode == MIGRATE_SYNC_NO_COPY)
|
|
return -EINVAL;
|
|
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
|
|
|
|
return balloon->migratepage(balloon, newpage, page, mode);
|
|
}
|
|
|
|
const struct address_space_operations balloon_aops = {
|
|
.migratepage = balloon_page_migrate,
|
|
.isolate_page = balloon_page_isolate,
|
|
.putback_page = balloon_page_putback,
|
|
};
|
|
EXPORT_SYMBOL_GPL(balloon_aops);
|
|
|
|
#endif /* CONFIG_BALLOON_COMPACTION */
|