forked from Minki/linux
mm: Convert collapse_shmem to XArray
I found another victim of the radix tree being hard to use. Because there was no call to radix_tree_preload(), khugepaged was allocating radix_tree_nodes using GFP_ATOMIC. I also converted a local_irq_save()/local_irq_restore() pair to local_irq_disable()/local_irq_enable(). Signed-off-by: Matthew Wilcox <willy@infradead.org>
This commit is contained in:
parent
aa5dc07f70
commit
77da9389b9
159
mm/khugepaged.c
159
mm/khugepaged.c
@ -1288,17 +1288,17 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
||||
*
|
||||
* Basic scheme is simple, details are more complex:
|
||||
* - allocate and freeze a new huge page;
|
||||
* - scan over radix tree replacing old pages the new one
|
||||
* - scan page cache replacing old pages with the new one
|
||||
* + swap in pages if necessary;
|
||||
* + fill in gaps;
|
||||
* + keep old pages around in case if rollback is required;
|
||||
* - if replacing succeed:
|
||||
* + keep old pages around in case rollback is required;
|
||||
* - if replacing succeeds:
|
||||
* + copy data over;
|
||||
* + free old pages;
|
||||
* + unfreeze huge page;
|
||||
* - if replacing fails:
|
||||
* + put all pages back and unfreeze them;
|
||||
* + restore gaps in the radix-tree;
|
||||
* + restore gaps in the page cache;
|
||||
* + free huge page;
|
||||
*/
|
||||
static void collapse_shmem(struct mm_struct *mm,
|
||||
@ -1306,12 +1306,11 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
struct page **hpage, int node)
|
||||
{
|
||||
gfp_t gfp;
|
||||
struct page *page, *new_page, *tmp;
|
||||
struct page *new_page;
|
||||
struct mem_cgroup *memcg;
|
||||
pgoff_t index, end = start + HPAGE_PMD_NR;
|
||||
LIST_HEAD(pagelist);
|
||||
struct radix_tree_iter iter;
|
||||
void **slot;
|
||||
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
|
||||
int nr_none = 0, result = SCAN_SUCCEED;
|
||||
|
||||
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
||||
@ -1336,48 +1335,49 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
__SetPageLocked(new_page);
|
||||
BUG_ON(!page_ref_freeze(new_page, 1));
|
||||
|
||||
|
||||
/*
|
||||
* At this point the new_page is 'frozen' (page_count() is zero), locked
|
||||
* and not up-to-date. It's safe to insert it into radix tree, because
|
||||
* nobody would be able to map it or use it in other way until we
|
||||
* unfreeze it.
|
||||
* At this point the new_page is 'frozen' (page_count() is zero),
|
||||
* locked and not up-to-date. It's safe to insert it into the page
|
||||
* cache, because nobody would be able to map it or use it in other
|
||||
* way until we unfreeze it.
|
||||
*/
|
||||
|
||||
index = start;
|
||||
xa_lock_irq(&mapping->i_pages);
|
||||
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
|
||||
int n = min(iter.index, end) - index;
|
||||
|
||||
/*
|
||||
* Handle holes in the radix tree: charge it from shmem and
|
||||
* insert relevant subpage of new_page into the radix-tree.
|
||||
*/
|
||||
if (n && !shmem_charge(mapping->host, n)) {
|
||||
result = SCAN_FAIL;
|
||||
/* This will be less messy when we use multi-index entries */
|
||||
do {
|
||||
xas_lock_irq(&xas);
|
||||
xas_create_range(&xas);
|
||||
if (!xas_error(&xas))
|
||||
break;
|
||||
}
|
||||
nr_none += n;
|
||||
for (; index < min(iter.index, end); index++) {
|
||||
radix_tree_insert(&mapping->i_pages, index,
|
||||
new_page + (index % HPAGE_PMD_NR));
|
||||
xas_unlock_irq(&xas);
|
||||
if (!xas_nomem(&xas, GFP_KERNEL))
|
||||
goto out;
|
||||
} while (1);
|
||||
|
||||
xas_set(&xas, start);
|
||||
for (index = start; index < end; index++) {
|
||||
struct page *page = xas_next(&xas);
|
||||
|
||||
VM_BUG_ON(index != xas.xa_index);
|
||||
if (!page) {
|
||||
if (!shmem_charge(mapping->host, 1)) {
|
||||
result = SCAN_FAIL;
|
||||
break;
|
||||
}
|
||||
xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
|
||||
nr_none++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We are done. */
|
||||
if (index >= end)
|
||||
break;
|
||||
|
||||
page = radix_tree_deref_slot_protected(slot,
|
||||
&mapping->i_pages.xa_lock);
|
||||
if (xa_is_value(page) || !PageUptodate(page)) {
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
xas_unlock_irq(&xas);
|
||||
/* swap in or instantiate fallocated page */
|
||||
if (shmem_getpage(mapping->host, index, &page,
|
||||
SGP_NOHUGE)) {
|
||||
result = SCAN_FAIL;
|
||||
goto tree_unlocked;
|
||||
goto xa_unlocked;
|
||||
}
|
||||
xa_lock_irq(&mapping->i_pages);
|
||||
xas_lock_irq(&xas);
|
||||
xas_set(&xas, index);
|
||||
} else if (trylock_page(page)) {
|
||||
get_page(page);
|
||||
} else {
|
||||
@ -1397,7 +1397,7 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
result = SCAN_TRUNCATED;
|
||||
goto out_unlock;
|
||||
}
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
xas_unlock_irq(&xas);
|
||||
|
||||
if (isolate_lru_page(page)) {
|
||||
result = SCAN_DEL_PAGE_LRU;
|
||||
@ -1407,17 +1407,16 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
if (page_mapped(page))
|
||||
unmap_mapping_pages(mapping, index, 1, false);
|
||||
|
||||
xa_lock_irq(&mapping->i_pages);
|
||||
xas_lock_irq(&xas);
|
||||
xas_set(&xas, index);
|
||||
|
||||
slot = radix_tree_lookup_slot(&mapping->i_pages, index);
|
||||
VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
|
||||
&mapping->i_pages.xa_lock), page);
|
||||
VM_BUG_ON_PAGE(page != xas_load(&xas), page);
|
||||
VM_BUG_ON_PAGE(page_mapped(page), page);
|
||||
|
||||
/*
|
||||
* The page is expected to have page_count() == 3:
|
||||
* - we hold a pin on it;
|
||||
* - one reference from radix tree;
|
||||
* - one reference from page cache;
|
||||
* - one from isolate_lru_page;
|
||||
*/
|
||||
if (!page_ref_freeze(page, 3)) {
|
||||
@ -1432,56 +1431,30 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
list_add_tail(&page->lru, &pagelist);
|
||||
|
||||
/* Finally, replace with the new page. */
|
||||
radix_tree_replace_slot(&mapping->i_pages, slot,
|
||||
new_page + (index % HPAGE_PMD_NR));
|
||||
|
||||
slot = radix_tree_iter_resume(slot, &iter);
|
||||
index++;
|
||||
xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
|
||||
continue;
|
||||
out_lru:
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
xas_unlock_irq(&xas);
|
||||
putback_lru_page(page);
|
||||
out_isolate_failed:
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto tree_unlocked;
|
||||
goto xa_unlocked;
|
||||
out_unlock:
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
xas_unlock_irq(&xas);
|
||||
|
||||
/*
|
||||
* Handle hole in radix tree at the end of the range.
|
||||
* This code only triggers if there's nothing in radix tree
|
||||
* beyond 'end'.
|
||||
*/
|
||||
if (result == SCAN_SUCCEED && index < end) {
|
||||
int n = end - index;
|
||||
|
||||
if (!shmem_charge(mapping->host, n)) {
|
||||
result = SCAN_FAIL;
|
||||
goto tree_locked;
|
||||
}
|
||||
|
||||
for (; index < end; index++) {
|
||||
radix_tree_insert(&mapping->i_pages, index,
|
||||
new_page + (index % HPAGE_PMD_NR));
|
||||
}
|
||||
nr_none += n;
|
||||
}
|
||||
|
||||
tree_locked:
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
tree_unlocked:
|
||||
|
||||
xa_unlocked:
|
||||
if (result == SCAN_SUCCEED) {
|
||||
unsigned long flags;
|
||||
struct page *page, *tmp;
|
||||
struct zone *zone = page_zone(new_page);
|
||||
|
||||
/*
|
||||
* Replacing old pages with new one has succeed, now we need to
|
||||
* copy the content and free old pages.
|
||||
* Replacing old pages with new one has succeeded, now we
|
||||
* need to copy the content and free the old pages.
|
||||
*/
|
||||
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
|
||||
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
|
||||
@ -1495,16 +1468,16 @@ tree_unlocked:
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
local_irq_save(flags);
|
||||
local_irq_disable();
|
||||
__inc_node_page_state(new_page, NR_SHMEM_THPS);
|
||||
if (nr_none) {
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
|
||||
__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* Remove pte page tables, so we can re-faulti
|
||||
* Remove pte page tables, so we can re-fault
|
||||
* the page as huge.
|
||||
*/
|
||||
retract_page_tables(mapping, start);
|
||||
@ -1521,37 +1494,37 @@ tree_unlocked:
|
||||
|
||||
khugepaged_pages_collapsed++;
|
||||
} else {
|
||||
/* Something went wrong: rollback changes to the radix-tree */
|
||||
struct page *page;
|
||||
/* Something went wrong: roll back page cache changes */
|
||||
shmem_uncharge(mapping->host, nr_none);
|
||||
xa_lock_irq(&mapping->i_pages);
|
||||
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
|
||||
if (iter.index >= end)
|
||||
break;
|
||||
xas_lock_irq(&xas);
|
||||
xas_set(&xas, start);
|
||||
xas_for_each(&xas, page, end - 1) {
|
||||
page = list_first_entry_or_null(&pagelist,
|
||||
struct page, lru);
|
||||
if (!page || iter.index < page->index) {
|
||||
if (!page || xas.xa_index < page->index) {
|
||||
if (!nr_none)
|
||||
break;
|
||||
nr_none--;
|
||||
/* Put holes back where they were */
|
||||
radix_tree_delete(&mapping->i_pages, iter.index);
|
||||
xas_store(&xas, NULL);
|
||||
continue;
|
||||
}
|
||||
|
||||
VM_BUG_ON_PAGE(page->index != iter.index, page);
|
||||
VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
|
||||
|
||||
/* Unfreeze the page. */
|
||||
list_del(&page->lru);
|
||||
page_ref_unfreeze(page, 2);
|
||||
radix_tree_replace_slot(&mapping->i_pages, slot, page);
|
||||
slot = radix_tree_iter_resume(slot, &iter);
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
xas_store(&xas, page);
|
||||
xas_pause(&xas);
|
||||
xas_unlock_irq(&xas);
|
||||
putback_lru_page(page);
|
||||
unlock_page(page);
|
||||
xa_lock_irq(&mapping->i_pages);
|
||||
xas_lock_irq(&xas);
|
||||
}
|
||||
VM_BUG_ON(nr_none);
|
||||
xa_unlock_irq(&mapping->i_pages);
|
||||
xas_unlock_irq(&xas);
|
||||
|
||||
/* Unfreeze new_page; the caller will take care of freeing it */
|
||||
page_ref_unfreeze(new_page, 1);
|
||||
|
Loading…
Reference in New Issue
Block a user