mm: page cache: store only head pages in i_pages
Transparent Huge Pages are currently stored in i_pages as pointers to
consecutive subpages.  This patch changes that to storing consecutive
pointers to the head page in preparation for storing huge pages more
efficiently in i_pages.

Large parts of this are "inspired" by Kirill's patch
https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/

Kirill and Huang Ying contributed several fixes.

[willy@infradead.org: use compound_nr, squish uninit-var warning]
Link: http://lkml.kernel.org/r/20190731210400.7419-1-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Kirill Shutemov <kirill@shutemov.name>
Reviewed-by: Song Liu <songliubraving@fb.com>
Tested-by: Song Liu <songliubraving@fb.com>
Tested-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Tested-by: Qian Cai <cai@lca.pw>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
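To make the new convention concrete, here is a minimal userspace model (illustrative only, not part of the patch: HPAGE_NR, slots[] and the toy struct page are stand-ins). Every i_pages slot covered by a compound page now stores the head pointer, and a lookup recovers the subpage with the same arithmetic as the find_subpage() helper added below:

#include <assert.h>
#include <stdio.h>

#define HPAGE_NR 512			/* subpages in a PMD-sized THP */

struct page { int id; };		/* toy stand-in for struct page */

/* Mirrors find_subpage(): page + (offset & (compound_nr(page) - 1)) */
static struct page *find_subpage(struct page *head, unsigned long nr_pages,
				 unsigned long offset)
{
	return head + (offset & (nr_pages - 1));
}

int main(void)
{
	static struct page thp[HPAGE_NR];	/* thp[0] is the head */
	struct page *slots[HPAGE_NR];		/* models i_pages slots */

	/*
	 * Old scheme: slot i stored &thp[i].
	 * New scheme: every slot covered by the THP stores the head.
	 */
	for (unsigned long i = 0; i < HPAGE_NR; i++)
		slots[i] = &thp[0];

	/* A lookup at offset 37 loads the head, then derives the subpage. */
	struct page *head = slots[37];
	assert(find_subpage(head, HPAGE_NR, 37) == &thp[37]);
	printf("offset 37 -> subpage %td\n",
	       find_subpage(head, HPAGE_NR, 37) - thp);
	return 0;
}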
commit 4101196b19
parent 875d91b11a
include/linux/pagemap.h
@@ -333,6 +333,16 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+	if (PageHuge(page))
+		return page;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	return page + (offset & (compound_nr(page) - 1));
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,

mm/filemap.c (147 lines changed)
@@ -281,11 +281,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @pvec: pagevec with pages to delete
  *
  * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index.
+ * from the mapping. The function expects @pvec to be sorted by page index
+ * and is optimised for it to be dense.
  * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the
- * mapping as well.
+ * @pvec.
  *
  * The function expects the i_pages lock to be held.
  */
@@ -294,40 +294,43 @@ static void page_cache_delete_batch(struct address_space *mapping,
 {
 	XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
 	int total_pages = 0;
-	int i = 0, tail_pages = 0;
+	int i = 0;
 	struct page *page;
 
 	mapping_set_update(&xas, mapping);
 	xas_for_each(&xas, page, ULONG_MAX) {
-		if (i >= pagevec_count(pvec) && !tail_pages)
+		if (i >= pagevec_count(pvec))
 			break;
 
 		/* A swap/dax/shadow entry got inserted? Skip it. */
 		if (xa_is_value(page))
 			continue;
-		if (!tail_pages) {
-			/*
-			 * Some page got inserted in our range? Skip it. We
-			 * have our pages locked so they are protected from
-			 * being removed.
-			 */
-			if (page != pvec->pages[i]) {
-				VM_BUG_ON_PAGE(page->index >
-						pvec->pages[i]->index, page);
-				continue;
-			}
-			WARN_ON_ONCE(!PageLocked(page));
-			if (PageTransHuge(page) && !PageHuge(page))
-				tail_pages = HPAGE_PMD_NR - 1;
-			page->mapping = NULL;
-			/*
-			 * Leave page->index set: truncation lookup relies
-			 * upon it
-			 */
-			i++;
-		} else {
-			VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
-					!= pvec->pages[i]->index, page);
-			tail_pages--;
+
+		/*
+		 * A page got inserted in our range? Skip it. We have our
+		 * pages locked so they are protected from being removed.
+		 * If we see a page whose index is higher than ours, it
+		 * means our page has been removed, which shouldn't be
+		 * possible because we're holding the PageLock.
+		 */
+		if (page != pvec->pages[i]) {
+			VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
+					page);
+			continue;
 		}
+
+		WARN_ON_ONCE(!PageLocked(page));
+
+		if (page->index == xas.xa_index)
+			page->mapping = NULL;
+		/* Leave page->index set: truncation lookup relies on it */
+
+		/*
+		 * Move to the next page in the vector if this is a regular
+		 * page or the index is of the last sub-page of this compound
+		 * page.
+		 */
+		if (page->index + compound_nr(page) - 1 == xas.xa_index)
+			i++;
 		xas_store(&xas, NULL);
 		total_pages++;
 	}
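The rewritten page_cache_delete_batch() above clears every slot but advances the pagevec cursor only at a page's last slot. A small userspace sketch of just that cursor rule (again illustrative, not kernel code: index/nr stand in for page->index and compound_nr()):

#include <assert.h>

struct page { unsigned long index, nr; };

int main(void)
{
	struct page huge  = { .index = 0, .nr = 4 };	/* covers slots 0..3 */
	struct page small = { .index = 4, .nr = 1 };
	/* Every covered slot holds the head pointer. */
	struct page *slots[] = { &huge, &huge, &huge, &huge, &small };
	struct page *pvec[]  = { &huge, &small };	/* head pages only */
	unsigned long xa_index, i = 0;

	for (xa_index = 0; xa_index < 5; xa_index++) {
		struct page *page = slots[xa_index];

		assert(page == pvec[i]);
		/* Advance to the next pvec entry only at the last subpage. */
		if (page->index + page->nr - 1 == xa_index)
			i++;
		slots[xa_index] = NULL;		/* the xas_store(&xas, NULL) */
	}
	assert(i == 2);
	return 0;
}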
@@ -1520,7 +1523,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
 	XA_STATE(xas, &mapping->i_pages, offset);
-	struct page *head, *page;
+	struct page *page;
 
 	rcu_read_lock();
 repeat:
@@ -1535,25 +1538,19 @@ repeat:
 	if (!page || xa_is_value(page))
 		goto out;
 
-	head = compound_head(page);
-	if (!page_cache_get_speculative(head))
+	if (!page_cache_get_speculative(page))
 		goto repeat;
 
-	/* The page was split under us? */
-	if (compound_head(page) != head) {
-		put_page(head);
-		goto repeat;
-	}
-
 	/*
-	 * Has the page moved?
+	 * Has the page moved or been split?
	 * This is part of the lockless pagecache protocol. See
 	 * include/linux/pagemap.h for details.
 	 */
 	if (unlikely(page != xas_reload(&xas))) {
-		put_page(head);
+		put_page(page);
 		goto repeat;
 	}
+	page = find_subpage(page, offset);
 out:
 	rcu_read_unlock();
 
@@ -1735,7 +1732,6 @@ unsigned find_get_entries(struct address_space *mapping,
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, ULONG_MAX) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/*
@@ -1746,17 +1742,13 @@ unsigned find_get_entries(struct address_space *mapping,
 		if (xa_is_value(page))
 			goto export;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
+		page = find_subpage(page, xas.xa_index);
 
 export:
 		indices[ret] = xas.xa_index;
@@ -1765,7 +1757,7 @@ export:
 			break;
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -1807,33 +1799,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, end) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/* Skip over shadow, swap and DAX entries */
 		if (xa_is_value(page))
 			continue;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret] = page;
+		pages[ret] = find_subpage(page, xas.xa_index);
 		if (++ret == nr_pages) {
 			*start = xas.xa_index + 1;
 			goto out;
 		}
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -1878,7 +1864,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 
 	rcu_read_lock();
 	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/*
@@ -1888,24 +1873,19 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 		if (xa_is_value(page))
 			break;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret] = page;
+		pages[ret] = find_subpage(page, xas.xa_index);
 		if (++ret == nr_pages)
 			break;
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -1941,7 +1921,6 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 
 	rcu_read_lock();
 	xas_for_each_marked(&xas, page, end, tag) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/*
@@ -1952,26 +1931,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 		if (xa_is_value(page))
 			continue;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret] = page;
+		pages[ret] = find_subpage(page, xas.xa_index);
 		if (++ret == nr_pages) {
 			*index = xas.xa_index + 1;
 			goto out;
 		}
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -2652,7 +2626,7 @@ void filemap_map_pages(struct vm_fault *vmf,
 	pgoff_t last_pgoff = start_pgoff;
 	unsigned long max_idx;
 	XA_STATE(xas, &mapping->i_pages, start_pgoff);
-	struct page *head, *page;
+	struct page *page;
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, end_pgoff) {
@@ -2661,24 +2635,19 @@ void filemap_map_pages(struct vm_fault *vmf,
 		if (xa_is_value(page))
 			goto next;
 
-		head = compound_head(page);
-
 		/*
 		 * Check for a locked page first, as a speculative
 		 * reference may adversely influence page migration.
 		 */
-		if (PageLocked(head))
+		if (PageLocked(page))
 			goto next;
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto next;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto skip;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto skip;
+		page = find_subpage(page, xas.xa_index);
 
 		if (!PageUptodate(page) ||
 				PageReadahead(page) ||
mm/huge_memory.c
@@ -2497,6 +2497,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	struct page *head = compound_head(page);
 	pg_data_t *pgdat = page_pgdat(head);
 	struct lruvec *lruvec;
+	struct address_space *swap_cache = NULL;
+	unsigned long offset = 0;
 	int i;
 
 	lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2504,6 +2506,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
+	if (PageAnon(head) && PageSwapCache(head)) {
+		swp_entry_t entry = { .val = page_private(head) };
+
+		offset = swp_offset(entry);
+		swap_cache = swap_address_space(entry);
+		xa_lock(&swap_cache->i_pages);
+	}
+
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2513,6 +2523,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
 				shmem_uncharge(head->mapping->host, 1);
 			put_page(head + i);
+		} else if (!PageAnon(page)) {
+			__xa_store(&head->mapping->i_pages, head[i].index,
+					head + i, 0);
+		} else if (swap_cache) {
+			__xa_store(&swap_cache->i_pages, offset + i,
+					head + i, 0);
 		}
 	}
 
@@ -2523,10 +2539,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* See comment in __split_huge_page_tail() */
 	if (PageAnon(head)) {
 		/* Additional pin to swap cache */
-		if (PageSwapCache(head))
+		if (PageSwapCache(head)) {
 			page_ref_add(head, 2);
-		else
+			xa_unlock(&swap_cache->i_pages);
+		} else {
 			page_ref_inc(head);
+		}
 	} else {
 		/* Additional pin to page cache */
 		page_ref_add(head, 2);
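__split_huge_page() is the inverse operation: once the compound page is split, each slot that held the head must point back at its own subpage, which is what the new __xa_store() calls above do. A userspace model of that slot rewrite (illustrative only; names are stand-ins):

#include <assert.h>

#define HPAGE_NR 4			/* tiny compound page for the model */

struct page { int id; };

int main(void)
{
	static struct page head[HPAGE_NR];	/* head[0] is the head page */
	struct page *slots[HPAGE_NR];
	int i;

	/* Pre-split: every covered slot stores the head pointer. */
	for (i = 0; i < HPAGE_NR; i++)
		slots[i] = &head[0];

	/*
	 * Mirrors the __xa_store() loop in __split_huge_page(): walk the
	 * tail pages from the end and point each slot at its own subpage.
	 */
	for (i = HPAGE_NR - 1; i >= 1; i--)
		slots[i] = &head[i];

	for (i = 0; i < HPAGE_NR; i++)
		assert(slots[i] == &head[i]);
	return 0;
}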
mm/khugepaged.c
@@ -1378,7 +1378,7 @@ static void collapse_shmem(struct mm_struct *mm,
 				result = SCAN_FAIL;
 				goto xa_locked;
 			}
-			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+			xas_store(&xas, new_page);
 			nr_none++;
 			continue;
 		}
@@ -1454,7 +1454,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+		xas_store(&xas, new_page);
 		continue;
 out_unlock:
 		unlock_page(page);
mm/memfd.c
@@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_state *xas)
 	xas_for_each(xas, page, ULONG_MAX) {
 		if (xa_is_value(page))
 			continue;
+		page = find_subpage(page, xas->xa_index);
 		if (page_count(page) - page_mapcount(page) > 1)
 			xas_set_mark(xas, MEMFD_TAG_PINNED);
 
@@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 			bool clear = true;
 			if (xa_is_value(page))
 				continue;
+			page = find_subpage(page, xas.xa_index);
 			if (page_count(page) - page_mapcount(page) != 1) {
 				/*
 				 * On the last scan, we clean up all those tags
mm/migrate.c
@@ -460,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
 		for (i = 1; i < HPAGE_PMD_NR; i++) {
 			xas_next(&xas);
-			xas_store(&xas, newpage + i);
+			xas_store(&xas, newpage);
 		}
 	}
 
mm/shmem.c
@@ -631,7 +631,7 @@ static int shmem_add_to_page_cache(struct page *page,
 		if (xas_error(&xas))
 			goto unlock;
 next:
-		xas_store(&xas, page + i);
+		xas_store(&xas, page);
 		if (++i < nr) {
 			xas_next(&xas);
 			goto next;
mm/swap_state.c
@@ -133,7 +133,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
 		for (i = 0; i < nr; i++) {
 			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
 			set_page_private(page + i, entry.val + i);
-			xas_store(&xas, page + i);
+			xas_store(&xas, page);
 			xas_next(&xas);
 		}
 		address_space->nrpages += nr;
@@ -168,7 +168,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
 
 	for (i = 0; i < nr; i++) {
 		void *entry = xas_store(&xas, NULL);
-		VM_BUG_ON_PAGE(entry != page + i, entry);
+		VM_BUG_ON_PAGE(entry != page, entry);
 		set_page_private(page + i, 0);
 		xas_next(&xas);
 	}