mm, swap: skip swapcache for swapin of synchronous device

With fast swap storage, platforms want to use swap more aggressively,
and swap-in is crucial to application latency.

rw_page()-based synchronous devices such as zram, pmem and btt are
examples of such fast storage.  When I profiled swapin performance with
a zram lz4 decompression test, the software overhead was more than 70%.
It would probably be even higher on nvdimm.

This patch aims to reduce swap-in latency by skipping the swapcache if
the swap device is a synchronous device, such as an rw_page()-based
device.  It improves my swapin test by 45% (5G sequential swapin, no
readahead, from 2.41sec to 1.64sec).

Link: http://lkml.kernel.org/r/1505886205-9671-5-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ilya Dryomov <idryomov@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Minchan Kim 2017-11-15 17:33:07 -08:00 committed by Linus Torvalds
parent 539a6fea7f
commit 0bcac06f27
4 changed files with 57 additions and 23 deletions

View File

@ -466,6 +466,7 @@ extern int page_swapcount(struct page *);
extern int __swp_swapcount(swp_entry_t entry); extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *); extern struct swap_info_struct *page_swap_info(struct page *);
extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);
extern bool reuse_swap_page(struct page *, int *); extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *); extern int try_to_free_swap(struct page *);
struct backing_dev_info; struct backing_dev_info;
@ -474,6 +475,16 @@ extern void exit_swap_address_space(unsigned int type);
#else /* CONFIG_SWAP */ #else /* CONFIG_SWAP */
/*
 * Stub used in the "#else" (!CONFIG_SWAP) branch: performs no read and
 * unconditionally returns 0.  Both arguments are ignored.
 */
static inline int swap_readpage(struct page *page, bool do_poll)
{
return 0;
}
/*
 * Stub used in the "#else" (!CONFIG_SWAP) branch: ignores @entry and
 * always returns NULL.
 *
 * NOTE(review): do_swap_page() dereferences the result (si->flags) without
 * a NULL check; presumably that path is unreachable without CONFIG_SWAP --
 * confirm.
 */
static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
return NULL;
}
#define swap_address_space(entry) (NULL) #define swap_address_space(entry) (NULL)
#define get_nr_swap_pages() 0L #define get_nr_swap_pages() 0L
#define total_swap_pages 0L #define total_swap_pages 0L

View File

@ -2842,7 +2842,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
int do_swap_page(struct vm_fault *vmf) int do_swap_page(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = vmf->vma; struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL, *swapcache; struct page *page = NULL, *swapcache = NULL;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
struct vma_swap_readahead swap_ra; struct vma_swap_readahead swap_ra;
swp_entry_t entry; swp_entry_t entry;
@ -2881,17 +2881,35 @@ int do_swap_page(struct vm_fault *vmf)
} }
goto out; goto out;
} }
delayacct_set_flag(DELAYACCT_PF_SWAPIN); delayacct_set_flag(DELAYACCT_PF_SWAPIN);
if (!page) if (!page)
page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
vmf->address); vmf->address);
if (!page) { if (!page) {
if (vma_readahead) struct swap_info_struct *si = swp_swap_info(entry);
page = do_swap_page_readahead(entry,
GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); if (!(si->flags & SWP_SYNCHRONOUS_IO)) {
else if (vma_readahead)
page = swapin_readahead(entry, page = do_swap_page_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, vmf->address); GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
else
page = swapin_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, vmf->address);
swapcache = page;
} else {
/* skip swapcache */
page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
if (page) {
__SetPageLocked(page);
__SetPageSwapBacked(page);
set_page_private(page, entry.val);
lru_cache_add_anon(page);
swap_readpage(page, true);
}
}
if (!page) { if (!page) {
/* /*
* Back out if somebody else faulted in this pte * Back out if somebody else faulted in this pte
@ -2920,7 +2938,6 @@ int do_swap_page(struct vm_fault *vmf)
goto out_release; goto out_release;
} }
swapcache = page;
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
delayacct_clear_flag(DELAYACCT_PF_SWAPIN); delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@ -2935,7 +2952,8 @@ int do_swap_page(struct vm_fault *vmf)
* test below, are not enough to exclude that. Even if it is still * test below, are not enough to exclude that. Even if it is still
* swapcache, we need to check that the page's swap has not changed. * swapcache, we need to check that the page's swap has not changed.
*/ */
if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) if (unlikely((!PageSwapCache(page) ||
page_private(page) != entry.val)) && swapcache)
goto out_page; goto out_page;
page = ksm_might_need_to_copy(page, vma, vmf->address); page = ksm_might_need_to_copy(page, vma, vmf->address);
@ -2988,14 +3006,16 @@ int do_swap_page(struct vm_fault *vmf)
pte = pte_mksoft_dirty(pte); pte = pte_mksoft_dirty(pte);
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
vmf->orig_pte = pte; vmf->orig_pte = pte;
if (page == swapcache) {
do_page_add_anon_rmap(page, vma, vmf->address, exclusive); /* ksm created a completely new copy */
mem_cgroup_commit_charge(page, memcg, true, false); if (unlikely(page != swapcache && swapcache)) {
activate_page(page);
} else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, vmf->address, false); page_add_new_anon_rmap(page, vma, vmf->address, false);
mem_cgroup_commit_charge(page, memcg, false, false); mem_cgroup_commit_charge(page, memcg, false, false);
lru_cache_add_active_or_unevictable(page, vma); lru_cache_add_active_or_unevictable(page, vma);
} else {
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
mem_cgroup_commit_charge(page, memcg, true, false);
activate_page(page);
} }
swap_free(entry); swap_free(entry);
@ -3003,7 +3023,7 @@ int do_swap_page(struct vm_fault *vmf)
(vma->vm_flags & VM_LOCKED) || PageMlocked(page)) (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
try_to_free_swap(page); try_to_free_swap(page);
unlock_page(page); unlock_page(page);
if (page != swapcache) { if (page != swapcache && swapcache) {
/* /*
* Hold the lock to avoid the swap entry to be reused * Hold the lock to avoid the swap entry to be reused
* until we take the PT lock for the pte_same() check * until we take the PT lock for the pte_same() check
@ -3036,7 +3056,7 @@ out_page:
unlock_page(page); unlock_page(page);
out_release: out_release:
put_page(page); put_page(page);
if (page != swapcache) { if (page != swapcache && swapcache) {
unlock_page(swapcache); unlock_page(swapcache);
put_page(swapcache); put_page(swapcache);
} }

View File

@ -347,7 +347,7 @@ out:
return ret; return ret;
} }
int swap_readpage(struct page *page, bool do_poll) int swap_readpage(struct page *page, bool synchronous)
{ {
struct bio *bio; struct bio *bio;
int ret = 0; int ret = 0;
@ -355,7 +355,7 @@ int swap_readpage(struct page *page, bool do_poll)
blk_qc_t qc; blk_qc_t qc;
struct gendisk *disk; struct gendisk *disk;
VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageUptodate(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page);
if (frontswap_load(page) == 0) { if (frontswap_load(page) == 0) {
@ -403,7 +403,7 @@ int swap_readpage(struct page *page, bool do_poll)
count_vm_event(PSWPIN); count_vm_event(PSWPIN);
bio_get(bio); bio_get(bio);
qc = submit_bio(bio); qc = submit_bio(bio);
while (do_poll) { while (synchronous) {
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(bio->bi_private)) if (!READ_ONCE(bio->bi_private))
break; break;

View File

@ -3455,10 +3455,15 @@ int swapcache_prepare(swp_entry_t entry)
return __swap_duplicate(entry, SWAP_HAS_CACHE); return __swap_duplicate(entry, SWAP_HAS_CACHE);
} }
/*
 * Return the swap_info[] slot selected by the type encoded in @entry.
 * No bounds or NULL check is performed on the lookup here.
 */
struct swap_info_struct *swp_swap_info(swp_entry_t entry)
{
return swap_info[swp_type(entry)];
}
struct swap_info_struct *page_swap_info(struct page *page) struct swap_info_struct *page_swap_info(struct page *page)
{ {
swp_entry_t swap = { .val = page_private(page) }; swp_entry_t entry = { .val = page_private(page) };
return swap_info[swp_type(swap)]; return swp_swap_info(entry);
} }
/* /*
@ -3466,7 +3471,6 @@ struct swap_info_struct *page_swap_info(struct page *page)
*/ */
struct address_space *__page_file_mapping(struct page *page) struct address_space *__page_file_mapping(struct page *page)
{ {
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
return page_swap_info(page)->swap_file->f_mapping; return page_swap_info(page)->swap_file->f_mapping;
} }
EXPORT_SYMBOL_GPL(__page_file_mapping); EXPORT_SYMBOL_GPL(__page_file_mapping);
@ -3474,7 +3478,6 @@ EXPORT_SYMBOL_GPL(__page_file_mapping);
pgoff_t __page_file_index(struct page *page) pgoff_t __page_file_index(struct page *page)
{ {
swp_entry_t swap = { .val = page_private(page) }; swp_entry_t swap = { .val = page_private(page) };
VM_BUG_ON_PAGE(!PageSwapCache(page), page);
return swp_offset(swap); return swp_offset(swap);
} }
EXPORT_SYMBOL_GPL(__page_file_index); EXPORT_SYMBOL_GPL(__page_file_index);