mm/swap: allocate swap slots in batches
Currently, swap slots are allocated one page at a time, causing contention on the swap_info lock protecting the swap partition for every page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to request multiple swap slots at once. This will reduce the lock contention on the swap_info lock. scan_swap_map_slots can also operate more efficiently, as swap slots often occur in clusters close to each other on a swap device and it is quicker to allocate them together.

Link: http://lkml.kernel.org/r/9fec2845544371f62c3763d43510045e33d286a6.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e8c26ab605
commit 36005bae20
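To make the new interface concrete, here is a minimal, hypothetical caller sketch (not part of this patch): it keeps a small local array of swp_entry_t and refills it with a single get_swap_pages() call, so the swap_info lock is contended once per batch rather than once per page. Only get_swap_pages(), swp_entry_t and SWAP_BATCH come from this patch; the cache[], n_cached and helper names are illustrative, and the sketch deliberately ignores the locking a real kernel caller would need around its local cache.

/*
 * Hypothetical consumer of the batched API introduced by this patch.
 * cache[], n_cached and the helper names below are illustrative only.
 */
#include <linux/swap.h>

static swp_entry_t cache[SWAP_BATCH];	/* SWAP_BATCH (64) is added by this patch */
static int n_cached;

/* Refill the local array with up to SWAP_BATCH slots in one call. */
static int refill_swap_slot_cache(void)
{
	/* get_swap_pages() may return fewer slots than requested, or zero. */
	n_cached = get_swap_pages(SWAP_BATCH, cache);
	return n_cached;
}

/* Hand out one slot, touching the swap_info lock only when the batch runs dry. */
static swp_entry_t get_one_swap_slot(void)
{
	if (!n_cached && !refill_swap_slot_cache())
		return (swp_entry_t) {0};	/* no swap space available */
	return cache[--n_cached];
}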
include/linux/swap.h

@@ -27,6 +27,7 @@ struct bio;
 #define SWAP_FLAGS_VALID	(SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
 				 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
 				 SWAP_FLAG_DISCARD_PAGES)
+#define SWAP_BATCH 64
 
 static inline int current_is_kswapd(void)
 {
@@ -386,6 +387,7 @@ static inline long get_nr_swap_pages(void)
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
mm/swapfile.c | 136
@@ -496,7 +496,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
  * might involve allocating a new cluster for current CPU too.
  */
-static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 	unsigned long *offset, unsigned long *scan_base)
 {
 	struct percpu_cluster *cluster;
@@ -520,7 +520,7 @@ new_cluster:
 			*scan_base = *offset = si->cluster_next;
 			goto new_cluster;
 		} else
-			return;
+			return false;
 	}
 
 	found_free = false;
@@ -552,16 +552,22 @@ new_cluster:
 	cluster->next = tmp + 1;
 	*offset = tmp;
 	*scan_base = tmp;
+	return found_free;
 }
 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-				   unsigned char usage)
+static int scan_swap_map_slots(struct swap_info_struct *si,
+			       unsigned char usage, int nr,
+			       swp_entry_t slots[])
 {
 	struct swap_cluster_info *ci;
 	unsigned long offset;
 	unsigned long scan_base;
 	unsigned long last_in_cluster = 0;
 	int latency_ration = LATENCY_LIMIT;
+	int n_ret = 0;
+
+	if (nr > SWAP_BATCH)
+		nr = SWAP_BATCH;
 
 	/*
 	 * We try to cluster swap pages by allocating them sequentially
@@ -579,8 +585,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 	/* SSD algorithm */
 	if (si->cluster_info) {
-		scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
-		goto checks;
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto scan;
 	}
 
 	if (unlikely(!si->cluster_nr--)) {
@@ -624,8 +632,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 checks:
 	if (si->cluster_info) {
-		while (scan_swap_map_ssd_cluster_conflict(si, offset))
-			scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+		while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+			/* take a break if we already got some slots */
+			if (n_ret)
+				goto done;
+			if (!scan_swap_map_try_ssd_cluster(si, &offset,
+							&scan_base))
+				goto scan;
+		}
 	}
 	if (!(si->flags & SWP_WRITEOK))
 		goto no_page;
@@ -650,7 +664,10 @@ checks:
 
 	if (si->swap_map[offset]) {
 		unlock_cluster(ci);
-		goto scan;
+		if (!n_ret)
+			goto scan;
+		else
+			goto done;
 	}
 
 	if (offset == si->lowest_bit)
@@ -669,9 +686,43 @@ checks:
 	inc_cluster_info_page(si, si->cluster_info, offset);
 	unlock_cluster(ci);
 	si->cluster_next = offset + 1;
-	si->flags -= SWP_SCANNING;
+	slots[n_ret++] = swp_entry(si->type, offset);
 
-	return offset;
+	/* got enough slots or reach max slots? */
+	if ((n_ret == nr) || (offset >= si->highest_bit))
+		goto done;
+
+	/* search for next available slot */
+
+	/* time to take a break? */
+	if (unlikely(--latency_ration < 0)) {
+		if (n_ret)
+			goto done;
+		spin_unlock(&si->lock);
+		cond_resched();
+		spin_lock(&si->lock);
+		latency_ration = LATENCY_LIMIT;
+	}
+
+	/* try to get more slots in cluster */
+	if (si->cluster_info) {
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
+	}
+	/* non-ssd case */
+	++offset;
+
+	/* non-ssd case, still more slots in cluster? */
+	if (si->cluster_nr && !si->swap_map[offset]) {
+		--si->cluster_nr;
+		goto checks;
+	}
+
+done:
+	si->flags -= SWP_SCANNING;
+	return n_ret;
 
 scan:
 	spin_unlock(&si->lock);
@@ -709,17 +760,41 @@ scan:
 
 no_page:
 	si->flags -= SWP_SCANNING;
-	return 0;
+	return n_ret;
 }
 
-swp_entry_t get_swap_page(void)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+				   unsigned char usage)
+{
+	swp_entry_t entry;
+	int n_ret;
+
+	n_ret = scan_swap_map_slots(si, usage, 1, &entry);
+
+	if (n_ret)
+		return swp_offset(entry);
+	else
+		return 0;
+
+}
+
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
 {
 	struct swap_info_struct *si, *next;
 	pgoff_t offset;
+	long avail_pgs;
+	int n_ret = 0;
 
-	if (atomic_long_read(&nr_swap_pages) <= 0)
+	avail_pgs = atomic_long_read(&nr_swap_pages);
+	if (avail_pgs <= 0)
 		goto noswap;
-	atomic_long_dec(&nr_swap_pages);
+
+	if (n_goal > SWAP_BATCH)
+		n_goal = SWAP_BATCH;
+
+	if (n_goal > avail_pgs)
+		n_goal = avail_pgs;
+
+	atomic_long_sub(n_goal, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -745,14 +820,14 @@ start_over:
 			spin_unlock(&si->lock);
 			goto nextsi;
 		}
 
-		/* This is called for allocating swap entry for cache */
-		offset = scan_swap_map(si, SWAP_HAS_CACHE);
+		n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+					    n_goal, swp_entries);
 		spin_unlock(&si->lock);
-		if (offset)
-			return swp_entry(si->type, offset);
+		if (n_ret)
+			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
-			si->type);
+		       si->type);
 
 		spin_lock(&swap_avail_lock);
 nextsi:
 		/*
@@ -763,7 +838,8 @@ nextsi:
 		 * up between us dropping swap_avail_lock and taking si->lock.
 		 * Since we dropped the swap_avail_lock, the swap_avail_head
 		 * list may have been modified; so if next is still in the
-		 * swap_avail_head list then try it, otherwise start over.
+		 * swap_avail_head list then try it, otherwise start over
+		 * if we have not gotten any slots.
 		 */
 		if (plist_node_empty(&next->avail_list))
 			goto start_over;
@@ -771,9 +847,19 @@ nextsi:
 
 	spin_unlock(&swap_avail_lock);
 
-	atomic_long_inc(&nr_swap_pages);
+check_out:
+	if (n_ret < n_goal)
+		atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
 noswap:
-	return (swp_entry_t) {0};
+	return n_ret;
+}
+
+swp_entry_t get_swap_page(void)
+{
+	swp_entry_t entry;
+
+	get_swap_pages(1, &entry);
+	return entry;
 }
 
 /* The only caller of this function is now suspend routine */