mirror of
https://github.com/torvalds/linux.git
synced 2024-11-24 13:11:40 +00:00
17 hotfixes. 9 are cc:stable. 13 are MM and 4 are non-MM.
The usual collection of singletons - please see the changelogs. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZyfGDAAKCRDdBJ7gKXxA jr19AQD6bfDF/6L2Alq1QG26pgrgccEbKzDSzR6pBajwCbdrNQD/XPhiv3zRJfGf lgt0Qkqwe/ApBhVYUnL8y1CePv3EDgA= =W5W0 -----END PGP SIGNATURE----- Merge tag 'mm-hotfixes-stable-2024-11-03-10-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Pull misc fixes from Andrew Morton: "17 hotfixes. 9 are cc:stable. 13 are MM and 4 are non-MM. The usual collection of singletons - please see the changelogs" * tag 'mm-hotfixes-stable-2024-11-03-10-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify() mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned sizes mm: shrinker: avoid memleak in alloc_shrinker_info .mailmap: update e-mail address for Eugen Hristev vmscan,migrate: fix page count imbalance on node stats when demoting pages mailmap: update Jarkko's email addresses mm: allow set/clear page_type again nilfs2: fix potential deadlock with newly created symlinks Squashfs: fix variable overflow in squashfs_readpage_block kasan: remove vmalloc_percpu test tools/mm: -Werror fixes in page-types/slabinfo mm, swap: avoid over reclaim of full clusters mm: fix PSWPIN counter for large folios swap-in mm: avoid VM_BUG_ON when try to map an anon large folio to zero page. mm/codetag: fix null pointer check logic for ref and tag mm/gup: stop leaking pinned pages in low memory conditions
This commit is contained in:
commit
a8cc743272
5
.mailmap
5
.mailmap
@ -199,7 +199,8 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
|
||||
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
|
||||
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
|
||||
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
|
||||
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
|
||||
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
|
||||
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
|
||||
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
|
||||
Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net>
|
||||
@ -282,7 +283,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
|
||||
Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
|
||||
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@parity.io>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
|
||||
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
|
||||
|
@ -157,6 +157,9 @@ static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
||||
/* slow symlink */
|
||||
inode->i_op = &nilfs_symlink_inode_operations;
|
||||
inode_nohighmem(inode);
|
||||
mapping_set_gfp_mask(inode->i_mapping,
|
||||
mapping_gfp_constraint(inode->i_mapping,
|
||||
~__GFP_FS));
|
||||
inode->i_mapping->a_ops = &nilfs_aops;
|
||||
err = page_symlink(inode, symname, l);
|
||||
if (err)
|
||||
|
@ -30,7 +30,8 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
|
||||
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
|
||||
loff_t start_index = folio->index & ~mask;
|
||||
loff_t end_index = start_index | mask;
|
||||
int i, n, pages, bytes, res = -ENOMEM;
|
||||
loff_t index;
|
||||
int i, pages, bytes, res = -ENOMEM;
|
||||
struct page **page, *last_page;
|
||||
struct squashfs_page_actor *actor;
|
||||
void *pageaddr;
|
||||
@ -45,9 +46,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
|
||||
return res;
|
||||
|
||||
/* Try to grab all the pages covered by the Squashfs block */
|
||||
for (i = 0, n = start_index; n <= end_index; n++) {
|
||||
page[i] = (n == folio->index) ? target_page :
|
||||
grab_cache_page_nowait(target_page->mapping, n);
|
||||
for (i = 0, index = start_index; index <= end_index; index++) {
|
||||
page[i] = (index == folio->index) ? target_page :
|
||||
grab_cache_page_nowait(target_page->mapping, index);
|
||||
|
||||
if (page[i] == NULL)
|
||||
continue;
|
||||
|
@ -135,18 +135,21 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
|
||||
#endif
|
||||
|
||||
/* Caller should verify both ref and tag to be valid */
|
||||
static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
|
||||
static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
|
||||
{
|
||||
alloc_tag_add_check(ref, tag);
|
||||
if (!ref || !tag)
|
||||
return;
|
||||
return false;
|
||||
|
||||
ref->ct = &tag->ct;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
|
||||
static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
|
||||
{
|
||||
__alloc_tag_ref_set(ref, tag);
|
||||
if (unlikely(!__alloc_tag_ref_set(ref, tag)))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* We need to increment the call counter every time we have a new
|
||||
* allocation or when we split a large allocation into smaller ones.
|
||||
@ -154,12 +157,13 @@ static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *t
|
||||
* counter because when we free each part the counter will be decremented.
|
||||
*/
|
||||
this_cpu_inc(tag->counters->calls);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
|
||||
{
|
||||
alloc_tag_ref_set(ref, tag);
|
||||
this_cpu_add(tag->counters->bytes, bytes);
|
||||
if (likely(alloc_tag_ref_set(ref, tag)))
|
||||
this_cpu_add(tag->counters->bytes, bytes);
|
||||
}
|
||||
|
||||
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
|
||||
|
@ -458,9 +458,7 @@ struct lru_gen_folio {
|
||||
|
||||
enum {
|
||||
MM_LEAF_TOTAL, /* total leaf entries */
|
||||
MM_LEAF_OLD, /* old leaf entries */
|
||||
MM_LEAF_YOUNG, /* young leaf entries */
|
||||
MM_NONLEAF_TOTAL, /* total non-leaf entries */
|
||||
MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */
|
||||
MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */
|
||||
NR_MM_STATS
|
||||
@ -557,7 +555,7 @@ struct lru_gen_memcg {
|
||||
|
||||
void lru_gen_init_pgdat(struct pglist_data *pgdat);
|
||||
void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
|
||||
bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
|
||||
|
||||
void lru_gen_init_memcg(struct mem_cgroup *memcg);
|
||||
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
|
||||
@ -576,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
|
@ -975,12 +975,16 @@ static __always_inline bool folio_test_##fname(const struct folio *folio) \
|
||||
} \
|
||||
static __always_inline void __folio_set_##fname(struct folio *folio) \
|
||||
{ \
|
||||
if (folio_test_##fname(folio)) \
|
||||
return; \
|
||||
VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \
|
||||
folio); \
|
||||
folio->page.page_type = (unsigned int)PGTY_##lname << 24; \
|
||||
} \
|
||||
static __always_inline void __folio_clear_##fname(struct folio *folio) \
|
||||
{ \
|
||||
if (folio->page.page_type == UINT_MAX) \
|
||||
return; \
|
||||
VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
|
||||
folio->page.page_type = UINT_MAX; \
|
||||
}
|
||||
@ -993,11 +997,15 @@ static __always_inline int Page##uname(const struct page *page) \
|
||||
} \
|
||||
static __always_inline void __SetPage##uname(struct page *page) \
|
||||
{ \
|
||||
if (Page##uname(page)) \
|
||||
return; \
|
||||
VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \
|
||||
page->page_type = (unsigned int)PGTY_##lname << 24; \
|
||||
} \
|
||||
static __always_inline void __ClearPage##uname(struct page *page) \
|
||||
{ \
|
||||
if (page->page_type == UINT_MAX) \
|
||||
return; \
|
||||
VM_BUG_ON_PAGE(!Page##uname(page), page); \
|
||||
page->page_type = UINT_MAX; \
|
||||
}
|
||||
|
@ -335,6 +335,7 @@ struct swap_info_struct {
|
||||
* list.
|
||||
*/
|
||||
struct work_struct discard_work; /* discard worker */
|
||||
struct work_struct reclaim_work; /* reclaim worker */
|
||||
struct list_head discard_clusters; /* discard clusters list */
|
||||
struct plist_node avail_lists[]; /*
|
||||
* entries in swap_avail_heads, one
|
||||
|
33
mm/gup.c
33
mm/gup.c
@ -2394,20 +2394,25 @@ err:
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether all folios are *allowed* to be pinned indefinitely (longterm).
|
||||
* Check whether all folios are *allowed* to be pinned indefinitely (long term).
|
||||
* Rather confusingly, all folios in the range are required to be pinned via
|
||||
* FOLL_PIN, before calling this routine.
|
||||
*
|
||||
* If any folios in the range are not allowed to be pinned, then this routine
|
||||
* will migrate those folios away, unpin all the folios in the range and return
|
||||
* -EAGAIN. The caller should re-pin the entire range with FOLL_PIN and then
|
||||
* call this routine again.
|
||||
* Return values:
|
||||
*
|
||||
* If an error other than -EAGAIN occurs, this indicates a migration failure.
|
||||
* The caller should give up, and propagate the error back up the call stack.
|
||||
*
|
||||
* If everything is OK and all folios in the range are allowed to be pinned,
|
||||
* 0: if everything is OK and all folios in the range are allowed to be pinned,
|
||||
* then this routine leaves all folios pinned and returns zero for success.
|
||||
*
|
||||
* -EAGAIN: if any folios in the range are not allowed to be pinned, then this
|
||||
* routine will migrate those folios away, unpin all the folios in the range. If
|
||||
* migration of the entire set of folios succeeds, then -EAGAIN is returned. The
|
||||
* caller should re-pin the entire range with FOLL_PIN and then call this
|
||||
* routine again.
|
||||
*
|
||||
* -ENOMEM, or any other -errno: if an error *other* than -EAGAIN occurs, this
|
||||
* indicates a migration failure. The caller should give up, and propagate the
|
||||
* error back up the call stack. The caller does not need to unpin any folios in
|
||||
* that case, because this routine will do the unpinning.
|
||||
*/
|
||||
static long check_and_migrate_movable_folios(unsigned long nr_folios,
|
||||
struct folio **folios)
|
||||
@ -2425,10 +2430,8 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine just converts all the pages in the @pages array to folios and
|
||||
* calls check_and_migrate_movable_folios() to do the heavy lifting.
|
||||
*
|
||||
* Please see the check_and_migrate_movable_folios() documentation for details.
|
||||
* Return values and behavior are the same as those for
|
||||
* check_and_migrate_movable_folios().
|
||||
*/
|
||||
static long check_and_migrate_movable_pages(unsigned long nr_pages,
|
||||
struct page **pages)
|
||||
@ -2437,8 +2440,10 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
|
||||
long i, ret;
|
||||
|
||||
folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
|
||||
if (!folios)
|
||||
if (!folios) {
|
||||
unpin_user_pages(pages, nr_pages);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_pages; i++)
|
||||
folios[i] = page_folio(pages[i]);
|
||||
|
@ -1810,32 +1810,6 @@ static void vm_map_ram_tags(struct kunit *test)
|
||||
free_pages((unsigned long)p_ptr, 1);
|
||||
}
|
||||
|
||||
static void vmalloc_percpu(struct kunit *test)
|
||||
{
|
||||
char __percpu *ptr;
|
||||
int cpu;
|
||||
|
||||
/*
|
||||
* This test is specifically crafted for the software tag-based mode,
|
||||
* the only tag-based mode that poisons percpu mappings.
|
||||
*/
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
|
||||
|
||||
ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
char *c_ptr = per_cpu_ptr(ptr, cpu);
|
||||
|
||||
KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
|
||||
KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
|
||||
|
||||
/* Make sure that in-bounds accesses don't crash the kernel. */
|
||||
*c_ptr = 0;
|
||||
}
|
||||
|
||||
free_percpu(ptr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
|
||||
* KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
|
||||
@ -2023,7 +1997,6 @@ static struct kunit_case kasan_kunit_test_cases[] = {
|
||||
KUNIT_CASE(vmalloc_oob),
|
||||
KUNIT_CASE(vmap_tags),
|
||||
KUNIT_CASE(vm_map_ram_tags),
|
||||
KUNIT_CASE(vmalloc_percpu),
|
||||
KUNIT_CASE(match_all_not_assigned),
|
||||
KUNIT_CASE(match_all_ptr_tag),
|
||||
KUNIT_CASE(match_all_mem_tag),
|
||||
|
@ -206,7 +206,8 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
|
||||
pte_t newpte;
|
||||
void *addr;
|
||||
|
||||
VM_BUG_ON_PAGE(PageCompound(page), page);
|
||||
if (PageCompound(page))
|
||||
return false;
|
||||
VM_BUG_ON_PAGE(!PageAnon(page), page);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
|
||||
@ -1177,7 +1178,7 @@ static void migrate_folio_done(struct folio *src,
|
||||
* not accounted to NR_ISOLATED_*. They can be recognized
|
||||
* as __folio_test_movable
|
||||
*/
|
||||
if (likely(!__folio_test_movable(src)))
|
||||
if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION)
|
||||
mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
|
||||
folio_is_file_lru(src), -folio_nr_pages(src));
|
||||
|
||||
|
@ -900,7 +900,8 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
|
||||
|
||||
if (get_area) {
|
||||
addr = get_area(file, addr, len, pgoff, flags);
|
||||
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
|
||||
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
|
||||
&& IS_ALIGNED(len, PMD_SIZE)) {
|
||||
/* Ensures that larger anonymous mappings are THP aligned. */
|
||||
addr = thp_get_unmapped_area_vmflags(file, addr, len,
|
||||
pgoff, flags, vm_flags);
|
||||
|
@ -570,7 +570,7 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
|
||||
* attempt to access it in the page fault retry time check.
|
||||
*/
|
||||
get_task_struct(current);
|
||||
count_vm_event(PSWPIN);
|
||||
count_vm_events(PSWPIN, folio_nr_pages(folio));
|
||||
submit_bio_wait(&bio);
|
||||
__end_swap_bio_read(&bio);
|
||||
put_task_struct(current);
|
||||
@ -585,7 +585,7 @@ static void swap_read_folio_bdev_async(struct folio *folio,
|
||||
bio->bi_iter.bi_sector = swap_folio_sector(folio);
|
||||
bio->bi_end_io = end_swap_bio_read;
|
||||
bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
|
||||
count_vm_event(PSWPIN);
|
||||
count_vm_events(PSWPIN, folio_nr_pages(folio));
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
|
@ -885,13 +885,10 @@ static bool folio_referenced_one(struct folio *folio,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pvmw.pte) {
|
||||
if (lru_gen_enabled() &&
|
||||
pte_young(ptep_get(pvmw.pte))) {
|
||||
lru_gen_look_around(&pvmw);
|
||||
if (lru_gen_enabled() && pvmw.pte) {
|
||||
if (lru_gen_look_around(&pvmw))
|
||||
referenced++;
|
||||
}
|
||||
|
||||
} else if (pvmw.pte) {
|
||||
if (ptep_clear_flush_young_notify(vma, address,
|
||||
pvmw.pte))
|
||||
referenced++;
|
||||
|
@ -76,19 +76,21 @@ void free_shrinker_info(struct mem_cgroup *memcg)
|
||||
|
||||
int alloc_shrinker_info(struct mem_cgroup *memcg)
|
||||
{
|
||||
struct shrinker_info *info;
|
||||
int nid, ret = 0;
|
||||
int array_size = 0;
|
||||
|
||||
mutex_lock(&shrinker_mutex);
|
||||
array_size = shrinker_unit_size(shrinker_nr_max);
|
||||
for_each_node(nid) {
|
||||
info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
|
||||
struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size,
|
||||
GFP_KERNEL, nid);
|
||||
if (!info)
|
||||
goto err;
|
||||
info->map_nr_max = shrinker_nr_max;
|
||||
if (shrinker_unit_alloc(info, NULL, nid))
|
||||
if (shrinker_unit_alloc(info, NULL, nid)) {
|
||||
kvfree(info);
|
||||
goto err;
|
||||
}
|
||||
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
|
||||
}
|
||||
mutex_unlock(&shrinker_mutex);
|
||||
|
@ -731,15 +731,16 @@ done:
|
||||
return offset;
|
||||
}
|
||||
|
||||
static void swap_reclaim_full_clusters(struct swap_info_struct *si)
|
||||
/* Reclaim from full clusters: scan one cluster, or up to inuse_pages / SWAPFILE_CLUSTER when @force is set */
|
||||
static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
|
||||
{
|
||||
long to_scan = 1;
|
||||
unsigned long offset, end;
|
||||
struct swap_cluster_info *ci;
|
||||
unsigned char *map = si->swap_map;
|
||||
int nr_reclaim, total_reclaimed = 0;
|
||||
int nr_reclaim;
|
||||
|
||||
if (atomic_long_read(&nr_swap_pages) <= SWAPFILE_CLUSTER)
|
||||
if (force)
|
||||
to_scan = si->inuse_pages / SWAPFILE_CLUSTER;
|
||||
|
||||
while (!list_empty(&si->full_clusters)) {
|
||||
@ -749,28 +750,36 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si)
|
||||
end = min(si->max, offset + SWAPFILE_CLUSTER);
|
||||
to_scan--;
|
||||
|
||||
spin_unlock(&si->lock);
|
||||
while (offset < end) {
|
||||
if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) {
|
||||
spin_unlock(&si->lock);
|
||||
nr_reclaim = __try_to_reclaim_swap(si, offset,
|
||||
TTRS_ANYWAY | TTRS_DIRECT);
|
||||
spin_lock(&si->lock);
|
||||
if (nr_reclaim > 0) {
|
||||
offset += nr_reclaim;
|
||||
total_reclaimed += nr_reclaim;
|
||||
continue;
|
||||
} else if (nr_reclaim < 0) {
|
||||
offset += -nr_reclaim;
|
||||
if (nr_reclaim) {
|
||||
offset += abs(nr_reclaim);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
if (to_scan <= 0 || total_reclaimed)
|
||||
spin_lock(&si->lock);
|
||||
|
||||
if (to_scan <= 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void swap_reclaim_work(struct work_struct *work)
|
||||
{
|
||||
struct swap_info_struct *si;
|
||||
|
||||
si = container_of(work, struct swap_info_struct, reclaim_work);
|
||||
|
||||
spin_lock(&si->lock);
|
||||
swap_reclaim_full_clusters(si, true);
|
||||
spin_unlock(&si->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to get swap entries with specified order from current cpu's swap entry
|
||||
* pool (a cluster). This might involve allocating a new cluster for current CPU
|
||||
@ -800,6 +809,10 @@ new_cluster:
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Try reclaim from full clusters if free clusters list is drained */
|
||||
if (vm_swap_full())
|
||||
swap_reclaim_full_clusters(si, false);
|
||||
|
||||
if (order < PMD_ORDER) {
|
||||
unsigned int frags = 0;
|
||||
|
||||
@ -881,13 +894,6 @@ new_cluster:
|
||||
}
|
||||
|
||||
done:
|
||||
/* Try reclaim from full clusters if device is nearfull */
|
||||
if (vm_swap_full() && (!found || (si->pages - si->inuse_pages) < SWAPFILE_CLUSTER)) {
|
||||
swap_reclaim_full_clusters(si);
|
||||
if (!found && !order && si->pages != si->inuse_pages)
|
||||
goto new_cluster;
|
||||
}
|
||||
|
||||
cluster->next[order] = offset;
|
||||
return found;
|
||||
}
|
||||
@ -922,6 +928,9 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
|
||||
si->lowest_bit = si->max;
|
||||
si->highest_bit = 0;
|
||||
del_from_avail_list(si);
|
||||
|
||||
if (vm_swap_full())
|
||||
schedule_work(&si->reclaim_work);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2816,6 +2825,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
|
||||
wait_for_completion(&p->comp);
|
||||
|
||||
flush_work(&p->discard_work);
|
||||
flush_work(&p->reclaim_work);
|
||||
|
||||
destroy_swap_extents(p);
|
||||
if (p->flags & SWP_CONTINUED)
|
||||
@ -3376,6 +3386,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
|
||||
return PTR_ERR(si);
|
||||
|
||||
INIT_WORK(&si->discard_work, swap_discard_work);
|
||||
INIT_WORK(&si->reclaim_work, swap_reclaim_work);
|
||||
|
||||
name = getname(specialfile);
|
||||
if (IS_ERR(name)) {
|
||||
|
102
mm/vmscan.c
102
mm/vmscan.c
@ -56,6 +56,7 @@
|
||||
#include <linux/khugepaged.h>
|
||||
#include <linux/rculist_nulls.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
@ -3294,7 +3295,8 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk
|
||||
return false;
|
||||
}
|
||||
|
||||
static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
|
||||
static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr,
|
||||
struct pglist_data *pgdat)
|
||||
{
|
||||
unsigned long pfn = pte_pfn(pte);
|
||||
|
||||
@ -3306,13 +3308,20 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
|
||||
if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
|
||||
return -1;
|
||||
|
||||
if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm))
|
||||
return -1;
|
||||
|
||||
if (WARN_ON_ONCE(!pfn_valid(pfn)))
|
||||
return -1;
|
||||
|
||||
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
return -1;
|
||||
|
||||
return pfn;
|
||||
}
|
||||
|
||||
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
|
||||
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr,
|
||||
struct pglist_data *pgdat)
|
||||
{
|
||||
unsigned long pfn = pmd_pfn(pmd);
|
||||
|
||||
@ -3324,9 +3333,15 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned
|
||||
if (WARN_ON_ONCE(pmd_devmap(pmd)))
|
||||
return -1;
|
||||
|
||||
if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm))
|
||||
return -1;
|
||||
|
||||
if (WARN_ON_ONCE(!pfn_valid(pfn)))
|
||||
return -1;
|
||||
|
||||
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
return -1;
|
||||
|
||||
return pfn;
|
||||
}
|
||||
|
||||
@ -3335,10 +3350,6 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
|
||||
{
|
||||
struct folio *folio;
|
||||
|
||||
/* try to avoid unnecessary memory loads */
|
||||
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
return NULL;
|
||||
|
||||
folio = pfn_folio(pfn);
|
||||
if (folio_nid(folio) != pgdat->node_id)
|
||||
return NULL;
|
||||
@ -3394,21 +3405,16 @@ restart:
|
||||
total++;
|
||||
walk->mm_stats[MM_LEAF_TOTAL]++;
|
||||
|
||||
pfn = get_pte_pfn(ptent, args->vma, addr);
|
||||
pfn = get_pte_pfn(ptent, args->vma, addr, pgdat);
|
||||
if (pfn == -1)
|
||||
continue;
|
||||
|
||||
if (!pte_young(ptent)) {
|
||||
walk->mm_stats[MM_LEAF_OLD]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
|
||||
if (!folio)
|
||||
continue;
|
||||
|
||||
if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
|
||||
VM_WARN_ON_ONCE(true);
|
||||
if (!ptep_clear_young_notify(args->vma, addr, pte + i))
|
||||
continue;
|
||||
|
||||
young++;
|
||||
walk->mm_stats[MM_LEAF_YOUNG]++;
|
||||
@ -3474,21 +3480,25 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
|
||||
/* don't round down the first address */
|
||||
addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
|
||||
|
||||
pfn = get_pmd_pfn(pmd[i], vma, addr);
|
||||
if (pfn == -1)
|
||||
if (!pmd_present(pmd[i]))
|
||||
goto next;
|
||||
|
||||
if (!pmd_trans_huge(pmd[i])) {
|
||||
if (!walk->force_scan && should_clear_pmd_young())
|
||||
if (!walk->force_scan && should_clear_pmd_young() &&
|
||||
!mm_has_notifiers(args->mm))
|
||||
pmdp_test_and_clear_young(vma, addr, pmd + i);
|
||||
goto next;
|
||||
}
|
||||
|
||||
pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat);
|
||||
if (pfn == -1)
|
||||
goto next;
|
||||
|
||||
folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
|
||||
if (!folio)
|
||||
goto next;
|
||||
|
||||
if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
|
||||
if (!pmdp_clear_young_notify(vma, addr, pmd + i))
|
||||
goto next;
|
||||
|
||||
walk->mm_stats[MM_LEAF_YOUNG]++;
|
||||
@ -3546,27 +3556,18 @@ restart:
|
||||
}
|
||||
|
||||
if (pmd_trans_huge(val)) {
|
||||
unsigned long pfn = pmd_pfn(val);
|
||||
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
|
||||
unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat);
|
||||
|
||||
walk->mm_stats[MM_LEAF_TOTAL]++;
|
||||
|
||||
if (!pmd_young(val)) {
|
||||
walk->mm_stats[MM_LEAF_OLD]++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* try to avoid unnecessary memory loads */
|
||||
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
continue;
|
||||
|
||||
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
|
||||
if (pfn != -1)
|
||||
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
|
||||
continue;
|
||||
}
|
||||
|
||||
walk->mm_stats[MM_NONLEAF_TOTAL]++;
|
||||
|
||||
if (!walk->force_scan && should_clear_pmd_young()) {
|
||||
if (!walk->force_scan && should_clear_pmd_young() &&
|
||||
!mm_has_notifiers(args->mm)) {
|
||||
if (!pmd_young(val))
|
||||
continue;
|
||||
|
||||
@ -4040,13 +4041,13 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
* the PTE table to the Bloom filter. This forms a feedback loop between the
|
||||
* eviction and the aging.
|
||||
*/
|
||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
{
|
||||
int i;
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
struct lru_gen_mm_walk *walk;
|
||||
int young = 0;
|
||||
int young = 1;
|
||||
pte_t *pte = pvmw->pte;
|
||||
unsigned long addr = pvmw->address;
|
||||
struct vm_area_struct *vma = pvmw->vma;
|
||||
@ -4062,12 +4063,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
lockdep_assert_held(pvmw->ptl);
|
||||
VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
|
||||
|
||||
if (!ptep_clear_young_notify(vma, addr, pte))
|
||||
return false;
|
||||
|
||||
if (spin_is_contended(pvmw->ptl))
|
||||
return;
|
||||
return true;
|
||||
|
||||
/* exclude special VMAs containing anon pages from COW */
|
||||
if (vma->vm_flags & VM_SPECIAL)
|
||||
return;
|
||||
return true;
|
||||
|
||||
/* avoid taking the LRU lock under the PTL when possible */
|
||||
walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
|
||||
@ -4075,6 +4079,9 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
start = max(addr & PMD_MASK, vma->vm_start);
|
||||
end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1;
|
||||
|
||||
if (end - start == PAGE_SIZE)
|
||||
return true;
|
||||
|
||||
if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
|
||||
if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
end = start + MIN_LRU_BATCH * PAGE_SIZE;
|
||||
@ -4088,7 +4095,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
|
||||
/* folio_update_gen() requires stable folio_memcg() */
|
||||
if (!mem_cgroup_trylock_pages(memcg))
|
||||
return;
|
||||
return true;
|
||||
|
||||
arch_enter_lazy_mmu_mode();
|
||||
|
||||
@ -4098,19 +4105,16 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
unsigned long pfn;
|
||||
pte_t ptent = ptep_get(pte + i);
|
||||
|
||||
pfn = get_pte_pfn(ptent, vma, addr);
|
||||
pfn = get_pte_pfn(ptent, vma, addr, pgdat);
|
||||
if (pfn == -1)
|
||||
continue;
|
||||
|
||||
if (!pte_young(ptent))
|
||||
continue;
|
||||
|
||||
folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
|
||||
if (!folio)
|
||||
continue;
|
||||
|
||||
if (!ptep_test_and_clear_young(vma, addr, pte + i))
|
||||
VM_WARN_ON_ONCE(true);
|
||||
if (!ptep_clear_young_notify(vma, addr, pte + i))
|
||||
continue;
|
||||
|
||||
young++;
|
||||
|
||||
@ -4140,6 +4144,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
/* feedback from rmap walkers to page table walkers */
|
||||
if (mm_state && suitable_to_scan(i, young))
|
||||
update_bloom_filter(mm_state, max_seq, pvmw->pmd);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
@ -5254,11 +5260,11 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
|
||||
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
seq_printf(m, " %10d", tier);
|
||||
for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
const char *s = " ";
|
||||
const char *s = "xxx";
|
||||
unsigned long n[3] = {};
|
||||
|
||||
if (seq == max_seq) {
|
||||
s = "RT ";
|
||||
s = "RTx";
|
||||
n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
|
||||
n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
|
||||
} else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
|
||||
@ -5280,14 +5286,14 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
|
||||
|
||||
seq_puts(m, " ");
|
||||
for (i = 0; i < NR_MM_STATS; i++) {
|
||||
const char *s = " ";
|
||||
const char *s = "xxxx";
|
||||
unsigned long n = 0;
|
||||
|
||||
if (seq == max_seq && NR_HIST_GENS == 1) {
|
||||
s = "LOYNFA";
|
||||
s = "TYFA";
|
||||
n = READ_ONCE(mm_state->stats[hist][i]);
|
||||
} else if (seq != max_seq && NR_HIST_GENS > 1) {
|
||||
s = "loynfa";
|
||||
s = "tyfa";
|
||||
n = READ_ONCE(mm_state->stats[hist][i]);
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <time.h>
|
||||
#include <setjmp.h>
|
||||
#include <signal.h>
|
||||
#include <inttypes.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/fcntl.h>
|
||||
@ -391,9 +392,9 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
|
||||
if (opt_file)
|
||||
printf("%lx\t", voff);
|
||||
if (opt_list_cgroup)
|
||||
printf("@%llu\t", (unsigned long long)cgroup0);
|
||||
printf("@%" PRIu64 "\t", cgroup0);
|
||||
if (opt_list_mapcnt)
|
||||
printf("%lu\t", mapcnt0);
|
||||
printf("%" PRIu64 "\t", mapcnt0);
|
||||
printf("%lx\t%lx\t%s\n",
|
||||
index, count, page_flag_name(flags0));
|
||||
}
|
||||
@ -419,9 +420,9 @@ static void show_page(unsigned long voffset, unsigned long offset,
|
||||
if (opt_file)
|
||||
printf("%lx\t", voffset);
|
||||
if (opt_list_cgroup)
|
||||
printf("@%llu\t", (unsigned long long)cgroup);
|
||||
printf("@%" PRIu64 "\t", cgroup);
|
||||
if (opt_list_mapcnt)
|
||||
printf("%lu\t", mapcnt);
|
||||
printf("%" PRIu64 "\t", mapcnt);
|
||||
|
||||
printf("%lx\t%s\n", offset, page_flag_name(flags));
|
||||
}
|
||||
|
@ -1297,7 +1297,9 @@ static void read_slab_dir(void)
|
||||
slab->cpu_partial_free = get_obj("cpu_partial_free");
|
||||
slab->alloc_node_mismatch = get_obj("alloc_node_mismatch");
|
||||
slab->deactivate_bypass = get_obj("deactivate_bypass");
|
||||
chdir("..");
|
||||
if (chdir(".."))
|
||||
fatal("Unable to chdir from slab ../%s\n",
|
||||
slab->name);
|
||||
if (slab->name[0] == ':')
|
||||
alias_targets++;
|
||||
slab++;
|
||||
|
Loading…
Reference in New Issue
Block a user