17 hotfixes. 9 are cc:stable. 13 are MM and 4 are non-MM.

The usual collection of singletons - please see the changelogs.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZyfGDAAKCRDdBJ7gKXxA
 jr19AQD6bfDF/6L2Alq1QG26pgrgccEbKzDSzR6pBajwCbdrNQD/XPhiv3zRJfGf
 lgt0Qkqwe/ApBhVYUnL8y1CePv3EDgA=
 =W5W0
 -----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2024-11-03-10-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "17 hotfixes.  9 are cc:stable.  13 are MM and 4 are non-MM.

  The usual collection of singletons - please see the changelogs"

* tag 'mm-hotfixes-stable-2024-11-03-10-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify()
  mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats
  mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned sizes
  mm: shrinker: avoid memleak in alloc_shrinker_info
  .mailmap: update e-mail address for Eugen Hristev
  vmscan,migrate: fix page count imbalance on node stats when demoting pages
  mailmap: update Jarkko's email addresses
  mm: allow set/clear page_type again
  nilfs2: fix potential deadlock with newly created symlinks
  Squashfs: fix variable overflow in squashfs_readpage_block
  kasan: remove vmalloc_percpu test
  tools/mm: -Werror fixes in page-types/slabinfo
  mm, swap: avoid over reclaim of full clusters
  mm: fix PSWPIN counter for large folios swap-in
  mm: avoid VM_BUG_ON when try to map an anon large folio to zero page.
  mm/codetag: fix null pointer check logic for ref and tag
  mm/gup: stop leaking pinned pages in low memory conditions
This commit is contained in:
Linus Torvalds 2024-11-03 10:25:05 -10:00
commit a8cc743272
18 changed files with 159 additions and 143 deletions

View File

@ -199,7 +199,8 @@ Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com>
Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net>
@ -282,7 +283,7 @@ Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@parity.io>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>

View File

@ -157,6 +157,9 @@ static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
mapping_set_gfp_mask(inode->i_mapping,
mapping_gfp_constraint(inode->i_mapping,
~__GFP_FS));
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)

View File

@ -30,7 +30,8 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
loff_t start_index = folio->index & ~mask;
loff_t end_index = start_index | mask;
int i, n, pages, bytes, res = -ENOMEM;
loff_t index;
int i, pages, bytes, res = -ENOMEM;
struct page **page, *last_page;
struct squashfs_page_actor *actor;
void *pageaddr;
@ -45,9 +46,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
return res;
/* Try to grab all the pages covered by the Squashfs block */
for (i = 0, n = start_index; n <= end_index; n++) {
page[i] = (n == folio->index) ? target_page :
grab_cache_page_nowait(target_page->mapping, n);
for (i = 0, index = start_index; index <= end_index; index++) {
page[i] = (index == folio->index) ? target_page :
grab_cache_page_nowait(target_page->mapping, index);
if (page[i] == NULL)
continue;

View File

@ -135,18 +135,21 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
#endif
/* Caller should verify both ref and tag to be valid */
static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
alloc_tag_add_check(ref, tag);
if (!ref || !tag)
return;
return false;
ref->ct = &tag->ct;
return true;
}
static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
__alloc_tag_ref_set(ref, tag);
if (unlikely(!__alloc_tag_ref_set(ref, tag)))
return false;
/*
* We need in increment the call counter every time we have a new
* allocation or when we split a large allocation into smaller ones.
@ -154,12 +157,13 @@ static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *t
* counter because when we free each part the counter will be decremented.
*/
this_cpu_inc(tag->counters->calls);
return true;
}
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
{
alloc_tag_ref_set(ref, tag);
this_cpu_add(tag->counters->bytes, bytes);
if (likely(alloc_tag_ref_set(ref, tag)))
this_cpu_add(tag->counters->bytes, bytes);
}
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)

View File

@ -458,9 +458,7 @@ struct lru_gen_folio {
enum {
MM_LEAF_TOTAL, /* total leaf entries */
MM_LEAF_OLD, /* old leaf entries */
MM_LEAF_YOUNG, /* young leaf entries */
MM_NONLEAF_TOTAL, /* total non-leaf entries */
MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */
MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */
NR_MM_STATS
@ -557,7 +555,7 @@ struct lru_gen_memcg {
void lru_gen_init_pgdat(struct pglist_data *pgdat);
void lru_gen_init_lruvec(struct lruvec *lruvec);
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
void lru_gen_init_memcg(struct mem_cgroup *memcg);
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
@ -576,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
{
}
static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
return false;
}
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)

View File

@ -975,12 +975,16 @@ static __always_inline bool folio_test_##fname(const struct folio *folio) \
} \
static __always_inline void __folio_set_##fname(struct folio *folio) \
{ \
if (folio_test_##fname(folio)) \
return; \
VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \
folio); \
folio->page.page_type = (unsigned int)PGTY_##lname << 24; \
} \
static __always_inline void __folio_clear_##fname(struct folio *folio) \
{ \
if (folio->page.page_type == UINT_MAX) \
return; \
VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
folio->page.page_type = UINT_MAX; \
}
@ -993,11 +997,15 @@ static __always_inline int Page##uname(const struct page *page) \
} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
if (Page##uname(page)) \
return; \
VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \
page->page_type = (unsigned int)PGTY_##lname << 24; \
} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
if (page->page_type == UINT_MAX) \
return; \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type = UINT_MAX; \
}

View File

@ -335,6 +335,7 @@ struct swap_info_struct {
* list.
*/
struct work_struct discard_work; /* discard worker */
struct work_struct reclaim_work; /* reclaim worker */
struct list_head discard_clusters; /* discard clusters list */
struct plist_node avail_lists[]; /*
* entries in swap_avail_heads, one

View File

@ -2394,20 +2394,25 @@ err:
}
/*
* Check whether all folios are *allowed* to be pinned indefinitely (longterm).
* Check whether all folios are *allowed* to be pinned indefinitely (long term).
* Rather confusingly, all folios in the range are required to be pinned via
* FOLL_PIN, before calling this routine.
*
* If any folios in the range are not allowed to be pinned, then this routine
* will migrate those folios away, unpin all the folios in the range and return
* -EAGAIN. The caller should re-pin the entire range with FOLL_PIN and then
* call this routine again.
* Return values:
*
* If an error other than -EAGAIN occurs, this indicates a migration failure.
* The caller should give up, and propagate the error back up the call stack.
*
* If everything is OK and all folios in the range are allowed to be pinned,
* 0: if everything is OK and all folios in the range are allowed to be pinned,
* then this routine leaves all folios pinned and returns zero for success.
*
* -EAGAIN: if any folios in the range are not allowed to be pinned, then this
* routine will migrate those folios away, unpin all the folios in the range. If
* migration of the entire set of folios succeeds, then -EAGAIN is returned. The
* caller should re-pin the entire range with FOLL_PIN and then call this
* routine again.
*
* -ENOMEM, or any other -errno: if an error *other* than -EAGAIN occurs, this
* indicates a migration failure. The caller should give up, and propagate the
* error back up the call stack. The caller does not need to unpin any folios in
* that case, because this routine will do the unpinning.
*/
static long check_and_migrate_movable_folios(unsigned long nr_folios,
struct folio **folios)
@ -2425,10 +2430,8 @@ static long check_and_migrate_movable_folios(unsigned long nr_folios,
}
/*
* This routine just converts all the pages in the @pages array to folios and
* calls check_and_migrate_movable_folios() to do the heavy lifting.
*
* Please see the check_and_migrate_movable_folios() documentation for details.
* Return values and behavior are the same as those for
* check_and_migrate_movable_folios().
*/
static long check_and_migrate_movable_pages(unsigned long nr_pages,
struct page **pages)
@ -2437,8 +2440,10 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
long i, ret;
folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL);
if (!folios)
if (!folios) {
unpin_user_pages(pages, nr_pages);
return -ENOMEM;
}
for (i = 0; i < nr_pages; i++)
folios[i] = page_folio(pages[i]);

View File

@ -1810,32 +1810,6 @@ static void vm_map_ram_tags(struct kunit *test)
free_pages((unsigned long)p_ptr, 1);
}
static void vmalloc_percpu(struct kunit *test)
{
char __percpu *ptr;
int cpu;
/*
* This test is specifically crafted for the software tag-based mode,
* the only tag-based mode that poisons percpu mappings.
*/
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
for_each_possible_cpu(cpu) {
char *c_ptr = per_cpu_ptr(ptr, cpu);
KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
/* Make sure that in-bounds accesses don't crash the kernel. */
*c_ptr = 0;
}
free_percpu(ptr);
}
/*
* Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
* KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
@ -2023,7 +1997,6 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(vmalloc_oob),
KUNIT_CASE(vmap_tags),
KUNIT_CASE(vm_map_ram_tags),
KUNIT_CASE(vmalloc_percpu),
KUNIT_CASE(match_all_not_assigned),
KUNIT_CASE(match_all_ptr_tag),
KUNIT_CASE(match_all_mem_tag),

View File

@ -206,7 +206,8 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
pte_t newpte;
void *addr;
VM_BUG_ON_PAGE(PageCompound(page), page);
if (PageCompound(page))
return false;
VM_BUG_ON_PAGE(!PageAnon(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
@ -1177,7 +1178,7 @@ static void migrate_folio_done(struct folio *src,
* not accounted to NR_ISOLATED_*. They can be recognized
* as __folio_test_movable
*/
if (likely(!__folio_test_movable(src)))
if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION)
mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
folio_is_file_lru(src), -folio_nr_pages(src));

View File

@ -900,7 +900,8 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
if (get_area) {
addr = get_area(file, addr, len, pgoff, flags);
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
&& IS_ALIGNED(len, PMD_SIZE)) {
/* Ensures that larger anonymous mappings are THP aligned. */
addr = thp_get_unmapped_area_vmflags(file, addr, len,
pgoff, flags, vm_flags);

View File

@ -570,7 +570,7 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
* attempt to access it in the page fault retry time check.
*/
get_task_struct(current);
count_vm_event(PSWPIN);
count_vm_events(PSWPIN, folio_nr_pages(folio));
submit_bio_wait(&bio);
__end_swap_bio_read(&bio);
put_task_struct(current);
@ -585,7 +585,7 @@ static void swap_read_folio_bdev_async(struct folio *folio,
bio->bi_iter.bi_sector = swap_folio_sector(folio);
bio->bi_end_io = end_swap_bio_read;
bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
count_vm_event(PSWPIN);
count_vm_events(PSWPIN, folio_nr_pages(folio));
submit_bio(bio);
}

View File

@ -885,13 +885,10 @@ static bool folio_referenced_one(struct folio *folio,
return false;
}
if (pvmw.pte) {
if (lru_gen_enabled() &&
pte_young(ptep_get(pvmw.pte))) {
lru_gen_look_around(&pvmw);
if (lru_gen_enabled() && pvmw.pte) {
if (lru_gen_look_around(&pvmw))
referenced++;
}
} else if (pvmw.pte) {
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte))
referenced++;

View File

@ -76,19 +76,21 @@ void free_shrinker_info(struct mem_cgroup *memcg)
int alloc_shrinker_info(struct mem_cgroup *memcg)
{
struct shrinker_info *info;
int nid, ret = 0;
int array_size = 0;
mutex_lock(&shrinker_mutex);
array_size = shrinker_unit_size(shrinker_nr_max);
for_each_node(nid) {
info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid);
struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size,
GFP_KERNEL, nid);
if (!info)
goto err;
info->map_nr_max = shrinker_nr_max;
if (shrinker_unit_alloc(info, NULL, nid))
if (shrinker_unit_alloc(info, NULL, nid)) {
kvfree(info);
goto err;
}
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
mutex_unlock(&shrinker_mutex);

View File

@ -731,15 +731,16 @@ done:
return offset;
}
static void swap_reclaim_full_clusters(struct swap_info_struct *si)
/* Return true if reclaimed a whole cluster */
static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force)
{
long to_scan = 1;
unsigned long offset, end;
struct swap_cluster_info *ci;
unsigned char *map = si->swap_map;
int nr_reclaim, total_reclaimed = 0;
int nr_reclaim;
if (atomic_long_read(&nr_swap_pages) <= SWAPFILE_CLUSTER)
if (force)
to_scan = si->inuse_pages / SWAPFILE_CLUSTER;
while (!list_empty(&si->full_clusters)) {
@ -749,28 +750,36 @@ static void swap_reclaim_full_clusters(struct swap_info_struct *si)
end = min(si->max, offset + SWAPFILE_CLUSTER);
to_scan--;
spin_unlock(&si->lock);
while (offset < end) {
if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) {
spin_unlock(&si->lock);
nr_reclaim = __try_to_reclaim_swap(si, offset,
TTRS_ANYWAY | TTRS_DIRECT);
spin_lock(&si->lock);
if (nr_reclaim > 0) {
offset += nr_reclaim;
total_reclaimed += nr_reclaim;
continue;
} else if (nr_reclaim < 0) {
offset += -nr_reclaim;
if (nr_reclaim) {
offset += abs(nr_reclaim);
continue;
}
}
offset++;
}
if (to_scan <= 0 || total_reclaimed)
spin_lock(&si->lock);
if (to_scan <= 0)
break;
}
}
static void swap_reclaim_work(struct work_struct *work)
{
struct swap_info_struct *si;
si = container_of(work, struct swap_info_struct, reclaim_work);
spin_lock(&si->lock);
swap_reclaim_full_clusters(si, true);
spin_unlock(&si->lock);
}
/*
* Try to get swap entries with specified order from current cpu's swap entry
* pool (a cluster). This might involve allocating a new cluster for current CPU
@ -800,6 +809,10 @@ new_cluster:
goto done;
}
/* Try reclaim from full clusters if free clusters list is drained */
if (vm_swap_full())
swap_reclaim_full_clusters(si, false);
if (order < PMD_ORDER) {
unsigned int frags = 0;
@ -881,13 +894,6 @@ new_cluster:
}
done:
/* Try reclaim from full clusters if device is nearfull */
if (vm_swap_full() && (!found || (si->pages - si->inuse_pages) < SWAPFILE_CLUSTER)) {
swap_reclaim_full_clusters(si);
if (!found && !order && si->pages != si->inuse_pages)
goto new_cluster;
}
cluster->next[order] = offset;
return found;
}
@ -922,6 +928,9 @@ static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset,
si->lowest_bit = si->max;
si->highest_bit = 0;
del_from_avail_list(si);
if (vm_swap_full())
schedule_work(&si->reclaim_work);
}
}
@ -2816,6 +2825,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
wait_for_completion(&p->comp);
flush_work(&p->discard_work);
flush_work(&p->reclaim_work);
destroy_swap_extents(p);
if (p->flags & SWP_CONTINUED)
@ -3376,6 +3386,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
return PTR_ERR(si);
INIT_WORK(&si->discard_work, swap_discard_work);
INIT_WORK(&si->reclaim_work, swap_reclaim_work);
name = getname(specialfile);
if (IS_ERR(name)) {

View File

@ -56,6 +56,7 @@
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <linux/mmu_notifier.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@ -3294,7 +3295,8 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk
return false;
}
static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr,
struct pglist_data *pgdat)
{
unsigned long pfn = pte_pfn(pte);
@ -3306,13 +3308,20 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
return -1;
if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm))
return -1;
if (WARN_ON_ONCE(!pfn_valid(pfn)))
return -1;
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
return -1;
return pfn;
}
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr,
struct pglist_data *pgdat)
{
unsigned long pfn = pmd_pfn(pmd);
@ -3324,9 +3333,15 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned
if (WARN_ON_ONCE(pmd_devmap(pmd)))
return -1;
if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm))
return -1;
if (WARN_ON_ONCE(!pfn_valid(pfn)))
return -1;
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
return -1;
return pfn;
}
@ -3335,10 +3350,6 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
{
struct folio *folio;
/* try to avoid unnecessary memory loads */
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
return NULL;
folio = pfn_folio(pfn);
if (folio_nid(folio) != pgdat->node_id)
return NULL;
@ -3394,21 +3405,16 @@ restart:
total++;
walk->mm_stats[MM_LEAF_TOTAL]++;
pfn = get_pte_pfn(ptent, args->vma, addr);
pfn = get_pte_pfn(ptent, args->vma, addr, pgdat);
if (pfn == -1)
continue;
if (!pte_young(ptent)) {
walk->mm_stats[MM_LEAF_OLD]++;
continue;
}
folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
if (!folio)
continue;
if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
VM_WARN_ON_ONCE(true);
if (!ptep_clear_young_notify(args->vma, addr, pte + i))
continue;
young++;
walk->mm_stats[MM_LEAF_YOUNG]++;
@ -3474,21 +3480,25 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
/* don't round down the first address */
addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
pfn = get_pmd_pfn(pmd[i], vma, addr);
if (pfn == -1)
if (!pmd_present(pmd[i]))
goto next;
if (!pmd_trans_huge(pmd[i])) {
if (!walk->force_scan && should_clear_pmd_young())
if (!walk->force_scan && should_clear_pmd_young() &&
!mm_has_notifiers(args->mm))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
}
pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat);
if (pfn == -1)
goto next;
folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
if (!folio)
goto next;
if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
if (!pmdp_clear_young_notify(vma, addr, pmd + i))
goto next;
walk->mm_stats[MM_LEAF_YOUNG]++;
@ -3546,27 +3556,18 @@ restart:
}
if (pmd_trans_huge(val)) {
unsigned long pfn = pmd_pfn(val);
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat);
walk->mm_stats[MM_LEAF_TOTAL]++;
if (!pmd_young(val)) {
walk->mm_stats[MM_LEAF_OLD]++;
continue;
}
/* try to avoid unnecessary memory loads */
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
continue;
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
if (pfn != -1)
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
continue;
}
walk->mm_stats[MM_NONLEAF_TOTAL]++;
if (!walk->force_scan && should_clear_pmd_young()) {
if (!walk->force_scan && should_clear_pmd_young() &&
!mm_has_notifiers(args->mm)) {
if (!pmd_young(val))
continue;
@ -4040,13 +4041,13 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
* the PTE table to the Bloom filter. This forms a feedback loop between the
* eviction and the aging.
*/
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
int i;
unsigned long start;
unsigned long end;
struct lru_gen_mm_walk *walk;
int young = 0;
int young = 1;
pte_t *pte = pvmw->pte;
unsigned long addr = pvmw->address;
struct vm_area_struct *vma = pvmw->vma;
@ -4062,12 +4063,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
lockdep_assert_held(pvmw->ptl);
VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
if (!ptep_clear_young_notify(vma, addr, pte))
return false;
if (spin_is_contended(pvmw->ptl))
return;
return true;
/* exclude special VMAs containing anon pages from COW */
if (vma->vm_flags & VM_SPECIAL)
return;
return true;
/* avoid taking the LRU lock under the PTL when possible */
walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
@ -4075,6 +4079,9 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
start = max(addr & PMD_MASK, vma->vm_start);
end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1;
if (end - start == PAGE_SIZE)
return true;
if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
end = start + MIN_LRU_BATCH * PAGE_SIZE;
@ -4088,7 +4095,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
/* folio_update_gen() requires stable folio_memcg() */
if (!mem_cgroup_trylock_pages(memcg))
return;
return true;
arch_enter_lazy_mmu_mode();
@ -4098,19 +4105,16 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
unsigned long pfn;
pte_t ptent = ptep_get(pte + i);
pfn = get_pte_pfn(ptent, vma, addr);
pfn = get_pte_pfn(ptent, vma, addr, pgdat);
if (pfn == -1)
continue;
if (!pte_young(ptent))
continue;
folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
if (!folio)
continue;
if (!ptep_test_and_clear_young(vma, addr, pte + i))
VM_WARN_ON_ONCE(true);
if (!ptep_clear_young_notify(vma, addr, pte + i))
continue;
young++;
@ -4140,6 +4144,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
/* feedback from rmap walkers to page table walkers */
if (mm_state && suitable_to_scan(i, young))
update_bloom_filter(mm_state, max_seq, pvmw->pmd);
return true;
}
/******************************************************************************
@ -5254,11 +5260,11 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
seq_printf(m, " %10d", tier);
for (type = 0; type < ANON_AND_FILE; type++) {
const char *s = " ";
const char *s = "xxx";
unsigned long n[3] = {};
if (seq == max_seq) {
s = "RT ";
s = "RTx";
n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
} else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
@ -5280,14 +5286,14 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_puts(m, " ");
for (i = 0; i < NR_MM_STATS; i++) {
const char *s = " ";
const char *s = "xxxx";
unsigned long n = 0;
if (seq == max_seq && NR_HIST_GENS == 1) {
s = "LOYNFA";
s = "TYFA";
n = READ_ONCE(mm_state->stats[hist][i]);
} else if (seq != max_seq && NR_HIST_GENS > 1) {
s = "loynfa";
s = "tyfa";
n = READ_ONCE(mm_state->stats[hist][i]);
}

View File

@ -22,6 +22,7 @@
#include <time.h>
#include <setjmp.h>
#include <signal.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
@ -391,9 +392,9 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
if (opt_file)
printf("%lx\t", voff);
if (opt_list_cgroup)
printf("@%llu\t", (unsigned long long)cgroup0);
printf("@%" PRIu64 "\t", cgroup0);
if (opt_list_mapcnt)
printf("%lu\t", mapcnt0);
printf("%" PRIu64 "\t", mapcnt0);
printf("%lx\t%lx\t%s\n",
index, count, page_flag_name(flags0));
}
@ -419,9 +420,9 @@ static void show_page(unsigned long voffset, unsigned long offset,
if (opt_file)
printf("%lx\t", voffset);
if (opt_list_cgroup)
printf("@%llu\t", (unsigned long long)cgroup);
printf("@%" PRIu64 "\t", cgroup)
if (opt_list_mapcnt)
printf("%lu\t", mapcnt);
printf("%" PRIu64 "\t", mapcnt);
printf("%lx\t%s\n", offset, page_flag_name(flags));
}

View File

@ -1297,7 +1297,9 @@ static void read_slab_dir(void)
slab->cpu_partial_free = get_obj("cpu_partial_free");
slab->alloc_node_mismatch = get_obj("alloc_node_mismatch");
slab->deactivate_bypass = get_obj("deactivate_bypass");
chdir("..");
if (chdir(".."))
fatal("Unable to chdir from slab ../%s\n",
slab->name);
if (slab->name[0] == ':')
alias_targets++;
slab++;