forked from Minki/linux
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"11 patches.

Subsystems affected by this patch series: mm (memcg, memory-failure, oom-kill, secretmem, vmalloc, hugetlb, damon, and tools), and ocfs2"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer
  mm/damon/core-test: fix wrong expectations for 'damon_split_regions_of()'
  mm: khugepaged: skip huge page collapse for special files
  mm, thp: bail out early in collapse_file for writeback page
  mm/vmalloc: fix numa spreading for large hash tables
  mm/secretmem: avoid letting secretmem_users drop to zero
  ocfs2: fix race between searching chunks and release journal_head from buffer_head
  mm/oom_kill.c: prevent a race between process_mrelease and exit_mmap
  mm: filemap: check if THP has hwpoisoned subpage for PMD page fault
  mm: hwpoison: remove the unnecessary THP check
  memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT
This commit is contained in:
commit
2c04d67ec1
@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
|
||||
{
|
||||
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
|
||||
struct journal_head *jh;
|
||||
int ret;
|
||||
int ret = 1;
|
||||
|
||||
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
|
||||
return 0;
|
||||
@ -1259,14 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
|
||||
if (!buffer_jbd(bg_bh))
|
||||
return 1;
|
||||
|
||||
jh = bh2jh(bg_bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
bg = (struct ocfs2_group_desc *) jh->b_committed_data;
|
||||
if (bg)
|
||||
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
|
||||
else
|
||||
ret = 1;
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
jbd_lock_bh_journal_head(bg_bh);
|
||||
if (buffer_jbd(bg_bh)) {
|
||||
jh = bh2jh(bg_bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
bg = (struct ocfs2_group_desc *) jh->b_committed_data;
|
||||
if (bg)
|
||||
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
|
||||
else
|
||||
ret = 1;
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
}
|
||||
jbd_unlock_bh_journal_head(bg_bh);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -171,6 +171,15 @@ enum pageflags {
|
||||
/* Compound pages. Stored in first tail page's flags */
|
||||
PG_double_map = PG_workingset,
|
||||
|
||||
#ifdef CONFIG_MEMORY_FAILURE
|
||||
/*
|
||||
* Compound pages. Stored in first tail page's flags.
|
||||
* Indicates that at least one subpage is hwpoisoned in the
|
||||
* THP.
|
||||
*/
|
||||
PG_has_hwpoisoned = PG_mappedtodisk,
|
||||
#endif
|
||||
|
||||
/* non-lru isolated movable page */
|
||||
PG_isolated = PG_reclaim,
|
||||
|
||||
@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap)
|
||||
TESTSCFLAG_FALSE(DoubleMap)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
|
||||
/*
|
||||
* PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
|
||||
* compound page.
|
||||
*
|
||||
* This flag is set by hwpoison handler. Cleared by THP split or free page.
|
||||
*/
|
||||
PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
|
||||
TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
|
||||
#else
|
||||
PAGEFLAG_FALSE(HasHWPoisoned)
|
||||
TESTSCFLAG_FALSE(HasHWPoisoned)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Check if a page is currently marked HWPoisoned. Note that this check is
|
||||
* best effort only and inherently racy: there is no way to synchronize with
|
||||
|
@ -219,14 +219,14 @@ static void damon_test_split_regions_of(struct kunit *test)
|
||||
r = damon_new_region(0, 22);
|
||||
damon_add_region(r, t);
|
||||
damon_split_regions_of(c, t, 2);
|
||||
KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 2u);
|
||||
KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u);
|
||||
damon_free_target(t);
|
||||
|
||||
t = damon_new_target(42);
|
||||
r = damon_new_region(0, 220);
|
||||
damon_add_region(r, t);
|
||||
damon_split_regions_of(c, t, 4);
|
||||
KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 4u);
|
||||
KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u);
|
||||
damon_free_target(t);
|
||||
damon_destroy_ctx(c);
|
||||
}
|
||||
|
@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
||||
/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
|
||||
lruvec = lock_page_lruvec(head);
|
||||
|
||||
ClearPageHasHWPoisoned(head);
|
||||
|
||||
for (i = nr - 1; i >= 1; i--) {
|
||||
__split_huge_page_tail(head, i, lruvec, list);
|
||||
/* Some pages can be beyond EOF: drop them from page cache */
|
||||
|
@ -445,22 +445,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma,
|
||||
if (!transhuge_vma_enabled(vma, vm_flags))
|
||||
return false;
|
||||
|
||||
if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
|
||||
vma->vm_pgoff, HPAGE_PMD_NR))
|
||||
return false;
|
||||
|
||||
/* Enabled via shmem mount options or sysfs settings. */
|
||||
if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
|
||||
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
|
||||
HPAGE_PMD_NR);
|
||||
}
|
||||
if (shmem_file(vma->vm_file))
|
||||
return shmem_huge_enabled(vma);
|
||||
|
||||
/* THP settings require madvise. */
|
||||
if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
|
||||
return false;
|
||||
|
||||
/* Read-only file mappings need to be aligned for THP to work. */
|
||||
/* Only regular file is valid */
|
||||
if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
|
||||
!inode_is_open_for_write(vma->vm_file->f_inode) &&
|
||||
(vm_flags & VM_EXEC)) {
|
||||
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
|
||||
HPAGE_PMD_NR);
|
||||
struct inode *inode = vma->vm_file->f_inode;
|
||||
|
||||
return !inode_is_open_for_write(inode) &&
|
||||
S_ISREG(inode->i_mode);
|
||||
}
|
||||
|
||||
if (!vma->anon_vma || vma->vm_ops)
|
||||
@ -1763,6 +1766,10 @@ static void collapse_file(struct mm_struct *mm,
|
||||
filemap_flush(mapping);
|
||||
result = SCAN_FAIL;
|
||||
goto xa_unlocked;
|
||||
} else if (PageWriteback(page)) {
|
||||
xas_unlock_irq(&xas);
|
||||
result = SCAN_FAIL;
|
||||
goto xa_unlocked;
|
||||
} else if (trylock_page(page)) {
|
||||
get_page(page);
|
||||
xas_unlock_irq(&xas);
|
||||
@ -1798,7 +1805,8 @@ static void collapse_file(struct mm_struct *mm,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!is_shmem && PageDirty(page)) {
|
||||
if (!is_shmem && (PageDirty(page) ||
|
||||
PageWriteback(page))) {
|
||||
/*
|
||||
* khugepaged only works on read-only fd, so this
|
||||
* page is dirty because it hasn't been flushed
|
||||
|
@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page)
|
||||
if (!HWPoisonHandlable(head))
|
||||
return -EBUSY;
|
||||
|
||||
if (PageTransHuge(head)) {
|
||||
/*
|
||||
* Non anonymous thp exists only in allocation/free time. We
|
||||
* can't handle such a case correctly, so let's give it up.
|
||||
* This should be better than triggering BUG_ON when kernel
|
||||
* tries to touch the "partially handled" page.
|
||||
*/
|
||||
if (!PageAnon(head)) {
|
||||
pr_err("Memory failure: %#lx: non anonymous thp\n",
|
||||
page_to_pfn(page));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (get_page_unless_zero(head)) {
|
||||
if (head == compound_head(page))
|
||||
return 1;
|
||||
@ -1708,6 +1694,20 @@ try_again:
|
||||
}
|
||||
|
||||
if (PageTransHuge(hpage)) {
|
||||
/*
|
||||
* The flag must be set after the refcount is bumped
|
||||
* otherwise it may race with THP split.
|
||||
* And the flag can't be set in get_hwpoison_page() since
|
||||
* it is called by soft offline too and it is just called
|
||||
* for !MF_COUNT_INCREASE. So here seems to be the best
|
||||
* place.
|
||||
*
|
||||
* No need to care about the above error handling paths for
|
||||
* get_hwpoison_page() since they handle either free page
|
||||
* or unhandlable page. The refcount is bumped iff the
|
||||
* page is a valid handlable page.
|
||||
*/
|
||||
SetPageHasHWPoisoned(hpage);
|
||||
if (try_to_split_thp_page(p, "Memory Failure") < 0) {
|
||||
action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
|
||||
res = -EBUSY;
|
||||
|
@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
|
||||
if (compound_order(page) != HPAGE_PMD_ORDER)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Just backoff if any subpage of a THP is corrupted otherwise
|
||||
* the corrupted page may be mapped by PMD silently to escape the
|
||||
* check. This kind of THP can only be PTE mapped. Access to
|
||||
* the corrupted subpage should trigger SIGBUS as expected.
|
||||
*/
|
||||
if (unlikely(PageHasHWPoisoned(page)))
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Archs like ppc64 need additional space to store information
|
||||
* related to pte entry. Use the preallocated table for that.
|
||||
|
@ -1150,7 +1150,7 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
|
||||
struct task_struct *task;
|
||||
struct task_struct *p;
|
||||
unsigned int f_flags;
|
||||
bool reap = true;
|
||||
bool reap = false;
|
||||
struct pid *pid;
|
||||
long ret = 0;
|
||||
|
||||
@ -1177,15 +1177,15 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
|
||||
goto put_task;
|
||||
}
|
||||
|
||||
mm = p->mm;
|
||||
mmgrab(mm);
|
||||
|
||||
/* If the work has been done already, just exit with success */
|
||||
if (test_bit(MMF_OOM_SKIP, &mm->flags))
|
||||
reap = false;
|
||||
else if (!task_will_free_mem(p)) {
|
||||
reap = false;
|
||||
ret = -EINVAL;
|
||||
if (mmget_not_zero(p->mm)) {
|
||||
mm = p->mm;
|
||||
if (task_will_free_mem(p))
|
||||
reap = true;
|
||||
else {
|
||||
/* Error only if the work has not been done already */
|
||||
if (!test_bit(MMF_OOM_SKIP, &mm->flags))
|
||||
ret = -EINVAL;
|
||||
}
|
||||
}
|
||||
task_unlock(p);
|
||||
|
||||
@ -1201,7 +1201,8 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
drop_mm:
|
||||
mmdrop(mm);
|
||||
if (mm)
|
||||
mmput(mm);
|
||||
put_task:
|
||||
put_task_struct(task);
|
||||
put_pid:
|
||||
|
@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
|
||||
VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
|
||||
|
||||
if (compound)
|
||||
if (compound) {
|
||||
ClearPageDoubleMap(page);
|
||||
ClearPageHasHWPoisoned(page);
|
||||
}
|
||||
for (i = 1; i < (1 << order); i++) {
|
||||
if (compound)
|
||||
bad += free_tail_pages_check(page, page + i);
|
||||
@ -5223,6 +5225,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
|
||||
if (unlikely(page_array && nr_pages - nr_populated == 0))
|
||||
goto out;
|
||||
|
||||
/* Bulk allocator does not support memcg accounting. */
|
||||
if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT))
|
||||
goto failed;
|
||||
|
||||
/* Use the single page allocator for one page. */
|
||||
if (nr_pages - nr_populated == 1)
|
||||
goto failed;
|
||||
|
@ -218,8 +218,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
|
||||
|
||||
file->f_flags |= O_LARGEFILE;
|
||||
|
||||
fd_install(fd, file);
|
||||
atomic_inc(&secretmem_users);
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
|
||||
err_put_fd:
|
||||
|
15
mm/vmalloc.c
15
mm/vmalloc.c
@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
|
||||
unsigned int order, unsigned int nr_pages, struct page **pages)
|
||||
{
|
||||
unsigned int nr_allocated = 0;
|
||||
struct page *page;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* For order-0 pages we make use of bulk allocator, if
|
||||
@ -2823,7 +2825,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
|
||||
* to fails, fallback to a single page allocator that is
|
||||
* more permissive.
|
||||
*/
|
||||
if (!order) {
|
||||
if (!order && nid != NUMA_NO_NODE) {
|
||||
while (nr_allocated < nr_pages) {
|
||||
unsigned int nr, nr_pages_request;
|
||||
|
||||
@ -2848,7 +2850,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
|
||||
if (nr != nr_pages_request)
|
||||
break;
|
||||
}
|
||||
} else
|
||||
} else if (order)
|
||||
/*
|
||||
* Compound pages required for remap_vmalloc_page if
|
||||
* high-order pages.
|
||||
@ -2856,11 +2858,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
|
||||
gfp |= __GFP_COMP;
|
||||
|
||||
/* High-order pages or fallback path if "bulk" fails. */
|
||||
while (nr_allocated < nr_pages) {
|
||||
struct page *page;
|
||||
int i;
|
||||
|
||||
page = alloc_pages_node(nid, gfp, order);
|
||||
while (nr_allocated < nr_pages) {
|
||||
if (nid == NUMA_NO_NODE)
|
||||
page = alloc_pages(gfp, order);
|
||||
else
|
||||
page = alloc_pages_node(nid, gfp, order);
|
||||
if (unlikely(!page))
|
||||
break;
|
||||
|
||||
|
@ -341,7 +341,7 @@ void split_file_backed_thp(void)
|
||||
}
|
||||
|
||||
/* write something to the file, so a file-backed THP can be allocated */
|
||||
num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));
|
||||
num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
|
||||
close(fd);
|
||||
|
||||
if (num_written < 1) {
|
||||
|
Loading…
Reference in New Issue
Block a user