mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
mm/khugepaged: propagate enum scan_result codes back to callers
Propagate enum scan_result codes back through return values of functions
downstream of khugepaged_scan_file() and khugepaged_scan_pmd() to inform
callers if the operation was successful, and if not, why.

Since khugepaged_scan_pmd()'s return value already has a specific meaning
(whether mmap_lock was unlocked or not), add a bool* argument to
khugepaged_scan_pmd() to retrieve this information.

Change khugepaged to take action based on the return values of
khugepaged_scan_file() and khugepaged_scan_pmd() instead of acting deep
within the collapsing functions themselves.

hugepage_vma_revalidate() now returns SCAN_SUCCEED on success to be more
consistent with enum scan_result propagation.

Remove dependency on error pointers to communicate to khugepaged that
allocation failed and it should sleep; instead just use the result of the
scan (SCAN_ALLOC_HUGE_PAGE_FAIL if allocation fails).

Link: https://lkml.kernel.org/r/20220706235936.2197195-6-zokeefe@google.com
Signed-off-by: Zach O'Keefe <zokeefe@google.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Chris Kennelly <ckennelly@google.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: "Souptick Joarder (HPE)" <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
9710a78ab2
commit
50ad2f24b3
233
mm/khugepaged.c
233
mm/khugepaged.c
@ -558,7 +558,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
|
|||||||
{
|
{
|
||||||
struct page *page = NULL;
|
struct page *page = NULL;
|
||||||
pte_t *_pte;
|
pte_t *_pte;
|
||||||
int none_or_zero = 0, shared = 0, result = 0, referenced = 0;
|
int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
|
||||||
bool writable = false;
|
bool writable = false;
|
||||||
|
|
||||||
for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
|
for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
|
||||||
@ -672,13 +672,13 @@ next:
|
|||||||
result = SCAN_SUCCEED;
|
result = SCAN_SUCCEED;
|
||||||
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
|
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
|
||||||
referenced, writable, result);
|
referenced, writable, result);
|
||||||
return 1;
|
return result;
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
release_pte_pages(pte, _pte, compound_pagelist);
|
release_pte_pages(pte, _pte, compound_pagelist);
|
||||||
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
|
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
|
||||||
referenced, writable, result);
|
referenced, writable, result);
|
||||||
return 0;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
|
static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
|
||||||
@ -818,7 +818,6 @@ static bool khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
|
|||||||
*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
|
*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
|
||||||
if (unlikely(!*hpage)) {
|
if (unlikely(!*hpage)) {
|
||||||
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
|
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
|
||||||
*hpage = ERR_PTR(-ENOMEM);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -830,8 +829,7 @@ static bool khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
|
|||||||
/*
|
/*
|
||||||
* If mmap_lock temporarily dropped, revalidate vma
|
* If mmap_lock temporarily dropped, revalidate vma
|
||||||
* before taking mmap_lock.
|
* before taking mmap_lock.
|
||||||
* Return 0 if succeeds, otherwise return none-zero
|
* Returns enum scan_result value.
|
||||||
* value (scan code).
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
|
static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
|
||||||
@ -859,7 +857,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
|
|||||||
*/
|
*/
|
||||||
if (!vma->anon_vma || !vma_is_anonymous(vma))
|
if (!vma->anon_vma || !vma_is_anonymous(vma))
|
||||||
return SCAN_VMA_CHECK;
|
return SCAN_VMA_CHECK;
|
||||||
return 0;
|
return SCAN_SUCCEED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -870,10 +868,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
|
|||||||
* Note that if false is returned, mmap_lock will be released.
|
* Note that if false is returned, mmap_lock will be released.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool __collapse_huge_page_swapin(struct mm_struct *mm,
|
static int __collapse_huge_page_swapin(struct mm_struct *mm,
|
||||||
struct vm_area_struct *vma,
|
struct vm_area_struct *vma,
|
||||||
unsigned long haddr, pmd_t *pmd,
|
unsigned long haddr, pmd_t *pmd,
|
||||||
int referenced)
|
int referenced)
|
||||||
{
|
{
|
||||||
int swapped_in = 0;
|
int swapped_in = 0;
|
||||||
vm_fault_t ret = 0;
|
vm_fault_t ret = 0;
|
||||||
@ -904,12 +902,13 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
|
|||||||
*/
|
*/
|
||||||
if (ret & VM_FAULT_RETRY) {
|
if (ret & VM_FAULT_RETRY) {
|
||||||
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
|
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
|
||||||
return false;
|
/* Likely, but not guaranteed, that page lock failed */
|
||||||
|
return SCAN_PAGE_LOCK;
|
||||||
}
|
}
|
||||||
if (ret & VM_FAULT_ERROR) {
|
if (ret & VM_FAULT_ERROR) {
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
|
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
|
||||||
return false;
|
return SCAN_FAIL;
|
||||||
}
|
}
|
||||||
swapped_in++;
|
swapped_in++;
|
||||||
}
|
}
|
||||||
@ -919,7 +918,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
|
|||||||
lru_add_drain();
|
lru_add_drain();
|
||||||
|
|
||||||
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1);
|
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1);
|
||||||
return true;
|
return SCAN_SUCCEED;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
|
static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
|
||||||
@ -937,17 +936,17 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
|
|||||||
return SCAN_SUCCEED;
|
return SCAN_SUCCEED;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
||||||
struct page **hpage, int referenced,
|
int referenced, int unmapped,
|
||||||
int unmapped, struct collapse_control *cc)
|
struct collapse_control *cc)
|
||||||
{
|
{
|
||||||
LIST_HEAD(compound_pagelist);
|
LIST_HEAD(compound_pagelist);
|
||||||
pmd_t *pmd, _pmd;
|
pmd_t *pmd, _pmd;
|
||||||
pte_t *pte;
|
pte_t *pte;
|
||||||
pgtable_t pgtable;
|
pgtable_t pgtable;
|
||||||
struct page *new_page;
|
struct page *hpage;
|
||||||
spinlock_t *pmd_ptl, *pte_ptl;
|
spinlock_t *pmd_ptl, *pte_ptl;
|
||||||
int isolated = 0, result = 0;
|
int result = SCAN_FAIL;
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
struct mmu_notifier_range range;
|
struct mmu_notifier_range range;
|
||||||
|
|
||||||
@ -961,15 +960,13 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
*/
|
*/
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
|
||||||
result = alloc_charge_hpage(hpage, mm, cc);
|
result = alloc_charge_hpage(&hpage, mm, cc);
|
||||||
if (result != SCAN_SUCCEED)
|
if (result != SCAN_SUCCEED)
|
||||||
goto out_nolock;
|
goto out_nolock;
|
||||||
|
|
||||||
new_page = *hpage;
|
|
||||||
|
|
||||||
mmap_read_lock(mm);
|
mmap_read_lock(mm);
|
||||||
result = hugepage_vma_revalidate(mm, address, &vma);
|
result = hugepage_vma_revalidate(mm, address, &vma);
|
||||||
if (result) {
|
if (result != SCAN_SUCCEED) {
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
goto out_nolock;
|
goto out_nolock;
|
||||||
}
|
}
|
||||||
@ -981,14 +978,16 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
goto out_nolock;
|
goto out_nolock;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
if (unmapped) {
|
||||||
* __collapse_huge_page_swapin will return with mmap_lock released
|
/*
|
||||||
* when it fails. So we jump out_nolock directly in that case.
|
* __collapse_huge_page_swapin will return with mmap_lock
|
||||||
* Continuing to collapse causes inconsistency.
|
* released when it fails. So we jump out_nolock directly in
|
||||||
*/
|
* that case. Continuing to collapse causes inconsistency.
|
||||||
if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
|
*/
|
||||||
pmd, referenced)) {
|
result = __collapse_huge_page_swapin(mm, vma, address, pmd,
|
||||||
goto out_nolock;
|
referenced);
|
||||||
|
if (result != SCAN_SUCCEED)
|
||||||
|
goto out_nolock;
|
||||||
}
|
}
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
@ -999,7 +998,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
*/
|
*/
|
||||||
mmap_write_lock(mm);
|
mmap_write_lock(mm);
|
||||||
result = hugepage_vma_revalidate(mm, address, &vma);
|
result = hugepage_vma_revalidate(mm, address, &vma);
|
||||||
if (result)
|
if (result != SCAN_SUCCEED)
|
||||||
goto out_up_write;
|
goto out_up_write;
|
||||||
/* check if the pmd is still valid */
|
/* check if the pmd is still valid */
|
||||||
if (mm_find_pmd(mm, address) != pmd)
|
if (mm_find_pmd(mm, address) != pmd)
|
||||||
@ -1026,11 +1025,11 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
mmu_notifier_invalidate_range_end(&range);
|
mmu_notifier_invalidate_range_end(&range);
|
||||||
|
|
||||||
spin_lock(pte_ptl);
|
spin_lock(pte_ptl);
|
||||||
isolated = __collapse_huge_page_isolate(vma, address, pte,
|
result = __collapse_huge_page_isolate(vma, address, pte,
|
||||||
&compound_pagelist);
|
&compound_pagelist);
|
||||||
spin_unlock(pte_ptl);
|
spin_unlock(pte_ptl);
|
||||||
|
|
||||||
if (unlikely(!isolated)) {
|
if (unlikely(result != SCAN_SUCCEED)) {
|
||||||
pte_unmap(pte);
|
pte_unmap(pte);
|
||||||
spin_lock(pmd_ptl);
|
spin_lock(pmd_ptl);
|
||||||
BUG_ON(!pmd_none(*pmd));
|
BUG_ON(!pmd_none(*pmd));
|
||||||
@ -1042,7 +1041,6 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
pmd_populate(mm, pmd, pmd_pgtable(_pmd));
|
pmd_populate(mm, pmd, pmd_pgtable(_pmd));
|
||||||
spin_unlock(pmd_ptl);
|
spin_unlock(pmd_ptl);
|
||||||
anon_vma_unlock_write(vma->anon_vma);
|
anon_vma_unlock_write(vma->anon_vma);
|
||||||
result = SCAN_FAIL;
|
|
||||||
goto out_up_write;
|
goto out_up_write;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1052,8 +1050,8 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
*/
|
*/
|
||||||
anon_vma_unlock_write(vma->anon_vma);
|
anon_vma_unlock_write(vma->anon_vma);
|
||||||
|
|
||||||
__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl,
|
__collapse_huge_page_copy(pte, hpage, vma, address, pte_ptl,
|
||||||
&compound_pagelist);
|
&compound_pagelist);
|
||||||
pte_unmap(pte);
|
pte_unmap(pte);
|
||||||
/*
|
/*
|
||||||
* spin_lock() below is not the equivalent of smp_wmb(), but
|
* spin_lock() below is not the equivalent of smp_wmb(), but
|
||||||
@ -1061,43 +1059,42 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
|
|||||||
* avoid the copy_huge_page writes to become visible after
|
* avoid the copy_huge_page writes to become visible after
|
||||||
* the set_pmd_at() write.
|
* the set_pmd_at() write.
|
||||||
*/
|
*/
|
||||||
__SetPageUptodate(new_page);
|
__SetPageUptodate(hpage);
|
||||||
pgtable = pmd_pgtable(_pmd);
|
pgtable = pmd_pgtable(_pmd);
|
||||||
|
|
||||||
_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
|
_pmd = mk_huge_pmd(hpage, vma->vm_page_prot);
|
||||||
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
|
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
|
||||||
|
|
||||||
spin_lock(pmd_ptl);
|
spin_lock(pmd_ptl);
|
||||||
BUG_ON(!pmd_none(*pmd));
|
BUG_ON(!pmd_none(*pmd));
|
||||||
page_add_new_anon_rmap(new_page, vma, address);
|
page_add_new_anon_rmap(hpage, vma, address);
|
||||||
lru_cache_add_inactive_or_unevictable(new_page, vma);
|
lru_cache_add_inactive_or_unevictable(hpage, vma);
|
||||||
pgtable_trans_huge_deposit(mm, pmd, pgtable);
|
pgtable_trans_huge_deposit(mm, pmd, pgtable);
|
||||||
set_pmd_at(mm, address, pmd, _pmd);
|
set_pmd_at(mm, address, pmd, _pmd);
|
||||||
update_mmu_cache_pmd(vma, address, pmd);
|
update_mmu_cache_pmd(vma, address, pmd);
|
||||||
spin_unlock(pmd_ptl);
|
spin_unlock(pmd_ptl);
|
||||||
|
|
||||||
*hpage = NULL;
|
hpage = NULL;
|
||||||
|
|
||||||
khugepaged_pages_collapsed++;
|
|
||||||
result = SCAN_SUCCEED;
|
result = SCAN_SUCCEED;
|
||||||
out_up_write:
|
out_up_write:
|
||||||
mmap_write_unlock(mm);
|
mmap_write_unlock(mm);
|
||||||
out_nolock:
|
out_nolock:
|
||||||
if (!IS_ERR_OR_NULL(*hpage)) {
|
if (hpage) {
|
||||||
mem_cgroup_uncharge(page_folio(*hpage));
|
mem_cgroup_uncharge(page_folio(hpage));
|
||||||
put_page(*hpage);
|
put_page(hpage);
|
||||||
}
|
}
|
||||||
trace_mm_collapse_huge_page(mm, isolated, result);
|
trace_mm_collapse_huge_page(mm, result == SCAN_SUCCEED, result);
|
||||||
return;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
|
static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
unsigned long address, struct page **hpage,
|
unsigned long address, bool *mmap_locked,
|
||||||
struct collapse_control *cc)
|
struct collapse_control *cc)
|
||||||
{
|
{
|
||||||
pmd_t *pmd;
|
pmd_t *pmd;
|
||||||
pte_t *pte, *_pte;
|
pte_t *pte, *_pte;
|
||||||
int ret = 0, result = 0, referenced = 0;
|
int result = SCAN_FAIL, referenced = 0;
|
||||||
int none_or_zero = 0, shared = 0;
|
int none_or_zero = 0, shared = 0;
|
||||||
struct page *page = NULL;
|
struct page *page = NULL;
|
||||||
unsigned long _address;
|
unsigned long _address;
|
||||||
@ -1234,19 +1231,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||||||
result = SCAN_LACK_REFERENCED_PAGE;
|
result = SCAN_LACK_REFERENCED_PAGE;
|
||||||
} else {
|
} else {
|
||||||
result = SCAN_SUCCEED;
|
result = SCAN_SUCCEED;
|
||||||
ret = 1;
|
|
||||||
}
|
}
|
||||||
out_unmap:
|
out_unmap:
|
||||||
pte_unmap_unlock(pte, ptl);
|
pte_unmap_unlock(pte, ptl);
|
||||||
if (ret) {
|
if (result == SCAN_SUCCEED) {
|
||||||
|
result = collapse_huge_page(mm, address, referenced,
|
||||||
|
unmapped, cc);
|
||||||
/* collapse_huge_page will return with the mmap_lock released */
|
/* collapse_huge_page will return with the mmap_lock released */
|
||||||
collapse_huge_page(mm, address, hpage, referenced, unmapped,
|
*mmap_locked = false;
|
||||||
cc);
|
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
|
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
|
||||||
none_or_zero, result, unmapped);
|
none_or_zero, result, unmapped);
|
||||||
return ret;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void collect_mm_slot(struct mm_slot *mm_slot)
|
static void collect_mm_slot(struct mm_slot *mm_slot)
|
||||||
@ -1508,7 +1505,6 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|||||||
* @mm: process address space where collapse happens
|
* @mm: process address space where collapse happens
|
||||||
* @file: file that collapse on
|
* @file: file that collapse on
|
||||||
* @start: collapse start address
|
* @start: collapse start address
|
||||||
* @hpage: new allocated huge page for collapse
|
|
||||||
* @cc: collapse context and scratchpad
|
* @cc: collapse context and scratchpad
|
||||||
*
|
*
|
||||||
* Basic scheme is simple, details are more complex:
|
* Basic scheme is simple, details are more complex:
|
||||||
@ -1526,12 +1522,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|||||||
* + restore gaps in the page cache;
|
* + restore gaps in the page cache;
|
||||||
* + unlock and free huge page;
|
* + unlock and free huge page;
|
||||||
*/
|
*/
|
||||||
static void collapse_file(struct mm_struct *mm, struct file *file,
|
static int collapse_file(struct mm_struct *mm, struct file *file,
|
||||||
pgoff_t start, struct page **hpage,
|
pgoff_t start, struct collapse_control *cc)
|
||||||
struct collapse_control *cc)
|
|
||||||
{
|
{
|
||||||
struct address_space *mapping = file->f_mapping;
|
struct address_space *mapping = file->f_mapping;
|
||||||
struct page *new_page;
|
struct page *hpage;
|
||||||
pgoff_t index, end = start + HPAGE_PMD_NR;
|
pgoff_t index, end = start + HPAGE_PMD_NR;
|
||||||
LIST_HEAD(pagelist);
|
LIST_HEAD(pagelist);
|
||||||
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
|
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
|
||||||
@ -1542,12 +1537,10 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
|
|||||||
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
|
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
|
||||||
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
||||||
|
|
||||||
result = alloc_charge_hpage(hpage, mm, cc);
|
result = alloc_charge_hpage(&hpage, mm, cc);
|
||||||
if (result != SCAN_SUCCEED)
|
if (result != SCAN_SUCCEED)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
new_page = *hpage;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ensure we have slots for all the pages in the range. This is
|
* Ensure we have slots for all the pages in the range. This is
|
||||||
* almost certainly a no-op because most of the pages must be present
|
* almost certainly a no-op because most of the pages must be present
|
||||||
@ -1564,14 +1557,14 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
|
|||||||
}
|
}
|
||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
__SetPageLocked(new_page);
|
__SetPageLocked(hpage);
|
||||||
if (is_shmem)
|
if (is_shmem)
|
||||||
__SetPageSwapBacked(new_page);
|
__SetPageSwapBacked(hpage);
|
||||||
new_page->index = start;
|
hpage->index = start;
|
||||||
new_page->mapping = mapping;
|
hpage->mapping = mapping;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* At this point the new_page is locked and not up-to-date.
|
* At this point the hpage is locked and not up-to-date.
|
||||||
* It's safe to insert it into the page cache, because nobody would
|
* It's safe to insert it into the page cache, because nobody would
|
||||||
* be able to map it or use it in another way until we unlock it.
|
* be able to map it or use it in another way until we unlock it.
|
||||||
*/
|
*/
|
||||||
@ -1599,7 +1592,7 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
|
|||||||
result = SCAN_FAIL;
|
result = SCAN_FAIL;
|
||||||
goto xa_locked;
|
goto xa_locked;
|
||||||
}
|
}
|
||||||
xas_store(&xas, new_page);
|
xas_store(&xas, hpage);
|
||||||
nr_none++;
|
nr_none++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1741,19 +1734,19 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
|
|||||||
list_add_tail(&page->lru, &pagelist);
|
list_add_tail(&page->lru, &pagelist);
|
||||||
|
|
||||||
/* Finally, replace with the new page. */
|
/* Finally, replace with the new page. */
|
||||||
xas_store(&xas, new_page);
|
xas_store(&xas, hpage);
|
||||||
continue;
|
continue;
|
||||||
out_unlock:
|
out_unlock:
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
goto xa_unlocked;
|
goto xa_unlocked;
|
||||||
}
|
}
|
||||||
nr = thp_nr_pages(new_page);
|
nr = thp_nr_pages(hpage);
|
||||||
|
|
||||||
if (is_shmem)
|
if (is_shmem)
|
||||||
__mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr);
|
__mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr);
|
||||||
else {
|
else {
|
||||||
__mod_lruvec_page_state(new_page, NR_FILE_THPS, nr);
|
__mod_lruvec_page_state(hpage, NR_FILE_THPS, nr);
|
||||||
filemap_nr_thps_inc(mapping);
|
filemap_nr_thps_inc(mapping);
|
||||||
/*
|
/*
|
||||||
* Paired with smp_mb() in do_dentry_open() to ensure
|
* Paired with smp_mb() in do_dentry_open() to ensure
|
||||||
@ -1764,21 +1757,21 @@ out_unlock:
|
|||||||
smp_mb();
|
smp_mb();
|
||||||
if (inode_is_open_for_write(mapping->host)) {
|
if (inode_is_open_for_write(mapping->host)) {
|
||||||
result = SCAN_FAIL;
|
result = SCAN_FAIL;
|
||||||
__mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr);
|
__mod_lruvec_page_state(hpage, NR_FILE_THPS, -nr);
|
||||||
filemap_nr_thps_dec(mapping);
|
filemap_nr_thps_dec(mapping);
|
||||||
goto xa_locked;
|
goto xa_locked;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nr_none) {
|
if (nr_none) {
|
||||||
__mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none);
|
__mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none);
|
||||||
/* nr_none is always 0 for non-shmem. */
|
/* nr_none is always 0 for non-shmem. */
|
||||||
__mod_lruvec_page_state(new_page, NR_SHMEM, nr_none);
|
__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Join all the small entries into a single multi-index entry */
|
/* Join all the small entries into a single multi-index entry */
|
||||||
xas_set_order(&xas, start, HPAGE_PMD_ORDER);
|
xas_set_order(&xas, start, HPAGE_PMD_ORDER);
|
||||||
xas_store(&xas, new_page);
|
xas_store(&xas, hpage);
|
||||||
xa_locked:
|
xa_locked:
|
||||||
xas_unlock_irq(&xas);
|
xas_unlock_irq(&xas);
|
||||||
xa_unlocked:
|
xa_unlocked:
|
||||||
@ -1800,11 +1793,11 @@ xa_unlocked:
|
|||||||
index = start;
|
index = start;
|
||||||
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
|
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
|
||||||
while (index < page->index) {
|
while (index < page->index) {
|
||||||
clear_highpage(new_page + (index % HPAGE_PMD_NR));
|
clear_highpage(hpage + (index % HPAGE_PMD_NR));
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
|
copy_highpage(hpage + (page->index % HPAGE_PMD_NR),
|
||||||
page);
|
page);
|
||||||
list_del(&page->lru);
|
list_del(&page->lru);
|
||||||
page->mapping = NULL;
|
page->mapping = NULL;
|
||||||
page_ref_unfreeze(page, 1);
|
page_ref_unfreeze(page, 1);
|
||||||
@ -1815,23 +1808,22 @@ xa_unlocked:
|
|||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
while (index < end) {
|
while (index < end) {
|
||||||
clear_highpage(new_page + (index % HPAGE_PMD_NR));
|
clear_highpage(hpage + (index % HPAGE_PMD_NR));
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
|
|
||||||
SetPageUptodate(new_page);
|
SetPageUptodate(hpage);
|
||||||
page_ref_add(new_page, HPAGE_PMD_NR - 1);
|
page_ref_add(hpage, HPAGE_PMD_NR - 1);
|
||||||
if (is_shmem)
|
if (is_shmem)
|
||||||
set_page_dirty(new_page);
|
set_page_dirty(hpage);
|
||||||
lru_cache_add(new_page);
|
lru_cache_add(hpage);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove pte page tables, so we can re-fault the page as huge.
|
* Remove pte page tables, so we can re-fault the page as huge.
|
||||||
*/
|
*/
|
||||||
retract_page_tables(mapping, start);
|
retract_page_tables(mapping, start);
|
||||||
*hpage = NULL;
|
unlock_page(hpage);
|
||||||
|
hpage = NULL;
|
||||||
khugepaged_pages_collapsed++;
|
|
||||||
} else {
|
} else {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
|
|
||||||
@ -1870,22 +1862,23 @@ xa_unlocked:
|
|||||||
VM_BUG_ON(nr_none);
|
VM_BUG_ON(nr_none);
|
||||||
xas_unlock_irq(&xas);
|
xas_unlock_irq(&xas);
|
||||||
|
|
||||||
new_page->mapping = NULL;
|
hpage->mapping = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
unlock_page(new_page);
|
if (hpage)
|
||||||
|
unlock_page(hpage);
|
||||||
out:
|
out:
|
||||||
VM_BUG_ON(!list_empty(&pagelist));
|
VM_BUG_ON(!list_empty(&pagelist));
|
||||||
if (!IS_ERR_OR_NULL(*hpage)) {
|
if (hpage) {
|
||||||
mem_cgroup_uncharge(page_folio(*hpage));
|
mem_cgroup_uncharge(page_folio(hpage));
|
||||||
put_page(*hpage);
|
put_page(hpage);
|
||||||
}
|
}
|
||||||
/* TODO: tracepoints */
|
/* TODO: tracepoints */
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
|
static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
|
||||||
pgoff_t start, struct page **hpage,
|
pgoff_t start, struct collapse_control *cc)
|
||||||
struct collapse_control *cc)
|
|
||||||
{
|
{
|
||||||
struct page *page = NULL;
|
struct page *page = NULL;
|
||||||
struct address_space *mapping = file->f_mapping;
|
struct address_space *mapping = file->f_mapping;
|
||||||
@ -1958,16 +1951,16 @@ static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
|
|||||||
result = SCAN_EXCEED_NONE_PTE;
|
result = SCAN_EXCEED_NONE_PTE;
|
||||||
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
|
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
|
||||||
} else {
|
} else {
|
||||||
collapse_file(mm, file, start, hpage, cc);
|
result = collapse_file(mm, file, start, cc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: tracepoints */
|
/* TODO: tracepoints */
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
|
static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
|
||||||
pgoff_t start, struct page **hpage,
|
pgoff_t start, struct collapse_control *cc)
|
||||||
struct collapse_control *cc)
|
|
||||||
{
|
{
|
||||||
BUILD_BUG();
|
BUILD_BUG();
|
||||||
}
|
}
|
||||||
@ -1977,8 +1970,7 @@ static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
|
static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
|
||||||
struct page **hpage,
|
|
||||||
struct collapse_control *cc)
|
struct collapse_control *cc)
|
||||||
__releases(&khugepaged_mm_lock)
|
__releases(&khugepaged_mm_lock)
|
||||||
__acquires(&khugepaged_mm_lock)
|
__acquires(&khugepaged_mm_lock)
|
||||||
@ -1990,6 +1982,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
|
|||||||
|
|
||||||
VM_BUG_ON(!pages);
|
VM_BUG_ON(!pages);
|
||||||
lockdep_assert_held(&khugepaged_mm_lock);
|
lockdep_assert_held(&khugepaged_mm_lock);
|
||||||
|
*result = SCAN_FAIL;
|
||||||
|
|
||||||
if (khugepaged_scan.mm_slot)
|
if (khugepaged_scan.mm_slot)
|
||||||
mm_slot = khugepaged_scan.mm_slot;
|
mm_slot = khugepaged_scan.mm_slot;
|
||||||
@ -2036,7 +2029,8 @@ skip:
|
|||||||
VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
|
VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
|
||||||
|
|
||||||
while (khugepaged_scan.address < hend) {
|
while (khugepaged_scan.address < hend) {
|
||||||
int ret;
|
bool mmap_locked = true;
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
if (unlikely(khugepaged_test_exit(mm)))
|
if (unlikely(khugepaged_test_exit(mm)))
|
||||||
goto breakouterloop;
|
goto breakouterloop;
|
||||||
@ -2050,20 +2044,28 @@ skip:
|
|||||||
khugepaged_scan.address);
|
khugepaged_scan.address);
|
||||||
|
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
ret = 1;
|
*result = khugepaged_scan_file(mm, file, pgoff,
|
||||||
khugepaged_scan_file(mm, file, pgoff, hpage,
|
cc);
|
||||||
cc);
|
mmap_locked = false;
|
||||||
fput(file);
|
fput(file);
|
||||||
} else {
|
} else {
|
||||||
ret = khugepaged_scan_pmd(mm, vma,
|
*result = khugepaged_scan_pmd(mm, vma,
|
||||||
khugepaged_scan.address,
|
khugepaged_scan.address,
|
||||||
hpage, cc);
|
&mmap_locked, cc);
|
||||||
}
|
}
|
||||||
|
if (*result == SCAN_SUCCEED)
|
||||||
|
++khugepaged_pages_collapsed;
|
||||||
/* move to next address */
|
/* move to next address */
|
||||||
khugepaged_scan.address += HPAGE_PMD_SIZE;
|
khugepaged_scan.address += HPAGE_PMD_SIZE;
|
||||||
progress += HPAGE_PMD_NR;
|
progress += HPAGE_PMD_NR;
|
||||||
if (ret)
|
if (!mmap_locked)
|
||||||
/* we released mmap_lock so break loop */
|
/*
|
||||||
|
* We released mmap_lock so break loop. Note
|
||||||
|
* that we drop mmap_lock before all hugepage
|
||||||
|
* allocations, so if allocation fails, we are
|
||||||
|
* guaranteed to break here and report the
|
||||||
|
* correct result back to caller.
|
||||||
|
*/
|
||||||
goto breakouterloop_mmap_lock;
|
goto breakouterloop_mmap_lock;
|
||||||
if (progress >= pages)
|
if (progress >= pages)
|
||||||
goto breakouterloop;
|
goto breakouterloop;
|
||||||
@ -2115,10 +2117,10 @@ static int khugepaged_wait_event(void)
|
|||||||
|
|
||||||
static void khugepaged_do_scan(struct collapse_control *cc)
|
static void khugepaged_do_scan(struct collapse_control *cc)
|
||||||
{
|
{
|
||||||
struct page *hpage = NULL;
|
|
||||||
unsigned int progress = 0, pass_through_head = 0;
|
unsigned int progress = 0, pass_through_head = 0;
|
||||||
unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
|
unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
|
||||||
bool wait = true;
|
bool wait = true;
|
||||||
|
int result = SCAN_SUCCEED;
|
||||||
|
|
||||||
lru_add_drain_all();
|
lru_add_drain_all();
|
||||||
|
|
||||||
@ -2134,7 +2136,7 @@ static void khugepaged_do_scan(struct collapse_control *cc)
|
|||||||
if (khugepaged_has_work() &&
|
if (khugepaged_has_work() &&
|
||||||
pass_through_head < 2)
|
pass_through_head < 2)
|
||||||
progress += khugepaged_scan_mm_slot(pages - progress,
|
progress += khugepaged_scan_mm_slot(pages - progress,
|
||||||
&hpage, cc);
|
&result, cc);
|
||||||
else
|
else
|
||||||
progress = pages;
|
progress = pages;
|
||||||
spin_unlock(&khugepaged_mm_lock);
|
spin_unlock(&khugepaged_mm_lock);
|
||||||
@ -2142,7 +2144,7 @@ static void khugepaged_do_scan(struct collapse_control *cc)
|
|||||||
if (progress >= pages)
|
if (progress >= pages)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (IS_ERR(hpage)) {
|
if (result == SCAN_ALLOC_HUGE_PAGE_FAIL) {
|
||||||
/*
|
/*
|
||||||
* If fail to allocate the first time, try to sleep for
|
* If fail to allocate the first time, try to sleep for
|
||||||
* a while. When hit again, cancel the scan.
|
* a while. When hit again, cancel the scan.
|
||||||
@ -2150,7 +2152,6 @@ static void khugepaged_do_scan(struct collapse_control *cc)
|
|||||||
if (!wait)
|
if (!wait)
|
||||||
break;
|
break;
|
||||||
wait = false;
|
wait = false;
|
||||||
hpage = NULL;
|
|
||||||
khugepaged_alloc_sleep();
|
khugepaged_alloc_sleep();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user