mirror of
https://github.com/torvalds/linux.git
synced 2024-11-21 19:41:42 +00:00
fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs
The PAGEMAP_SCAN IOCTL on the pagemap file can be used to get or optionally clear the info about page table entries. The following operations are supported in this IOCTL: - Scan the address range and get the memory ranges matching the provided criteria. This is performed when the output buffer is specified. - Write-protect the pages. The PM_SCAN_WP_MATCHING flag is used to write-protect the pages of interest. The PM_SCAN_CHECK_WPASYNC flag aborts the operation if non-Async Write Protected pages are found. PM_SCAN_WP_MATCHING can be used with or without PM_SCAN_CHECK_WPASYNC. - Both of those operations can be combined into one atomic operation where we can get and write protect the pages as well. The following flags about pages are currently supported: - PAGE_IS_WPALLOWED - Page has async-write-protection enabled - PAGE_IS_WRITTEN - Page has been written to since it was write protected - PAGE_IS_FILE - Page is file backed - PAGE_IS_PRESENT - Page is present in memory - PAGE_IS_SWAPPED - Page is swapped out - PAGE_IS_PFNZERO - Page has zero PFN - PAGE_IS_HUGE - Page is THP or Hugetlb backed. This IOCTL can be extended to get information about more PTE bits. The entire address range passed by user [start, end) is scanned until either the user provided buffer is full or max_pages have been found.
[akpm@linux-foundation.org: update it for "mm: hugetlb: add huge page size param to set_huge_pte_at()"] [akpm@linux-foundation.org: fix CONFIG_HUGETLB_PAGE=n warning] [arnd@arndb.de: hide unused pagemap_scan_backout_range() function] Link: https://lkml.kernel.org/r/20230927060257.2975412-1-arnd@kernel.org [sfr@canb.auug.org.au: fix "fs/proc/task_mmu: hide unused pagemap_scan_backout_range() function"] Link: https://lkml.kernel.org/r/20230928092223.0625c6bf@canb.auug.org.au Link: https://lkml.kernel.org/r/20230821141518.870589-3-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com> Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Reviewed-by: Andrei Vagin <avagin@gmail.com> Reviewed-by: Michał Mirosław <mirq-linux@rere.qmqm.pl> Cc: Alex Sierra <alex.sierra@amd.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Christian Brauner <brauner@kernel.org> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Gustavo A. R. Silva <gustavoars@kernel.org> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Michal Miroslaw <emmir@google.com> Cc: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Nadav Amit <namit@vmware.com> Cc: Pasha Tatashin <pasha.tatashin@soleen.com> Cc: Paul Gofman <pgofman@codeweavers.com> Cc: Peter Xu <peterx@redhat.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Yang Shi <shy828301@gmail.com> Cc: Yun Zhou <yun.zhou@windriver.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
d61ea1cb00
commit
52526ca7fd
@ -20,6 +20,8 @@
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/overflow.h>
|
||||
|
||||
#include <asm/elf.h>
|
||||
#include <asm/tlb.h>
|
||||
@ -1761,11 +1763,701 @@ static int pagemap_release(struct inode *inode, struct file *file)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
|
||||
PAGE_IS_FILE | PAGE_IS_PRESENT | \
|
||||
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
|
||||
PAGE_IS_HUGE)
|
||||
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
|
||||
|
||||
struct pagemap_scan_private {
|
||||
struct pm_scan_arg arg;
|
||||
unsigned long masks_of_interest, cur_vma_category;
|
||||
struct page_region *vec_buf;
|
||||
unsigned long vec_buf_len, vec_buf_index, found_pages;
|
||||
struct page_region __user *vec_out;
|
||||
};
|
||||
|
||||
/*
 * Compute the PAGE_IS_* categories described by a single PTE.
 *
 * PAGE_IS_FILE requires a struct page lookup, so it is only evaluated when
 * the request actually refers to it (p->masks_of_interest).  A PTE that is
 * neither present nor a swap PTE yields no categories.
 */
static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
					   struct vm_area_struct *vma,
					   unsigned long addr, pte_t pte)
{
	const bool want_file = p->masks_of_interest & PAGE_IS_FILE;
	unsigned long cat;

	if (pte_present(pte)) {
		cat = PAGE_IS_PRESENT;

		/* No uffd-wp bit means the page counts as written. */
		if (!pte_uffd_wp(pte))
			cat |= PAGE_IS_WRITTEN;

		if (want_file) {
			struct page *page = vm_normal_page(vma, addr, pte);

			if (page && !PageAnon(page))
				cat |= PAGE_IS_FILE;
		}

		if (is_zero_pfn(pte_pfn(pte)))
			cat |= PAGE_IS_PFNZERO;

		return cat;
	}

	if (!is_swap_pte(pte))
		return 0;

	cat = PAGE_IS_SWAPPED;

	if (!pte_swp_uffd_wp_any(pte))
		cat |= PAGE_IS_WRITTEN;

	if (want_file) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/* Only PFN swap entries have a struct page to inspect. */
		if (is_pfn_swap_entry(entry) &&
		    !PageAnon(pfn_swap_entry_to_page(entry)))
			cat |= PAGE_IS_FILE;
	}

	return cat;
}
|
||||
|
||||
/*
 * Mark a single PTE as userfaultfd write-protected.
 *
 * Present PTEs get the uffd-wp bit through a modify_prot transaction, swap
 * PTEs get the swap variant of the bit, and any other entry is replaced by
 * a PTE_MARKER_UFFD_WP marker.
 */
static void make_uffd_wp_pte(struct vm_area_struct *vma,
			     unsigned long addr, pte_t *pte)
{
	pte_t ptent = ptep_get(pte);

	if (pte_present(ptent)) {
		pte_t old_pte;

		old_pte = ptep_modify_prot_start(vma, addr, pte);
		/*
		 * Build the new entry from the value returned by
		 * ptep_modify_prot_start(), not from the earlier ptep_get()
		 * snapshot: bits that changed between the two reads (e.g.
		 * young/dirty) would otherwise be lost on commit.
		 */
		ptent = pte_mkuffd_wp(old_pte);
		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
	} else if (is_swap_pte(ptent)) {
		ptent = pte_swp_mkuffd_wp(ptent);
		set_pte_at(vma->vm_mm, addr, pte, ptent);
	} else {
		set_pte_at(vma->vm_mm, addr, pte,
			   make_pte_marker(PTE_MARKER_UFFD_WP));
	}
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/*
 * Compute the PAGE_IS_* categories described by a huge PMD.
 *
 * PAGE_IS_HUGE is always reported.  PAGE_IS_FILE requires a struct page
 * lookup and is only evaluated when the request refers to it
 * (p->masks_of_interest).
 */
static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
					  struct vm_area_struct *vma,
					  unsigned long addr, pmd_t pmd)
{
	unsigned long categories = PAGE_IS_HUGE;

	if (pmd_present(pmd)) {
		struct page *page;

		categories |= PAGE_IS_PRESENT;
		if (!pmd_uffd_wp(pmd))
			categories |= PAGE_IS_WRITTEN;

		if (p->masks_of_interest & PAGE_IS_FILE) {
			page = vm_normal_page_pmd(vma, addr, pmd);
			if (page && !PageAnon(page))
				categories |= PAGE_IS_FILE;
		}

		/*
		 * The huge zero page has its own PFN, distinct from the
		 * small zero page that is_zero_pfn() tests for, so use the
		 * PMD-level helper.
		 */
		if (is_huge_zero_pmd(pmd))
			categories |= PAGE_IS_PFNZERO;
	} else if (is_swap_pmd(pmd)) {
		swp_entry_t swp;

		categories |= PAGE_IS_SWAPPED;
		if (!pmd_swp_uffd_wp(pmd))
			categories |= PAGE_IS_WRITTEN;

		if (p->masks_of_interest & PAGE_IS_FILE) {
			swp = pmd_to_swp_entry(pmd);
			if (is_pfn_swap_entry(swp) &&
			    !PageAnon(pfn_swap_entry_to_page(swp)))
				categories |= PAGE_IS_FILE;
		}
	}

	return categories;
}
|
||||
|
||||
/*
 * Mark a huge PMD as userfaultfd write-protected.
 *
 * Present PMDs are invalidated first and rewritten with the uffd-wp bit;
 * migration entries get the swap variant of the bit.  Any other PMD is
 * left untouched.
 */
static void make_uffd_wp_pmd(struct vm_area_struct *vma,
			     unsigned long addr, pmd_t *pmdp)
{
	pmd_t entry = *pmdp;

	if (pmd_present(entry)) {
		/* Use the value returned by the invalidation as the base. */
		pmd_t old = pmdp_invalidate_ad(vma, addr, pmdp);

		entry = pmd_mkuffd_wp(old);
	} else if (is_migration_entry(pmd_to_swp_entry(entry))) {
		entry = pmd_swp_mkuffd_wp(entry);
	} else {
		return;
	}

	set_pmd_at(vma->vm_mm, addr, pmdp, entry);
}
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
/*
 * Compute the PAGE_IS_* categories described by a HugeTLB PTE.
 * PAGE_IS_HUGE is always reported.
 */
static unsigned long pagemap_hugetlb_category(pte_t pte)
{
	unsigned long cat = PAGE_IS_HUGE;

	if (pte_present(pte)) {
		cat |= PAGE_IS_PRESENT;

		if (!huge_pte_uffd_wp(pte))
			cat |= PAGE_IS_WRITTEN;
		if (!PageAnon(pte_page(pte)))
			cat |= PAGE_IS_FILE;
		if (is_zero_pfn(pte_pfn(pte)))
			cat |= PAGE_IS_PFNZERO;

		return cat;
	}

	/*
	 * According to pagemap_hugetlb_range(), file-backed HugeTLB
	 * page cannot be swapped. So PAGE_IS_FILE is not checked for
	 * swapped pages.
	 */
	if (is_swap_pte(pte)) {
		cat |= PAGE_IS_SWAPPED;

		if (!pte_swp_uffd_wp_any(pte))
			cat |= PAGE_IS_WRITTEN;
	}

	return cat;
}
|
||||
|
||||
/*
 * Mark a HugeTLB PTE as userfaultfd write-protected.
 *
 * @ptent is the caller's snapshot of *@ptep.  Hwpoisoned entries and
 * existing PTE markers are left alone; migration entries get the swap
 * variant of the uffd-wp bit; other non-empty entries get the bit through a
 * modify_prot transaction; empty entries are replaced by a
 * PTE_MARKER_UFFD_WP marker.
 */
static void make_uffd_wp_huge_pte(struct vm_area_struct *vma,
				  unsigned long addr, pte_t *ptep,
				  pte_t ptent)
{
	unsigned long psize;

	if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent))
		return;

	psize = huge_page_size(hstate_vma(vma));

	if (is_hugetlb_entry_migration(ptent)) {
		set_huge_pte_at(vma->vm_mm, addr, ptep,
				pte_swp_mkuffd_wp(ptent), psize);
	} else if (!huge_pte_none(ptent)) {
		/*
		 * A commit must be paired with a start, and the new entry
		 * must be derived from the value start returns so that bits
		 * changed since the caller's snapshot are not lost.
		 */
		ptent = huge_ptep_modify_prot_start(vma, addr, ptep);
		huge_ptep_modify_prot_commit(vma, addr, ptep, ptent,
					     huge_pte_mkuffd_wp(ptent));
	} else {
		set_huge_pte_at(vma->vm_mm, addr, ptep,
				make_pte_marker(PTE_MARKER_UFFD_WP), psize);
	}
}
|
||||
#endif /* CONFIG_HUGETLB_PAGE */
|
||||
|
||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
|
||||
static void pagemap_scan_backout_range(struct pagemap_scan_private *p,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
|
||||
|
||||
if (cur_buf->start != addr)
|
||||
cur_buf->end = addr;
|
||||
else
|
||||
cur_buf->start = cur_buf->end = 0;
|
||||
|
||||
p->found_pages -= (end - addr) / PAGE_SIZE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool pagemap_scan_is_interesting_page(unsigned long categories,
|
||||
const struct pagemap_scan_private *p)
|
||||
{
|
||||
categories ^= p->arg.category_inverted;
|
||||
if ((categories & p->arg.category_mask) != p->arg.category_mask)
|
||||
return false;
|
||||
if (p->arg.category_anyof_mask && !(categories & p->arg.category_anyof_mask))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool pagemap_scan_is_interesting_vma(unsigned long categories,
|
||||
const struct pagemap_scan_private *p)
|
||||
{
|
||||
unsigned long required = p->arg.category_mask & PAGE_IS_WPALLOWED;
|
||||
|
||||
categories ^= p->arg.category_inverted;
|
||||
if ((categories & required) != required)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
|
||||
struct mm_walk *walk)
|
||||
{
|
||||
struct pagemap_scan_private *p = walk->private;
|
||||
struct vm_area_struct *vma = walk->vma;
|
||||
unsigned long vma_category = 0;
|
||||
|
||||
if (userfaultfd_wp_async(vma) && userfaultfd_wp_use_markers(vma))
|
||||
vma_category |= PAGE_IS_WPALLOWED;
|
||||
else if (p->arg.flags & PM_SCAN_CHECK_WPASYNC)
|
||||
return -EPERM;
|
||||
|
||||
if (vma->vm_flags & VM_PFNMAP)
|
||||
return 1;
|
||||
|
||||
if (!pagemap_scan_is_interesting_vma(vma_category, p))
|
||||
return 1;
|
||||
|
||||
p->cur_vma_category = vma_category;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool pagemap_scan_push_range(unsigned long categories,
|
||||
struct pagemap_scan_private *p,
|
||||
unsigned long addr, unsigned long end)
|
||||
{
|
||||
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
|
||||
|
||||
/*
|
||||
* When there is no output buffer provided at all, the sentinel values
|
||||
* won't match here. There is no other way for `cur_buf->end` to be
|
||||
* non-zero other than it being non-empty.
|
||||
*/
|
||||
if (addr == cur_buf->end && categories == cur_buf->categories) {
|
||||
cur_buf->end = end;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (cur_buf->end) {
|
||||
if (p->vec_buf_index >= p->vec_buf_len - 1)
|
||||
return false;
|
||||
|
||||
cur_buf = &p->vec_buf[++p->vec_buf_index];
|
||||
}
|
||||
|
||||
cur_buf->start = addr;
|
||||
cur_buf->end = end;
|
||||
cur_buf->categories = categories;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int pagemap_scan_output(unsigned long categories,
|
||||
struct pagemap_scan_private *p,
|
||||
unsigned long addr, unsigned long *end)
|
||||
{
|
||||
unsigned long n_pages, total_pages;
|
||||
int ret = 0;
|
||||
|
||||
if (!p->vec_buf)
|
||||
return 0;
|
||||
|
||||
categories &= p->arg.return_mask;
|
||||
|
||||
n_pages = (*end - addr) / PAGE_SIZE;
|
||||
if (check_add_overflow(p->found_pages, n_pages, &total_pages) ||
|
||||
total_pages > p->arg.max_pages) {
|
||||
size_t n_too_much = total_pages - p->arg.max_pages;
|
||||
*end -= n_too_much * PAGE_SIZE;
|
||||
n_pages -= n_too_much;
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
|
||||
if (!pagemap_scan_push_range(categories, p, addr, *end)) {
|
||||
*end = addr;
|
||||
n_pages = 0;
|
||||
ret = -ENOSPC;
|
||||
}
|
||||
|
||||
p->found_pages += n_pages;
|
||||
if (ret)
|
||||
p->arg.walk_end = *end;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Handle a PMD as a transparent huge page, if it is one.
 *
 * Returns -ENOENT when the caller should fall back to scanning PTEs: THP
 * support is compiled out, pmd_trans_huge_lock() did not lock the PMD, or
 * the huge page had to be split because only part of it is to be
 * write-protected.  Otherwise returns the result of pagemap_scan_output()
 * (0 if the page did not match).
 */
static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start,
				  unsigned long end, struct mm_walk *walk)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct pagemap_scan_private *p = walk->private;
	struct vm_area_struct *vma = walk->vma;
	unsigned long cat;
	spinlock_t *ptl;
	int ret = 0;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (!ptl)
		return -ENOENT;

	cat = p->cur_vma_category | pagemap_thp_category(p, vma, start, *pmd);

	if (pagemap_scan_is_interesting_page(cat, p)) {
		ret = pagemap_scan_output(cat, p, start, &end);

		/*
		 * Write-protect only if something was reported, the caller
		 * asked for it, and the page is not already write-protected.
		 */
		if (start != end &&
		    (p->arg.flags & PM_SCAN_WP_MATCHING) &&
		    (cat & PAGE_IS_WRITTEN)) {
			/*
			 * Break huge page into small pages if the WP operation
			 * needs to be performed on a portion of the huge page.
			 */
			if (end != start + HPAGE_SIZE) {
				spin_unlock(ptl);
				split_huge_pmd(vma, pmd, start);
				pagemap_scan_backout_range(p, start, end);
				/* Report as if there was no THP */
				return -ENOENT;
			}

			make_uffd_wp_pmd(vma, start, pmd);
			flush_tlb_range(vma, start, end);
		}
	}

	spin_unlock(ptl);
	return ret;
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
	return -ENOENT;
#endif
}
|
||||
|
||||
/*
 * pmd_entry callback: scan one PMD's worth of address space.
 *
 * The PMD is first offered to pagemap_scan_thp_entry(); unless that returns
 * -ENOENT its result is final.  Otherwise every PTE in [start, end) is
 * categorised, matching pages are reported through pagemap_scan_output()
 * and, with PM_SCAN_WP_MATCHING, written pages are write-protected.  The
 * loop stops early once the output cannot take another page.
 *
 * Returns 0, or the error from pagemap_scan_output() (-ENOSPC).
 */
static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
				  unsigned long end, struct mm_walk *walk)
{
	struct pagemap_scan_private *p = walk->private;
	struct vm_area_struct *vma = walk->vma;
	unsigned long addr, flush_end = 0;
	pte_t *pte, *start_pte;
	spinlock_t *ptl;
	int ret;

	ret = pagemap_scan_thp_entry(pmd, start, end, walk);
	if (ret != -ENOENT)
		return ret;

	ret = 0;
	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
	if (!pte) {
		/* The page table went away under us; have the walker retry. */
		walk->action = ACTION_AGAIN;
		return 0;
	}

	/*
	 * Lazy MMU mode covers only the PTE updates made under the PTL.  It
	 * must not span pagemap_scan_thp_entry(), which takes its own lock
	 * and may call split_huge_pmd().
	 */
	arch_enter_lazy_mmu_mode();

	for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
		unsigned long categories = p->cur_vma_category |
					   pagemap_page_category(p, vma, addr, ptep_get(pte));
		unsigned long next = addr + PAGE_SIZE;

		if (!pagemap_scan_is_interesting_page(categories, p))
			continue;

		ret = pagemap_scan_output(categories, p, addr, &next);
		/* Nothing could be recorded for this page: output is full. */
		if (next == addr)
			break;

		if (~p->arg.flags & PM_SCAN_WP_MATCHING)
			continue;
		if (~categories & PAGE_IS_WRITTEN)
			continue;

		make_uffd_wp_pte(vma, addr, pte);
		/* Remember the first modified address as the flush start. */
		if (!flush_end)
			start = addr;
		flush_end = next;
	}

	if (flush_end)
		flush_tlb_range(vma, start, addr);

	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(start_pte, ptl);

	cond_resched();
	return ret;
}
|
||||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
/*
 * hugetlb_entry callback: categorise one HugeTLB entry, report it if it
 * matches and, with PM_SCAN_WP_MATCHING, write-protect it.
 *
 * Returns 0 or the result of pagemap_scan_output().  When only part of the
 * huge page could be reported in write-protect mode, the report is backed
 * out, arg.walk_end is set to @start and 0 is returned.
 */
static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
				      unsigned long start, unsigned long end,
				      struct mm_walk *walk)
{
	struct pagemap_scan_private *p = walk->private;
	struct vm_area_struct *vma = walk->vma;
	unsigned long cat;
	spinlock_t *ptl;
	pte_t entry;
	int ret = 0;

	if (!(p->arg.flags & PM_SCAN_WP_MATCHING)) {
		/* Go the short route when not write-protecting pages. */
		entry = huge_ptep_get(ptep);
		cat = p->cur_vma_category | pagemap_hugetlb_category(entry);

		if (pagemap_scan_is_interesting_page(cat, p))
			ret = pagemap_scan_output(cat, p, start, &end);

		return ret;
	}

	i_mmap_lock_write(vma->vm_file->f_mapping);
	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);

	/* Read the entry only once both locks are held. */
	entry = huge_ptep_get(ptep);
	cat = p->cur_vma_category | pagemap_hugetlb_category(entry);

	if (!pagemap_scan_is_interesting_page(cat, p))
		goto unlock;

	ret = pagemap_scan_output(cat, p, start, &end);

	/* Nothing reported, or already write-protected: no WP needed. */
	if (start == end || !(cat & PAGE_IS_WRITTEN))
		goto unlock;

	if (end == start + HPAGE_SIZE) {
		make_uffd_wp_huge_pte(vma, start, ptep, entry);
		flush_hugetlb_tlb_range(vma, start, end);
	} else {
		/* Partial HugeTLB page WP isn't possible. */
		pagemap_scan_backout_range(p, start, end);
		p->arg.walk_end = start;
		ret = 0;
	}

unlock:
	spin_unlock(ptl);
	i_mmap_unlock_write(vma->vm_file->f_mapping);

	return ret;
}
|
||||
#else
|
||||
#define pagemap_scan_hugetlb_entry NULL
|
||||
#endif
|
||||
|
||||
static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end,
|
||||
int depth, struct mm_walk *walk)
|
||||
{
|
||||
struct pagemap_scan_private *p = walk->private;
|
||||
struct vm_area_struct *vma = walk->vma;
|
||||
int ret, err;
|
||||
|
||||
if (!vma || !pagemap_scan_is_interesting_page(p->cur_vma_category, p))
|
||||
return 0;
|
||||
|
||||
ret = pagemap_scan_output(p->cur_vma_category, p, addr, &end);
|
||||
if (addr == end)
|
||||
return ret;
|
||||
|
||||
if (~p->arg.flags & PM_SCAN_WP_MATCHING)
|
||||
return ret;
|
||||
|
||||
err = uffd_wp_range(vma, addr, end - addr, true);
|
||||
if (err < 0)
|
||||
ret = err;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct mm_walk_ops pagemap_scan_ops = {
|
||||
.test_walk = pagemap_scan_test_walk,
|
||||
.pmd_entry = pagemap_scan_pmd_entry,
|
||||
.pte_hole = pagemap_scan_pte_hole,
|
||||
.hugetlb_entry = pagemap_scan_hugetlb_entry,
|
||||
};
|
||||
|
||||
static int pagemap_scan_get_args(struct pm_scan_arg *arg,
|
||||
unsigned long uarg)
|
||||
{
|
||||
if (copy_from_user(arg, (void __user *)uarg, sizeof(*arg)))
|
||||
return -EFAULT;
|
||||
|
||||
if (arg->size != sizeof(struct pm_scan_arg))
|
||||
return -EINVAL;
|
||||
|
||||
/* Validate requested features */
|
||||
if (arg->flags & ~PM_SCAN_FLAGS)
|
||||
return -EINVAL;
|
||||
if ((arg->category_inverted | arg->category_mask |
|
||||
arg->category_anyof_mask | arg->return_mask) & ~PM_SCAN_CATEGORIES)
|
||||
return -EINVAL;
|
||||
|
||||
arg->start = untagged_addr((unsigned long)arg->start);
|
||||
arg->end = untagged_addr((unsigned long)arg->end);
|
||||
arg->vec = untagged_addr((unsigned long)arg->vec);
|
||||
|
||||
/* Validate memory pointers */
|
||||
if (!IS_ALIGNED(arg->start, PAGE_SIZE))
|
||||
return -EINVAL;
|
||||
if (!access_ok((void __user *)(long)arg->start, arg->end - arg->start))
|
||||
return -EFAULT;
|
||||
if (!arg->vec && arg->vec_len)
|
||||
return -EINVAL;
|
||||
if (arg->vec && !access_ok((void __user *)(long)arg->vec,
|
||||
arg->vec_len * sizeof(struct page_region)))
|
||||
return -EFAULT;
|
||||
|
||||
/* Fixup default values */
|
||||
arg->end = ALIGN(arg->end, PAGE_SIZE);
|
||||
arg->walk_end = 0;
|
||||
if (!arg->max_pages)
|
||||
arg->max_pages = ULONG_MAX;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pagemap_scan_writeback_args(struct pm_scan_arg *arg,
|
||||
unsigned long uargl)
|
||||
{
|
||||
struct pm_scan_arg __user *uarg = (void __user *)uargl;
|
||||
|
||||
if (copy_to_user(&uarg->walk_end, &arg->walk_end, sizeof(arg->walk_end)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pagemap_scan_init_bounce_buffer(struct pagemap_scan_private *p)
|
||||
{
|
||||
if (!p->arg.vec_len)
|
||||
return 0;
|
||||
|
||||
p->vec_buf_len = min_t(size_t, PAGEMAP_WALK_SIZE >> PAGE_SHIFT,
|
||||
p->arg.vec_len);
|
||||
p->vec_buf = kmalloc_array(p->vec_buf_len, sizeof(*p->vec_buf),
|
||||
GFP_KERNEL);
|
||||
if (!p->vec_buf)
|
||||
return -ENOMEM;
|
||||
|
||||
p->vec_buf->start = p->vec_buf->end = 0;
|
||||
p->vec_out = (struct page_region __user *)(long)p->arg.vec;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long pagemap_scan_flush_buffer(struct pagemap_scan_private *p)
|
||||
{
|
||||
const struct page_region *buf = p->vec_buf;
|
||||
long n = p->vec_buf_index;
|
||||
|
||||
if (!p->vec_buf)
|
||||
return 0;
|
||||
|
||||
if (buf[n].end != buf[n].start)
|
||||
n++;
|
||||
|
||||
if (!n)
|
||||
return 0;
|
||||
|
||||
if (copy_to_user(p->vec_out, buf, n * sizeof(*buf)))
|
||||
return -EFAULT;
|
||||
|
||||
p->arg.vec_len -= n;
|
||||
p->vec_out += n;
|
||||
|
||||
p->vec_buf_index = 0;
|
||||
p->vec_buf_len = min_t(size_t, p->vec_buf_len, p->arg.vec_len);
|
||||
p->vec_buf->start = p->vec_buf->end = 0;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
|
||||
{
|
||||
struct mmu_notifier_range range;
|
||||
struct pagemap_scan_private p = {0};
|
||||
unsigned long walk_start;
|
||||
size_t n_ranges_out = 0;
|
||||
int ret;
|
||||
|
||||
ret = pagemap_scan_get_args(&p.arg, uarg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
p.masks_of_interest = p.arg.category_mask | p.arg.category_anyof_mask |
|
||||
p.arg.return_mask;
|
||||
ret = pagemap_scan_init_bounce_buffer(&p);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Protection change for the range is going to happen. */
|
||||
if (p.arg.flags & PM_SCAN_WP_MATCHING) {
|
||||
mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0,
|
||||
mm, p.arg.start, p.arg.end);
|
||||
mmu_notifier_invalidate_range_start(&range);
|
||||
}
|
||||
|
||||
for (walk_start = p.arg.start; walk_start < p.arg.end;
|
||||
walk_start = p.arg.walk_end) {
|
||||
long n_out;
|
||||
|
||||
if (fatal_signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = mmap_read_lock_killable(mm);
|
||||
if (ret)
|
||||
break;
|
||||
ret = walk_page_range(mm, walk_start, p.arg.end,
|
||||
&pagemap_scan_ops, &p);
|
||||
mmap_read_unlock(mm);
|
||||
|
||||
n_out = pagemap_scan_flush_buffer(&p);
|
||||
if (n_out < 0)
|
||||
ret = n_out;
|
||||
else
|
||||
n_ranges_out += n_out;
|
||||
|
||||
if (ret != -ENOSPC)
|
||||
break;
|
||||
|
||||
if (p.arg.vec_len == 0 || p.found_pages == p.arg.max_pages)
|
||||
break;
|
||||
}
|
||||
|
||||
/* ENOSPC signifies early stop (buffer full) from the walk. */
|
||||
if (!ret || ret == -ENOSPC)
|
||||
ret = n_ranges_out;
|
||||
|
||||
/* The walk_end isn't set when ret is zero */
|
||||
if (!p.arg.walk_end)
|
||||
p.arg.walk_end = p.arg.end;
|
||||
if (pagemap_scan_writeback_args(&p.arg, uarg))
|
||||
ret = -EFAULT;
|
||||
|
||||
if (p.arg.flags & PM_SCAN_WP_MATCHING)
|
||||
mmu_notifier_invalidate_range_end(&range);
|
||||
|
||||
kfree(p.vec_buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long do_pagemap_cmd(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct mm_struct *mm = file->private_data;
|
||||
|
||||
switch (cmd) {
|
||||
case PAGEMAP_SCAN:
|
||||
return do_pagemap_scan(mm, arg);
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
const struct file_operations proc_pagemap_operations = {
|
||||
.llseek = mem_lseek, /* borrow this */
|
||||
.read = pagemap_read,
|
||||
.open = pagemap_open,
|
||||
.release = pagemap_release,
|
||||
.unlocked_ioctl = do_pagemap_cmd,
|
||||
.compat_ioctl = do_pagemap_cmd,
|
||||
};
|
||||
#endif /* CONFIG_PROC_PAGE_MONITOR */
|
||||
|
||||
|
@ -280,6 +280,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
|
||||
unsigned long cp_flags);
|
||||
|
||||
bool is_hugetlb_entry_migration(pte_t pte);
|
||||
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
|
||||
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
|
||||
|
||||
#else /* !CONFIG_HUGETLB_PAGE */
|
||||
|
@ -221,6 +221,13 @@ static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
/*
 * Stub variant of uffd_wp_range(): does nothing and reports success.
 * The function returns long, so the result is spelled 0 rather than the
 * boolean literal `false`.
 */
static inline long uffd_wp_range(struct vm_area_struct *vma,
				 unsigned long start, unsigned long len,
				 bool enable_wp)
{
	return 0;
}
|
||||
|
||||
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
|
||||
struct vm_userfaultfd_ctx vm_ctx)
|
||||
{
|
||||
|
@ -305,4 +305,63 @@ typedef int __bitwise __kernel_rwf_t;
|
||||
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
|
||||
RWF_APPEND)
|
||||
|
||||
/* Pagemap ioctl */
|
||||
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
|
||||
|
||||
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
|
||||
#define PAGE_IS_WPALLOWED (1 << 0)
|
||||
#define PAGE_IS_WRITTEN (1 << 1)
|
||||
#define PAGE_IS_FILE (1 << 2)
|
||||
#define PAGE_IS_PRESENT (1 << 3)
|
||||
#define PAGE_IS_SWAPPED (1 << 4)
|
||||
#define PAGE_IS_PFNZERO (1 << 5)
|
||||
#define PAGE_IS_HUGE (1 << 6)
|
||||
|
||||
/*
|
||||
* struct page_region - Page region with flags
|
||||
* @start: Start of the region
|
||||
* @end: End of the region (exclusive)
|
||||
* @categories: PAGE_IS_* category bitmask for the region
|
||||
*/
|
||||
struct page_region {
|
||||
__u64 start;
|
||||
__u64 end;
|
||||
__u64 categories;
|
||||
};
|
||||
|
||||
/* Flags for PAGEMAP_SCAN ioctl */
|
||||
#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */
|
||||
#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */
|
||||
|
||||
/*
|
||||
* struct pm_scan_arg - Pagemap ioctl argument
|
||||
* @size: Size of the structure
|
||||
* @flags: Flags for the IOCTL
|
||||
* @start: Starting address of the region
|
||||
* @end: Ending address of the region
|
||||
* @walk_end: Address where the scan stopped (written by kernel).
|
||||
* walk_end == end (address tags cleared) informs that the scan completed on entire range.
|
||||
* @vec: Address of page_region struct array for output
|
||||
* @vec_len: Length of the page_region struct array
|
||||
* @max_pages: Optional limit for number of returned pages (0 = disabled)
|
||||
* @category_inverted: PAGE_IS_* categories whose values match if 0 instead of 1
|
||||
* @category_mask: Skip pages for which any category doesn't match
|
||||
* @category_anyof_mask: Skip pages for which no category matches
|
||||
* @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned
|
||||
*/
|
||||
struct pm_scan_arg {
|
||||
__u64 size;
|
||||
__u64 flags;
|
||||
__u64 start;
|
||||
__u64 end;
|
||||
__u64 walk_end;
|
||||
__u64 vec;
|
||||
__u64 vec_len;
|
||||
__u64 max_pages;
|
||||
__u64 category_inverted;
|
||||
__u64 category_mask;
|
||||
__u64 category_anyof_mask;
|
||||
__u64 return_mask;
|
||||
};
|
||||
|
||||
#endif /* _UAPI_LINUX_FS_H */
|
||||
|
@ -5044,7 +5044,7 @@ bool is_hugetlb_entry_migration(pte_t pte)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
|
||||
bool is_hugetlb_entry_hwpoisoned(pte_t pte)
|
||||
{
|
||||
swp_entry_t swp;
|
||||
|
||||
@ -6266,7 +6266,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
entry = huge_pte_clear_uffd_wp(entry);
|
||||
set_huge_pte_at(mm, haddr, ptep, entry);
|
||||
set_huge_pte_at(mm, haddr, ptep, entry,
|
||||
huge_page_size(hstate_vma(vma)));
|
||||
/* Fallthrough to CoW */
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user