forked from Minki/linux
mm: mempolicy: Implement change_prot_numa() in terms of change_protection()
This patch converts change_prot_numa() to use change_protection(). As pte_numa() and friends check the PTE bits directly, it is necessary for change_protection() to use pmd_mknuma(). Hence the required modifications to change_protection() are a little clumsy, but the end result is that most of the NUMA page table helpers are just one or two instructions. Signed-off-by: Mel Gorman <mgorman@suse.de>
This commit is contained in:
parent
b24f53a0be
commit
4b10e7d562
@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
|
|||||||
unsigned long new_addr, unsigned long old_end,
|
unsigned long new_addr, unsigned long old_end,
|
||||||
pmd_t *old_pmd, pmd_t *new_pmd);
|
pmd_t *old_pmd, pmd_t *new_pmd);
|
||||||
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
unsigned long addr, pgprot_t newprot);
|
unsigned long addr, pgprot_t newprot,
|
||||||
|
int prot_numa);
|
||||||
|
|
||||||
enum transparent_hugepage_flag {
|
enum transparent_hugepage_flag {
|
||||||
TRANSPARENT_HUGEPAGE_FLAG,
|
TRANSPARENT_HUGEPAGE_FLAG,
|
||||||
|
@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr,
|
|||||||
unsigned long flags, unsigned long new_addr);
|
unsigned long flags, unsigned long new_addr);
|
||||||
extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
|
extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
|
||||||
unsigned long end, pgprot_t newprot,
|
unsigned long end, pgprot_t newprot,
|
||||||
int dirty_accountable);
|
int dirty_accountable, int prot_numa);
|
||||||
extern int mprotect_fixup(struct vm_area_struct *vma,
|
extern int mprotect_fixup(struct vm_area_struct *vma,
|
||||||
struct vm_area_struct **pprev, unsigned long start,
|
struct vm_area_struct **pprev, unsigned long start,
|
||||||
unsigned long end, unsigned long newflags);
|
unsigned long end, unsigned long newflags);
|
||||||
@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
|
#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
|
||||||
void change_prot_numa(struct vm_area_struct *vma,
|
unsigned long change_prot_numa(struct vm_area_struct *vma,
|
||||||
unsigned long start, unsigned long end);
|
unsigned long start, unsigned long end);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1147,7 +1147,7 @@ out:
|
|||||||
}
|
}
|
||||||
|
|
||||||
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
unsigned long addr, pgprot_t newprot)
|
unsigned long addr, pgprot_t newprot, int prot_numa)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
|||||||
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
|
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
entry = pmdp_get_and_clear(mm, addr, pmd);
|
entry = pmdp_get_and_clear(mm, addr, pmd);
|
||||||
entry = pmd_modify(entry, newprot);
|
if (!prot_numa)
|
||||||
|
entry = pmd_modify(entry, newprot);
|
||||||
|
else {
|
||||||
|
struct page *page = pmd_page(*pmd);
|
||||||
|
|
||||||
|
/* only check non-shared pages */
|
||||||
|
if (page_mapcount(page) == 1 &&
|
||||||
|
!pmd_numa(*pmd)) {
|
||||||
|
entry = pmd_mknuma(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
set_pmd_at(mm, addr, pmd, entry);
|
set_pmd_at(mm, addr, pmd, entry);
|
||||||
spin_unlock(&vma->vm_mm->page_table_lock);
|
spin_unlock(&vma->vm_mm->page_table_lock);
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
137
mm/mempolicy.c
137
mm/mempolicy.c
@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
|
|||||||
|
|
||||||
#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
|
#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
|
||||||
/*
|
/*
|
||||||
* Here we search for not shared page mappings (mapcount == 1) and we
|
* This is used to mark a range of virtual addresses to be inaccessible.
|
||||||
* set up the pmd/pte_numa on those mappings so the very next access
|
* These are later cleared by a NUMA hinting fault. Depending on these
|
||||||
* will fire a NUMA hinting page fault.
|
* faults, pages may be migrated for better NUMA placement.
|
||||||
|
*
|
||||||
|
* This is assuming that NUMA faults are handled using PROT_NONE. If
|
||||||
|
* an architecture makes a different choice, it will need further
|
||||||
|
* changes to the core.
|
||||||
*/
|
*/
|
||||||
static int
|
unsigned long change_prot_numa(struct vm_area_struct *vma,
|
||||||
change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
|
unsigned long addr, unsigned long end)
|
||||||
unsigned long address)
|
|
||||||
{
|
{
|
||||||
pgd_t *pgd;
|
int nr_updated;
|
||||||
pud_t *pud;
|
BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
|
||||||
pmd_t *pmd;
|
|
||||||
pte_t *pte, *_pte;
|
|
||||||
struct page *page;
|
|
||||||
unsigned long _address, end;
|
|
||||||
spinlock_t *ptl;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
VM_BUG_ON(address & ~PAGE_MASK);
|
nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
|
||||||
|
|
||||||
pgd = pgd_offset(mm, address);
|
return nr_updated;
|
||||||
if (!pgd_present(*pgd))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
pud = pud_offset(pgd, address);
|
|
||||||
if (!pud_present(*pud))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
pmd = pmd_offset(pud, address);
|
|
||||||
if (pmd_none(*pmd))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
if (pmd_trans_huge_lock(pmd, vma) == 1) {
|
|
||||||
int page_nid;
|
|
||||||
ret = HPAGE_PMD_NR;
|
|
||||||
|
|
||||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
|
||||||
|
|
||||||
if (pmd_numa(*pmd)) {
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
page = pmd_page(*pmd);
|
|
||||||
|
|
||||||
/* only check non-shared pages */
|
|
||||||
if (page_mapcount(page) != 1) {
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
page_nid = page_to_nid(page);
|
|
||||||
|
|
||||||
if (pmd_numa(*pmd)) {
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
|
|
||||||
ret += HPAGE_PMD_NR;
|
|
||||||
/* defer TLB flush to lower the overhead */
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pmd_trans_unstable(pmd))
|
|
||||||
goto out;
|
|
||||||
VM_BUG_ON(!pmd_present(*pmd));
|
|
||||||
|
|
||||||
end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
|
|
||||||
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
|
|
||||||
for (_address = address, _pte = pte; _address < end;
|
|
||||||
_pte++, _address += PAGE_SIZE) {
|
|
||||||
pte_t pteval = *_pte;
|
|
||||||
if (!pte_present(pteval))
|
|
||||||
continue;
|
|
||||||
if (pte_numa(pteval))
|
|
||||||
continue;
|
|
||||||
page = vm_normal_page(vma, _address, pteval);
|
|
||||||
if (unlikely(!page))
|
|
||||||
continue;
|
|
||||||
/* only check non-shared pages */
|
|
||||||
if (page_mapcount(page) != 1)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
|
|
||||||
|
|
||||||
/* defer TLB flush to lower the overhead */
|
|
||||||
ret++;
|
|
||||||
}
|
|
||||||
pte_unmap_unlock(pte, ptl);
|
|
||||||
|
|
||||||
if (ret && !pmd_numa(*pmd)) {
|
|
||||||
spin_lock(&mm->page_table_lock);
|
|
||||||
set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
|
|
||||||
spin_unlock(&mm->page_table_lock);
|
|
||||||
/* defer TLB flush to lower the overhead */
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Assumes mmap_sem is held */
|
|
||||||
void
|
|
||||||
change_prot_numa(struct vm_area_struct *vma,
|
|
||||||
unsigned long address, unsigned long end)
|
|
||||||
{
|
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
|
||||||
int progress = 0;
|
|
||||||
|
|
||||||
while (address < end) {
|
|
||||||
VM_BUG_ON(address < vma->vm_start ||
|
|
||||||
address + PAGE_SIZE > vma->vm_end);
|
|
||||||
|
|
||||||
progress += change_prot_numa_range(mm, vma, address);
|
|
||||||
address = (address + PMD_SIZE) & PMD_MASK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Flush the TLB for the mm to start the NUMA hinting
|
|
||||||
* page faults after we finish scanning this vma part
|
|
||||||
* if there were any PTE updates
|
|
||||||
*/
|
|
||||||
if (progress) {
|
|
||||||
mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
|
|
||||||
flush_tlb_range(vma, address, end);
|
|
||||||
mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static unsigned long change_prot_numa(struct vm_area_struct *vma,
|
static unsigned long change_prot_numa(struct vm_area_struct *vma,
|
||||||
|
@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
||||||
unsigned long addr, unsigned long end, pgprot_t newprot,
|
unsigned long addr, unsigned long end, pgprot_t newprot,
|
||||||
int dirty_accountable)
|
int dirty_accountable, int prot_numa)
|
||||||
{
|
{
|
||||||
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
pte_t *pte, oldpte;
|
pte_t *pte, oldpte;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
unsigned long pages = 0;
|
unsigned long pages = 0;
|
||||||
@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|||||||
oldpte = *pte;
|
oldpte = *pte;
|
||||||
if (pte_present(oldpte)) {
|
if (pte_present(oldpte)) {
|
||||||
pte_t ptent;
|
pte_t ptent;
|
||||||
|
bool updated = false;
|
||||||
|
|
||||||
ptent = ptep_modify_prot_start(mm, addr, pte);
|
ptent = ptep_modify_prot_start(mm, addr, pte);
|
||||||
ptent = pte_modify(ptent, newprot);
|
if (!prot_numa) {
|
||||||
|
ptent = pte_modify(ptent, newprot);
|
||||||
|
updated = true;
|
||||||
|
} else {
|
||||||
|
struct page *page;
|
||||||
|
|
||||||
|
page = vm_normal_page(vma, addr, oldpte);
|
||||||
|
if (page) {
|
||||||
|
/* only check non-shared pages */
|
||||||
|
if (!pte_numa(oldpte) &&
|
||||||
|
page_mapcount(page) == 1) {
|
||||||
|
ptent = pte_mknuma(ptent);
|
||||||
|
updated = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Avoid taking write faults for pages we know to be
|
* Avoid taking write faults for pages we know to be
|
||||||
* dirty.
|
* dirty.
|
||||||
*/
|
*/
|
||||||
if (dirty_accountable && pte_dirty(ptent))
|
if (dirty_accountable && pte_dirty(ptent)) {
|
||||||
ptent = pte_mkwrite(ptent);
|
ptent = pte_mkwrite(ptent);
|
||||||
|
updated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (updated)
|
||||||
|
pages++;
|
||||||
|
|
||||||
ptep_modify_prot_commit(mm, addr, pte, ptent);
|
ptep_modify_prot_commit(mm, addr, pte, ptent);
|
||||||
pages++;
|
|
||||||
} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
|
} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
|
||||||
swp_entry_t entry = pte_to_swp_entry(oldpte);
|
swp_entry_t entry = pte_to_swp_entry(oldpte);
|
||||||
|
|
||||||
@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|||||||
return pages;
|
return pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NUMA_BALANCING
|
||||||
|
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
|
||||||
|
pmd_t *pmd)
|
||||||
|
{
|
||||||
|
spin_lock(&mm->page_table_lock);
|
||||||
|
set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
|
||||||
|
spin_unlock(&mm->page_table_lock);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
|
||||||
|
pmd_t *pmd)
|
||||||
|
{
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_NUMA_BALANCING */
|
||||||
|
|
||||||
static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
|
||||||
unsigned long addr, unsigned long end, pgprot_t newprot,
|
unsigned long addr, unsigned long end, pgprot_t newprot,
|
||||||
int dirty_accountable)
|
int dirty_accountable, int prot_numa)
|
||||||
{
|
{
|
||||||
pmd_t *pmd;
|
pmd_t *pmd;
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
|
|||||||
if (pmd_trans_huge(*pmd)) {
|
if (pmd_trans_huge(*pmd)) {
|
||||||
if (next - addr != HPAGE_PMD_SIZE)
|
if (next - addr != HPAGE_PMD_SIZE)
|
||||||
split_huge_page_pmd(vma->vm_mm, pmd);
|
split_huge_page_pmd(vma->vm_mm, pmd);
|
||||||
else if (change_huge_pmd(vma, pmd, addr, newprot)) {
|
else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
|
||||||
pages += HPAGE_PMD_NR;
|
pages += HPAGE_PMD_NR;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
|
|||||||
}
|
}
|
||||||
if (pmd_none_or_clear_bad(pmd))
|
if (pmd_none_or_clear_bad(pmd))
|
||||||
continue;
|
continue;
|
||||||
pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
|
pages += change_pte_range(vma, pmd, addr, next, newprot,
|
||||||
dirty_accountable);
|
dirty_accountable, prot_numa);
|
||||||
|
|
||||||
|
if (prot_numa)
|
||||||
|
change_pmd_protnuma(vma->vm_mm, addr, pmd);
|
||||||
} while (pmd++, addr = next, addr != end);
|
} while (pmd++, addr = next, addr != end);
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
|
|||||||
|
|
||||||
static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
|
static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
|
||||||
unsigned long addr, unsigned long end, pgprot_t newprot,
|
unsigned long addr, unsigned long end, pgprot_t newprot,
|
||||||
int dirty_accountable)
|
int dirty_accountable, int prot_numa)
|
||||||
{
|
{
|
||||||
pud_t *pud;
|
pud_t *pud;
|
||||||
unsigned long next;
|
unsigned long next;
|
||||||
@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
|
|||||||
if (pud_none_or_clear_bad(pud))
|
if (pud_none_or_clear_bad(pud))
|
||||||
continue;
|
continue;
|
||||||
pages += change_pmd_range(vma, pud, addr, next, newprot,
|
pages += change_pmd_range(vma, pud, addr, next, newprot,
|
||||||
dirty_accountable);
|
dirty_accountable, prot_numa);
|
||||||
} while (pud++, addr = next, addr != end);
|
} while (pud++, addr = next, addr != end);
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
|
|||||||
|
|
||||||
static unsigned long change_protection_range(struct vm_area_struct *vma,
|
static unsigned long change_protection_range(struct vm_area_struct *vma,
|
||||||
unsigned long addr, unsigned long end, pgprot_t newprot,
|
unsigned long addr, unsigned long end, pgprot_t newprot,
|
||||||
int dirty_accountable)
|
int dirty_accountable, int prot_numa)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
pgd_t *pgd;
|
pgd_t *pgd;
|
||||||
@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
|
|||||||
if (pgd_none_or_clear_bad(pgd))
|
if (pgd_none_or_clear_bad(pgd))
|
||||||
continue;
|
continue;
|
||||||
pages += change_pud_range(vma, pgd, addr, next, newprot,
|
pages += change_pud_range(vma, pgd, addr, next, newprot,
|
||||||
dirty_accountable);
|
dirty_accountable, prot_numa);
|
||||||
} while (pgd++, addr = next, addr != end);
|
} while (pgd++, addr = next, addr != end);
|
||||||
|
|
||||||
/* Only flush the TLB if we actually modified any entries: */
|
/* Only flush the TLB if we actually modified any entries: */
|
||||||
@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
|
|||||||
|
|
||||||
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
|
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
|
||||||
unsigned long end, pgprot_t newprot,
|
unsigned long end, pgprot_t newprot,
|
||||||
int dirty_accountable)
|
int dirty_accountable, int prot_numa)
|
||||||
{
|
{
|
||||||
struct mm_struct *mm = vma->vm_mm;
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
unsigned long pages;
|
unsigned long pages;
|
||||||
@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
|
|||||||
if (is_vm_hugetlb_page(vma))
|
if (is_vm_hugetlb_page(vma))
|
||||||
pages = hugetlb_change_protection(vma, start, end, newprot);
|
pages = hugetlb_change_protection(vma, start, end, newprot);
|
||||||
else
|
else
|
||||||
pages = change_protection_range(vma, start, end, newprot, dirty_accountable);
|
pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
|
||||||
mmu_notifier_invalidate_range_end(mm, start, end);
|
mmu_notifier_invalidate_range_end(mm, start, end);
|
||||||
|
|
||||||
return pages;
|
return pages;
|
||||||
@ -249,7 +289,7 @@ success:
|
|||||||
dirty_accountable = 1;
|
dirty_accountable = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
|
change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
|
||||||
|
|
||||||
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
|
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
|
||||||
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
|
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
|
||||||
|
Loading…
Reference in New Issue
Block a user