forked from Minki/linux
21d9ee3eda
This patch removes the NUMA PTE bits and associated helpers. As a side-effect it increases the maximum possible swap space on x86-64. One potential source of problems is races between the marking of PTEs PROT_NONE, NUMA hinting faults and migration. It must be guaranteed that a PTE being protected is not faulted in parallel, seen as pte_none, and memory corrupted as a result. The base case is safe but transhuge has had problems in the past due to a different migration mechanism and a dependence on the page lock to serialise migrations, and so warrants a closer look. task_work hinting update parallel fault ------------------------ -------------- change_pmd_range change_huge_pmd __pmd_trans_huge_lock pmdp_get_and_clear __handle_mm_fault pmd_none do_huge_pmd_anonymous_page read? pmd_lock blocks until hinting complete, fail !pmd_none test write? __do_huge_pmd_anonymous_page acquires pmd_lock, checks pmd_none pmd_modify set_pmd_at task_work hinting update parallel migration ------------------------ ------------------ change_pmd_range change_huge_pmd __pmd_trans_huge_lock pmdp_get_and_clear __handle_mm_fault do_huge_pmd_numa_page migrate_misplaced_transhuge_page pmd_lock waits for updates to complete, recheck pmd_same pmd_modify set_pmd_at Both of those are safe and the case where a transhuge page is inserted during a protection update is unchanged. The case where two processes try migrating at the same time is unchanged by this series so should still be ok. I could not find a case where we are accidentally depending on the PTE not being cleared and flushed. If one is missed, it'll manifest as corruption problems that start triggering shortly after this series is merged and only happen when NUMA balancing is enabled. 
Signed-off-by: Mel Gorman <mgorman@suse.de> Tested-by: Sasha Levin <sasha.levin@oracle.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Dave Jones <davej@redhat.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Rik van Riel <riel@redhat.com> Cc: Mark Brown <broonie@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
||
---|---|---|
.. | ||
bitops | ||
4level-fixup.h | ||
atomic64.h | ||
atomic-long.h | ||
atomic.h | ||
audit_change_attr.h | ||
audit_dir_write.h | ||
audit_read.h | ||
audit_signal.h | ||
audit_write.h | ||
barrier.h | ||
bitops.h | ||
bitsperlong.h | ||
bug.h | ||
bugs.h | ||
cache.h | ||
cacheflush.h | ||
checksum.h | ||
clkdev.h | ||
cmpxchg-local.h | ||
cmpxchg.h | ||
cputime_jiffies.h | ||
cputime_nsecs.h | ||
cputime.h | ||
current.h | ||
delay.h | ||
device.h | ||
div64.h | ||
dma-coherent.h | ||
dma-contiguous.h | ||
dma-mapping-broken.h | ||
dma-mapping-common.h | ||
dma.h | ||
early_ioremap.h | ||
emergency-restart.h | ||
exec.h | ||
fb.h | ||
fixmap.h | ||
ftrace.h | ||
futex.h | ||
getorder.h | ||
gpio.h | ||
hardirq.h | ||
hugetlb.h | ||
hw_irq.h | ||
ide_iops.h | ||
int-ll64.h | ||
io-64-nonatomic-hi-lo.h | ||
io-64-nonatomic-lo-hi.h | ||
io.h | ||
ioctl.h | ||
iomap.h | ||
irq_regs.h | ||
irq_work.h | ||
irq.h | ||
irqflags.h | ||
Kbuild.asm | ||
kdebug.h | ||
kmap_types.h | ||
kvm_para.h | ||
libata-portmap.h | ||
linkage.h | ||
local64.h | ||
local.h | ||
mcs_spinlock.h | ||
memory_model.h | ||
mm_hooks.h | ||
mmu_context.h | ||
mmu.h | ||
module.h | ||
msi.h | ||
mutex-dec.h | ||
mutex-null.h | ||
mutex-xchg.h | ||
mutex.h | ||
page.h | ||
param.h | ||
parport.h | ||
pci_iomap.h | ||
pci-bridge.h | ||
pci-dma-compat.h | ||
pci.h | ||
percpu.h | ||
pgalloc.h | ||
pgtable-nopmd.h | ||
pgtable-nopud.h | ||
pgtable.h | ||
preempt.h | ||
ptrace.h | ||
qrwlock_types.h | ||
qrwlock.h | ||
resource.h | ||
rtc.h | ||
rwsem.h | ||
scatterlist.h | ||
seccomp.h | ||
sections.h | ||
segment.h | ||
serial.h | ||
siginfo.h | ||
signal.h | ||
simd.h | ||
sizes.h | ||
spinlock.h | ||
statfs.h | ||
string.h | ||
switch_to.h | ||
syscall.h | ||
syscalls.h | ||
termios-base.h | ||
termios.h | ||
timex.h | ||
tlb.h | ||
tlbflush.h | ||
topology.h | ||
trace_clock.h | ||
uaccess-unaligned.h | ||
uaccess.h | ||
unaligned.h | ||
unistd.h | ||
user.h | ||
vga.h | ||
vmlinux.lds.h | ||
vtime.h | ||
word-at-a-time.h | ||
xor.h |