Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "24 fixes" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (24 commits) autofs: fix error return in autofs_fill_super() autofs: drop dentry reference only when it is never used fs/drop_caches.c: avoid softlockups in drop_pagecache_sb() mm: migrate: don't rely on __PageMovable() of newpage after unlocking it psi: clarify the Kconfig text for the default-disable option mm, memory_hotplug: __offline_pages fix wrong locking mm: hwpoison: use do_send_sig_info() instead of force_sig() kasan: mark file common so ftrace doesn't trace it init/Kconfig: fix grammar by moving a closing parenthesis lib/test_kmod.c: potential double free in error handling mm, oom: fix use-after-free in oom_kill_process mm/hotplug: invalid PFNs from pfn_to_online_page() mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages psi: fix aggregation idle shut-off mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone oom, oom_reaper: do not enqueue same task twice mm: migrate: make buffer_migrate_page_norefs() actually succeed kernel/exit.c: release ptraced tasks before zap_pid_ns_processes x86_64: increase stack size for KASAN_EXTRA ...
This commit is contained in:
commit
b9de6efed2
@ -30,6 +30,7 @@ generic-y += pgalloc.h
|
||||
generic-y += preempt.h
|
||||
generic-y += segment.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += tlbflush.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
|
@ -1,5 +1,4 @@
|
||||
include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generic-y += kvm_para.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += ucontext.h
|
||||
|
@ -40,6 +40,7 @@ generic-y += preempt.h
|
||||
generic-y += scatterlist.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += sizes.h
|
||||
generic-y += spinlock.h
|
||||
generic-y += timex.h
|
||||
|
@ -1,5 +1,4 @@
|
||||
include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generic-y += kvm_para.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += ucontext.h
|
||||
|
@ -30,6 +30,7 @@ generic-y += rwsem.h
|
||||
generic-y += sections.h
|
||||
generic-y += segment.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += sizes.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
|
@ -1,4 +1,3 @@
|
||||
include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generic-y += shmparam.h
|
||||
generic-y += ucontext.h
|
||||
|
@ -20,6 +20,7 @@ generic-y += mm-arch-hooks.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += spinlock.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
|
@ -2,4 +2,3 @@ include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generated-y += unistd_32.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += shmparam.h
|
||||
|
@ -26,6 +26,7 @@ generic-y += parport.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += syscalls.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
|
@ -2,5 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generated-y += unistd_32.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += ucontext.h
|
||||
|
@ -34,6 +34,7 @@ generic-y += qrwlock_types.h
|
||||
generic-y += qrwlock.h
|
||||
generic-y += sections.h
|
||||
generic-y += segment.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += string.h
|
||||
generic-y += switch_to.h
|
||||
generic-y += topology.h
|
||||
|
@ -1,5 +1,4 @@
|
||||
include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generic-y += kvm_para.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += ucontext.h
|
||||
|
@ -28,6 +28,7 @@ generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += segment.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += sizes.h
|
||||
generic-y += syscalls.h
|
||||
generic-y += topology.h
|
||||
|
@ -1,5 +1,4 @@
|
||||
include include/uapi/asm-generic/Kbuild.asm
|
||||
|
||||
generic-y += kvm_para.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += ucontext.h
|
||||
|
@ -7,7 +7,11 @@
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
#ifdef CONFIG_KASAN_EXTRA
|
||||
#define KASAN_STACK_ORDER 2
|
||||
#else
|
||||
#define KASAN_STACK_ORDER 1
|
||||
#endif
|
||||
#else
|
||||
#define KASAN_STACK_ORDER 0
|
||||
#endif
|
||||
|
@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb,
|
||||
pkt.len = dentry->d_name.len;
|
||||
memcpy(pkt.name, dentry->d_name.name, pkt.len);
|
||||
pkt.name[pkt.len] = '\0';
|
||||
dput(dentry);
|
||||
|
||||
if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
|
||||
ret = -EFAULT;
|
||||
@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb,
|
||||
complete_all(&ino->expire_complete);
|
||||
spin_unlock(&sbi->fs_lock);
|
||||
|
||||
dput(dentry);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
|
||||
}
|
||||
root_inode = autofs_get_inode(s, S_IFDIR | 0755);
|
||||
root = d_make_root(root_inode);
|
||||
if (!root)
|
||||
if (!root) {
|
||||
ret = -ENOMEM;
|
||||
goto fail_ino;
|
||||
}
|
||||
pipe = NULL;
|
||||
|
||||
root->d_fsdata = ino;
|
||||
|
@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
|
||||
spin_lock(&sb->s_inode_list_lock);
|
||||
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
|
||||
spin_lock(&inode->i_lock);
|
||||
/*
|
||||
* We must skip inodes in unusual state. We may also skip
|
||||
* inodes without pages but we deliberately won't in case
|
||||
* we need to reschedule to avoid softlockups.
|
||||
*/
|
||||
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
|
||||
(inode->i_mapping->nrpages == 0)) {
|
||||
(inode->i_mapping->nrpages == 0 && !need_resched())) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
continue;
|
||||
}
|
||||
@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
|
||||
spin_unlock(&inode->i_lock);
|
||||
spin_unlock(&sb->s_inode_list_lock);
|
||||
|
||||
cond_resched();
|
||||
invalidate_mapping_pages(inode->i_mapping, 0, -1);
|
||||
iput(toput_inode);
|
||||
toput_inode = inode;
|
||||
|
@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
|
||||
inode = proc_get_inode(dir->i_sb, de);
|
||||
if (!inode)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
d_set_d_op(dentry, &proc_misc_dentry_ops);
|
||||
d_set_d_op(dentry, de->proc_dops);
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
read_unlock(&proc_subdir_lock);
|
||||
@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
|
||||
INIT_LIST_HEAD(&ent->pde_openers);
|
||||
proc_set_user(ent, (*parent)->uid, (*parent)->gid);
|
||||
|
||||
ent->proc_dops = &proc_misc_dentry_ops;
|
||||
|
||||
out:
|
||||
return ent;
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ struct proc_dir_entry {
|
||||
struct completion *pde_unload_completion;
|
||||
const struct inode_operations *proc_iops;
|
||||
const struct file_operations *proc_fops;
|
||||
const struct dentry_operations *proc_dops;
|
||||
union {
|
||||
const struct seq_operations *seq_ops;
|
||||
int (*single_show)(struct seq_file *, void *);
|
||||
|
@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode)
|
||||
return maybe_get_net(PDE_NET(PDE(inode)));
|
||||
}
|
||||
|
||||
static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct dentry_operations proc_net_dentry_ops = {
|
||||
.d_revalidate = proc_net_d_revalidate,
|
||||
.d_delete = always_delete_dentry,
|
||||
};
|
||||
|
||||
static void pde_force_lookup(struct proc_dir_entry *pde)
|
||||
{
|
||||
/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
|
||||
pde->proc_dops = &proc_net_dentry_ops;
|
||||
}
|
||||
|
||||
static int seq_open_net(struct inode *inode, struct file *file)
|
||||
{
|
||||
unsigned int state_size = PDE(inode)->state_size;
|
||||
@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
|
||||
p = proc_create_reg(name, mode, &parent, data);
|
||||
if (!p)
|
||||
return NULL;
|
||||
pde_force_lookup(p);
|
||||
p->proc_fops = &proc_net_seq_fops;
|
||||
p->seq_ops = ops;
|
||||
p->state_size = state_size;
|
||||
@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
|
||||
p = proc_create_reg(name, mode, &parent, data);
|
||||
if (!p)
|
||||
return NULL;
|
||||
pde_force_lookup(p);
|
||||
p->proc_fops = &proc_net_seq_fops;
|
||||
p->seq_ops = ops;
|
||||
p->state_size = state_size;
|
||||
@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
|
||||
p = proc_create_reg(name, mode, &parent, data);
|
||||
if (!p)
|
||||
return NULL;
|
||||
pde_force_lookup(p);
|
||||
p->proc_fops = &proc_net_single_fops;
|
||||
p->single_show = show;
|
||||
return proc_register(parent, p);
|
||||
@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
|
||||
p = proc_create_reg(name, mode, &parent, data);
|
||||
if (!p)
|
||||
return NULL;
|
||||
pde_force_lookup(p);
|
||||
p->proc_fops = &proc_net_single_fops;
|
||||
p->single_show = show;
|
||||
p->write = write;
|
||||
|
@ -21,14 +21,16 @@ struct vmem_altmap;
|
||||
* walkers which rely on the fully initialized page->flags and others
|
||||
* should use this rather than pfn_valid && pfn_to_page
|
||||
*/
|
||||
#define pfn_to_online_page(pfn) \
|
||||
({ \
|
||||
struct page *___page = NULL; \
|
||||
unsigned long ___nr = pfn_to_section_nr(pfn); \
|
||||
\
|
||||
if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
|
||||
___page = pfn_to_page(pfn); \
|
||||
___page; \
|
||||
#define pfn_to_online_page(pfn) \
|
||||
({ \
|
||||
struct page *___page = NULL; \
|
||||
unsigned long ___pfn = pfn; \
|
||||
unsigned long ___nr = pfn_to_section_nr(___pfn); \
|
||||
\
|
||||
if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
|
||||
pfn_valid_within(___pfn)) \
|
||||
___page = pfn_to_page(___pfn); \
|
||||
___page; \
|
||||
})
|
||||
|
||||
/*
|
||||
|
@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
|
||||
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
|
||||
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
|
||||
#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
|
||||
#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */
|
||||
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
|
||||
|
||||
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
|
||||
|
13
init/Kconfig
13
init/Kconfig
@ -512,6 +512,17 @@ config PSI_DEFAULT_DISABLED
|
||||
per default but can be enabled through passing psi=1 on the
|
||||
kernel commandline during boot.
|
||||
|
||||
This feature adds some code to the task wakeup and sleep
|
||||
paths of the scheduler. The overhead is too low to affect
|
||||
common scheduling-intense workloads in practice (such as
|
||||
webservers, memcache), but it does show up in artificial
|
||||
scheduler stress tests, such as hackbench.
|
||||
|
||||
If you are paranoid and not sure what the kernel will be
|
||||
used for, say Y.
|
||||
|
||||
Say N if unsure.
|
||||
|
||||
endmenu # "CPU/Task time and stats accounting"
|
||||
|
||||
config CPU_ISOLATION
|
||||
@ -825,7 +836,7 @@ config CGROUP_PIDS
|
||||
PIDs controller is designed to stop this from happening.
|
||||
|
||||
It should be noted that organisational operations (such as attaching
|
||||
to a cgroup hierarchy will *not* be blocked by the PIDs controller),
|
||||
to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
|
||||
since the PIDs limit only affects a process's ability to fork, not to
|
||||
attach to a cgroup.
|
||||
|
||||
|
@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct task_struct *find_child_reaper(struct task_struct *father)
|
||||
static struct task_struct *find_child_reaper(struct task_struct *father,
|
||||
struct list_head *dead)
|
||||
__releases(&tasklist_lock)
|
||||
__acquires(&tasklist_lock)
|
||||
{
|
||||
struct pid_namespace *pid_ns = task_active_pid_ns(father);
|
||||
struct task_struct *reaper = pid_ns->child_reaper;
|
||||
struct task_struct *p, *n;
|
||||
|
||||
if (likely(reaper != father))
|
||||
return reaper;
|
||||
@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
|
||||
panic("Attempted to kill init! exitcode=0x%08x\n",
|
||||
father->signal->group_exit_code ?: father->exit_code);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(p, n, dead, ptrace_entry) {
|
||||
list_del_init(&p->ptrace_entry);
|
||||
release_task(p);
|
||||
}
|
||||
|
||||
zap_pid_ns_processes(pid_ns);
|
||||
write_lock_irq(&tasklist_lock);
|
||||
|
||||
@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
|
||||
exit_ptrace(father, dead);
|
||||
|
||||
/* Can drop and reacquire tasklist_lock */
|
||||
reaper = find_child_reaper(father);
|
||||
reaper = find_child_reaper(father, dead);
|
||||
if (list_empty(&father->children))
|
||||
return;
|
||||
|
||||
|
@ -124,6 +124,7 @@
|
||||
* sampling of the aggregate task states would be.
|
||||
*/
|
||||
|
||||
#include "../workqueue_internal.h"
|
||||
#include <linux/sched/loadavg.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
||||
groupc->tasks[t]++;
|
||||
|
||||
write_seqcount_end(&groupc->seq);
|
||||
|
||||
if (!delayed_work_pending(&group->clock_work))
|
||||
schedule_delayed_work(&group->clock_work, PSI_FREQ);
|
||||
}
|
||||
|
||||
static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
|
||||
@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
|
||||
{
|
||||
int cpu = task_cpu(task);
|
||||
struct psi_group *group;
|
||||
bool wake_clock = true;
|
||||
void *iter = NULL;
|
||||
|
||||
if (!task->pid)
|
||||
@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
|
||||
task->psi_flags &= ~clear;
|
||||
task->psi_flags |= set;
|
||||
|
||||
while ((group = iterate_groups(task, &iter)))
|
||||
/*
|
||||
* Periodic aggregation shuts off if there is a period of no
|
||||
* task changes, so we wake it back up if necessary. However,
|
||||
* don't do this if the task change is the aggregation worker
|
||||
* itself going to sleep, or we'll ping-pong forever.
|
||||
*/
|
||||
if (unlikely((clear & TSK_RUNNING) &&
|
||||
(task->flags & PF_WQ_WORKER) &&
|
||||
wq_worker_last_func(task) == psi_update_work))
|
||||
wake_clock = false;
|
||||
|
||||
while ((group = iterate_groups(task, &iter))) {
|
||||
psi_group_change(group, cpu, clear, set);
|
||||
if (wake_clock && !delayed_work_pending(&group->clock_work))
|
||||
schedule_delayed_work(&group->clock_work, PSI_FREQ);
|
||||
}
|
||||
}
|
||||
|
||||
void psi_memstall_tick(struct task_struct *task, int cpu)
|
||||
|
@ -909,6 +909,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
|
||||
return to_wakeup ? to_wakeup->task : NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* wq_worker_last_func - retrieve worker's last work function
|
||||
*
|
||||
* Determine the last function a worker executed. This is called from
|
||||
* the scheduler to get a worker's last known identity.
|
||||
*
|
||||
* CONTEXT:
|
||||
* spin_lock_irq(rq->lock)
|
||||
*
|
||||
* Return:
|
||||
* The last work function %current executed as a worker, NULL if it
|
||||
* hasn't executed any work yet.
|
||||
*/
|
||||
work_func_t wq_worker_last_func(struct task_struct *task)
|
||||
{
|
||||
struct worker *worker = kthread_data(task);
|
||||
|
||||
return worker->last_func;
|
||||
}
|
||||
|
||||
/**
|
||||
* worker_set_flags - set worker flags and adjust nr_running accordingly
|
||||
* @worker: self
|
||||
@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
|
||||
if (unlikely(cpu_intensive))
|
||||
worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
|
||||
|
||||
/* tag the worker for identification in schedule() */
|
||||
worker->last_func = worker->current_func;
|
||||
|
||||
/* we're done with it, release */
|
||||
hash_del(&worker->hentry);
|
||||
worker->current_work = NULL;
|
||||
|
@ -53,6 +53,9 @@ struct worker {
|
||||
|
||||
/* used only by rescuers to point to the target workqueue */
|
||||
struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
|
||||
|
||||
/* used by the scheduler to determine a worker's last known identity */
|
||||
work_func_t last_func;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)
|
||||
|
||||
/*
|
||||
* Scheduler hooks for concurrency managed workqueue. Only to be used from
|
||||
* sched/core.c and workqueue.c.
|
||||
* sched/ and workqueue.c.
|
||||
*/
|
||||
void wq_worker_waking_up(struct task_struct *task, int cpu);
|
||||
struct task_struct *wq_worker_sleeping(struct task_struct *task);
|
||||
work_func_t wq_worker_last_func(struct task_struct *task);
|
||||
|
||||
#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
|
||||
|
@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config)
|
||||
config->test_driver = NULL;
|
||||
|
||||
kfree_const(config->test_fs);
|
||||
config->test_driver = NULL;
|
||||
config->test_fs = NULL;
|
||||
}
|
||||
|
||||
static void kmod_config_free(struct kmod_test_device *test_dev)
|
||||
|
@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
break;
|
||||
}
|
||||
if (ret & VM_FAULT_RETRY) {
|
||||
if (nonblocking)
|
||||
if (nonblocking &&
|
||||
!(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
|
||||
*nonblocking = 0;
|
||||
*nr_pages = 0;
|
||||
/*
|
||||
|
@ -5,6 +5,7 @@ UBSAN_SANITIZE_generic.o := n
|
||||
UBSAN_SANITIZE_tags.o := n
|
||||
KCOV_INSTRUMENT := n
|
||||
|
||||
CFLAGS_REMOVE_common.o = -pg
|
||||
CFLAGS_REMOVE_generic.o = -pg
|
||||
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
|
||||
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
|
||||
|
@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
|
||||
if (fail || tk->addr_valid == 0) {
|
||||
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
|
||||
pfn, tk->tsk->comm, tk->tsk->pid);
|
||||
force_sig(SIGKILL, tk->tsk);
|
||||
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
|
||||
tk->tsk, PIDTYPE_PID);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1233,7 +1233,8 @@ static bool is_pageblock_removable_nolock(struct page *page)
|
||||
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
|
||||
{
|
||||
struct page *page = pfn_to_page(start_pfn);
|
||||
struct page *end_page = page + nr_pages;
|
||||
unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page)));
|
||||
struct page *end_page = pfn_to_page(end_pfn);
|
||||
|
||||
/* Check the starting page of each pageblock within the range */
|
||||
for (; page < end_page; page = next_active_pageblock(page)) {
|
||||
@ -1273,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
|
||||
i++;
|
||||
if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
|
||||
continue;
|
||||
/* Check if we got outside of the zone */
|
||||
if (zone && !zone_spans_pfn(zone, pfn + i))
|
||||
return 0;
|
||||
page = pfn_to_page(pfn + i);
|
||||
if (zone && page_zone(page) != zone)
|
||||
return 0;
|
||||
@ -1301,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
|
||||
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long pfn;
|
||||
struct page *page;
|
||||
|
||||
for (pfn = start; pfn < end; pfn++) {
|
||||
if (pfn_valid(pfn)) {
|
||||
page = pfn_to_page(pfn);
|
||||
if (PageLRU(page))
|
||||
return pfn;
|
||||
if (__PageMovable(page))
|
||||
return pfn;
|
||||
if (PageHuge(page)) {
|
||||
if (hugepage_migration_supported(page_hstate(page)) &&
|
||||
page_huge_active(page))
|
||||
return pfn;
|
||||
else
|
||||
pfn = round_up(pfn + 1,
|
||||
1 << compound_order(page)) - 1;
|
||||
}
|
||||
}
|
||||
struct page *page, *head;
|
||||
unsigned long skip;
|
||||
|
||||
if (!pfn_valid(pfn))
|
||||
continue;
|
||||
page = pfn_to_page(pfn);
|
||||
if (PageLRU(page))
|
||||
return pfn;
|
||||
if (__PageMovable(page))
|
||||
return pfn;
|
||||
|
||||
if (!PageHuge(page))
|
||||
continue;
|
||||
head = compound_head(page);
|
||||
if (hugepage_migration_supported(page_hstate(head)) &&
|
||||
page_huge_active(head))
|
||||
return pfn;
|
||||
skip = (1 << compound_order(head)) - (page - head);
|
||||
pfn += skip - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -1344,7 +1352,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
{
|
||||
unsigned long pfn;
|
||||
struct page *page;
|
||||
int not_managed = 0;
|
||||
int ret = 0;
|
||||
LIST_HEAD(source);
|
||||
|
||||
@ -1392,7 +1399,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
else
|
||||
ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
|
||||
if (!ret) { /* Success */
|
||||
put_page(page);
|
||||
list_add_tail(&page->lru, &source);
|
||||
if (!__PageMovable(page))
|
||||
inc_node_page_state(page, NR_ISOLATED_ANON +
|
||||
@ -1401,22 +1407,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
} else {
|
||||
pr_warn("failed to isolate pfn %lx\n", pfn);
|
||||
dump_page(page, "isolation failed");
|
||||
put_page(page);
|
||||
/* Because we don't have big zone->lock. we should
|
||||
check this again here. */
|
||||
if (page_count(page)) {
|
||||
not_managed++;
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
}
|
||||
put_page(page);
|
||||
}
|
||||
if (!list_empty(&source)) {
|
||||
if (not_managed) {
|
||||
putback_movable_pages(&source);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Allocate a new page from the nearest neighbor node */
|
||||
ret = migrate_pages(&source, new_node_page, NULL, 0,
|
||||
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
|
||||
@ -1429,7 +1423,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
|
||||
putback_movable_pages(&source);
|
||||
}
|
||||
}
|
||||
out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1576,7 +1570,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
|
||||
we assume this for now. */
|
||||
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
|
||||
&valid_end)) {
|
||||
mem_hotplug_done();
|
||||
ret = -EINVAL;
|
||||
reason = "multizone range";
|
||||
goto failed_removal;
|
||||
@ -1591,7 +1584,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
|
||||
MIGRATE_MOVABLE,
|
||||
SKIP_HWPOISON | REPORT_FAILURE);
|
||||
if (ret) {
|
||||
mem_hotplug_done();
|
||||
reason = "failure to isolate range";
|
||||
goto failed_removal;
|
||||
}
|
||||
|
12
mm/migrate.c
12
mm/migrate.c
@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
|
||||
/* Simple case, sync compaction */
|
||||
if (mode != MIGRATE_ASYNC) {
|
||||
do {
|
||||
get_bh(bh);
|
||||
lock_buffer(bh);
|
||||
bh = bh->b_this_page;
|
||||
|
||||
@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
|
||||
|
||||
/* async case, we cannot block on lock_buffer so use trylock_buffer */
|
||||
do {
|
||||
get_bh(bh);
|
||||
if (!trylock_buffer(bh)) {
|
||||
/*
|
||||
* We failed to lock the buffer and cannot stall in
|
||||
* async migration. Release the taken locks
|
||||
*/
|
||||
struct buffer_head *failed_bh = bh;
|
||||
put_bh(failed_bh);
|
||||
bh = head;
|
||||
while (bh != failed_bh) {
|
||||
unlock_buffer(bh);
|
||||
put_bh(bh);
|
||||
bh = bh->b_this_page;
|
||||
}
|
||||
return false;
|
||||
@ -818,7 +814,6 @@ unlock_buffers:
|
||||
bh = head;
|
||||
do {
|
||||
unlock_buffer(bh);
|
||||
put_bh(bh);
|
||||
bh = bh->b_this_page;
|
||||
|
||||
} while (bh != head);
|
||||
@ -1135,10 +1130,13 @@ out:
|
||||
* If migration is successful, decrease refcount of the newpage
|
||||
* which will not free the page because new page owner increased
|
||||
* refcounter. As well, if it is LRU page, add the page to LRU
|
||||
* list in here.
|
||||
* list in here. Use the old state of the isolated source page to
|
||||
* determine if we migrated a LRU page. newpage was already unlocked
|
||||
* and possibly modified by its owner - don't rely on the page
|
||||
* state.
|
||||
*/
|
||||
if (rc == MIGRATEPAGE_SUCCESS) {
|
||||
if (unlikely(__PageMovable(newpage)))
|
||||
if (unlikely(!is_lru))
|
||||
put_page(newpage);
|
||||
else
|
||||
putback_lru_page(newpage);
|
||||
|
@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
|
||||
|
||||
static void wake_oom_reaper(struct task_struct *tsk)
|
||||
{
|
||||
/* tsk is already queued? */
|
||||
if (tsk == oom_reaper_list || tsk->oom_reaper_list)
|
||||
/* mm is already queued? */
|
||||
if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
|
||||
return;
|
||||
|
||||
get_task_struct(tsk);
|
||||
@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
|
||||
* still freeing memory.
|
||||
*/
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
/*
|
||||
* The task 'p' might have already exited before reaching here. The
|
||||
* put_task_struct() will free task_struct 'p' while the loop still tries
|
||||
* to access the field of 'p', so, get an extra reference.
|
||||
*/
|
||||
get_task_struct(p);
|
||||
for_each_thread(p, t) {
|
||||
list_for_each_entry(child, &t->children, sibling) {
|
||||
unsigned int child_points;
|
||||
@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
|
||||
}
|
||||
}
|
||||
}
|
||||
put_task_struct(p);
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
/*
|
||||
|
1
tools/testing/selftests/proc/.gitignore
vendored
1
tools/testing/selftests/proc/.gitignore
vendored
@ -10,4 +10,5 @@
|
||||
/proc-uptime-002
|
||||
/read
|
||||
/self
|
||||
/setns-dcache
|
||||
/thread-self
|
||||
|
@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001
|
||||
TEST_GEN_PROGS += proc-uptime-002
|
||||
TEST_GEN_PROGS += read
|
||||
TEST_GEN_PROGS += self
|
||||
TEST_GEN_PROGS += setns-dcache
|
||||
TEST_GEN_PROGS += thread-self
|
||||
|
||||
include ../lib.mk
|
||||
|
129
tools/testing/selftests/proc/setns-dcache.c
Normal file
129
tools/testing/selftests/proc/setns-dcache.c
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
/*
|
||||
* Test that setns(CLONE_NEWNET) points to new /proc/net content even
|
||||
* if old one is in dcache.
|
||||
*
|
||||
* FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
|
||||
*/
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
static pid_t pid = -1;
|
||||
|
||||
static void f(void)
|
||||
{
|
||||
if (pid > 0) {
|
||||
kill(pid, SIGTERM);
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int fd[2];
|
||||
char _ = 0;
|
||||
int nsfd;
|
||||
|
||||
atexit(f);
|
||||
|
||||
/* Check for privileges and syscall availability straight away. */
|
||||
if (unshare(CLONE_NEWNET) == -1) {
|
||||
if (errno == ENOSYS || errno == EPERM) {
|
||||
return 4;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
/* Distinguisher between two otherwise empty net namespaces. */
|
||||
if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (pipe(fd) == -1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid == -1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (pid == 0) {
|
||||
if (unshare(CLONE_NEWNET) == -1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (write(fd[1], &_, 1) != 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
pause();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (read(fd[0], &_, 1) != 1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
{
|
||||
char buf[64];
|
||||
snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
|
||||
nsfd = open(buf, O_RDONLY);
|
||||
if (nsfd == -1) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reliably pin dentry into dcache. */
|
||||
(void)open("/proc/net/unix", O_RDONLY);
|
||||
|
||||
if (setns(nsfd, CLONE_NEWNET) == -1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
kill(pid, SIGTERM);
|
||||
pid = 0;
|
||||
|
||||
{
|
||||
char buf[4096];
|
||||
ssize_t rv;
|
||||
int fd;
|
||||
|
||||
fd = open("/proc/net/unix", O_RDONLY);
|
||||
if (fd == -1) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define S "Num RefCount Protocol Flags Type St Inode Path\n"
|
||||
rv = read(fd, buf, sizeof(buf));
|
||||
|
||||
assert(rv == strlen(S));
|
||||
assert(memcmp(buf, S, strlen(S)) == 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user