mirror of
https://github.com/torvalds/linux.git
synced 2024-11-29 07:31:29 +00:00
oom: move oom_adj value from task_struct to signal_struct
Currently, the OOM logic call flow is as follows:

    __out_of_memory()
    select_bad_process()    for each task
    badness()               calculate badness of one task
    oom_kill_process()      search child
    oom_kill_task()         kill target task and mm shared tasks with it

For example, suppose process-A has two threads, thread-A and thread-B, the process uses a very large amount of memory, and each thread has the following oom_adj and oom_score:

    thread-A: oom_adj = OOM_DISABLE, oom_score = 0
    thread-B: oom_adj = 0, oom_score = very-high

Then select_bad_process() selects thread-B, but oom_kill_task() refuses to kill the task because thread-A has OOM_DISABLE. Thus __out_of_memory() calls select_bad_process() again, but select_bad_process() selects the same task. This means the kernel falls into a livelock.

The fact is that select_bad_process() must select a killable task; otherwise the OOM logic goes into a livelock.

The root cause is that oom_adj should not be a per-thread value. It should be a per-process value, because the OOM killer kills a process, not a thread.

Thus this patch moves oomkilladj (now more appropriately named oom_adj) from struct task_struct to struct signal_struct. This naturally prevents select_bad_process() from choosing the wrong task.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
f168e1b639
commit
28b83c5193
@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
|
|||||||
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
|
struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
|
||||||
char buffer[PROC_NUMBUF];
|
char buffer[PROC_NUMBUF];
|
||||||
size_t len;
|
size_t len;
|
||||||
int oom_adjust;
|
int oom_adjust = OOM_DISABLE;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
if (!task)
|
if (!task)
|
||||||
return -ESRCH;
|
return -ESRCH;
|
||||||
oom_adjust = task->oomkilladj;
|
|
||||||
|
if (lock_task_sighand(task, &flags)) {
|
||||||
|
oom_adjust = task->signal->oom_adj;
|
||||||
|
unlock_task_sighand(task, &flags);
|
||||||
|
}
|
||||||
|
|
||||||
put_task_struct(task);
|
put_task_struct(task);
|
||||||
|
|
||||||
len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
|
len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
|
||||||
@ -1017,6 +1023,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
|
|||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
char buffer[PROC_NUMBUF], *end;
|
char buffer[PROC_NUMBUF], *end;
|
||||||
int oom_adjust;
|
int oom_adjust;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
memset(buffer, 0, sizeof(buffer));
|
memset(buffer, 0, sizeof(buffer));
|
||||||
if (count > sizeof(buffer) - 1)
|
if (count > sizeof(buffer) - 1)
|
||||||
@ -1032,11 +1039,20 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
|
|||||||
task = get_proc_task(file->f_path.dentry->d_inode);
|
task = get_proc_task(file->f_path.dentry->d_inode);
|
||||||
if (!task)
|
if (!task)
|
||||||
return -ESRCH;
|
return -ESRCH;
|
||||||
if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
|
if (!lock_task_sighand(task, &flags)) {
|
||||||
|
put_task_struct(task);
|
||||||
|
return -ESRCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
|
||||||
|
unlock_task_sighand(task, &flags);
|
||||||
put_task_struct(task);
|
put_task_struct(task);
|
||||||
return -EACCES;
|
return -EACCES;
|
||||||
}
|
}
|
||||||
task->oomkilladj = oom_adjust;
|
|
||||||
|
task->signal->oom_adj = oom_adjust;
|
||||||
|
|
||||||
|
unlock_task_sighand(task, &flags);
|
||||||
put_task_struct(task);
|
put_task_struct(task);
|
||||||
if (end - buffer == 0)
|
if (end - buffer == 0)
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
@ -639,6 +639,8 @@ struct signal_struct {
|
|||||||
unsigned audit_tty;
|
unsigned audit_tty;
|
||||||
struct tty_audit_buf *tty_audit_buf;
|
struct tty_audit_buf *tty_audit_buf;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int oom_adj; /* OOM kill score adjustment (bit shift) */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Context switch must be unlocked if interrupts are to be enabled */
|
/* Context switch must be unlocked if interrupts are to be enabled */
|
||||||
@ -1221,7 +1223,6 @@ struct task_struct {
|
|||||||
* a short time
|
* a short time
|
||||||
*/
|
*/
|
||||||
unsigned char fpu_counter;
|
unsigned char fpu_counter;
|
||||||
s8 oomkilladj; /* OOM kill score adjustment (bit shift). */
|
|
||||||
#ifdef CONFIG_BLK_DEV_IO_TRACE
|
#ifdef CONFIG_BLK_DEV_IO_TRACE
|
||||||
unsigned int btrace_seq;
|
unsigned int btrace_seq;
|
||||||
#endif
|
#endif
|
||||||
|
@ -880,6 +880,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
|||||||
|
|
||||||
tty_audit_fork(sig);
|
tty_audit_fork(sig);
|
||||||
|
|
||||||
|
sig->oom_adj = current->signal->oom_adj;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,6 +58,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
|
|||||||
unsigned long points, cpu_time, run_time;
|
unsigned long points, cpu_time, run_time;
|
||||||
struct mm_struct *mm;
|
struct mm_struct *mm;
|
||||||
struct task_struct *child;
|
struct task_struct *child;
|
||||||
|
int oom_adj = p->signal->oom_adj;
|
||||||
|
|
||||||
|
if (oom_adj == OOM_DISABLE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
task_lock(p);
|
task_lock(p);
|
||||||
mm = p->mm;
|
mm = p->mm;
|
||||||
@ -148,15 +152,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
|
|||||||
points /= 8;
|
points /= 8;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adjust the score by oomkilladj.
|
* Adjust the score by oom_adj.
|
||||||
*/
|
*/
|
||||||
if (p->oomkilladj) {
|
if (oom_adj) {
|
||||||
if (p->oomkilladj > 0) {
|
if (oom_adj > 0) {
|
||||||
if (!points)
|
if (!points)
|
||||||
points = 1;
|
points = 1;
|
||||||
points <<= p->oomkilladj;
|
points <<= oom_adj;
|
||||||
} else
|
} else
|
||||||
points >>= -(p->oomkilladj);
|
points >>= -(oom_adj);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -251,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
|
|||||||
*ppoints = ULONG_MAX;
|
*ppoints = ULONG_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p->oomkilladj == OOM_DISABLE)
|
if (p->signal->oom_adj == OOM_DISABLE)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
points = badness(p, uptime.tv_sec);
|
points = badness(p, uptime.tv_sec);
|
||||||
@ -304,7 +308,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
|
|||||||
}
|
}
|
||||||
printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
|
printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
|
||||||
p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
|
p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
|
||||||
get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
|
get_mm_rss(mm), (int)task_cpu(p), p->signal->oom_adj,
|
||||||
p->comm);
|
p->comm);
|
||||||
task_unlock(p);
|
task_unlock(p);
|
||||||
} while_each_thread(g, p);
|
} while_each_thread(g, p);
|
||||||
@ -359,18 +363,9 @@ static int oom_kill_task(struct task_struct *p)
|
|||||||
* change to NULL at any time since we do not hold task_lock(p).
|
* change to NULL at any time since we do not hold task_lock(p).
|
||||||
* However, this is of no concern to us.
|
* However, this is of no concern to us.
|
||||||
*/
|
*/
|
||||||
|
if (!mm || p->signal->oom_adj == OOM_DISABLE)
|
||||||
if (mm == NULL)
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/*
|
|
||||||
* Don't kill the process if any threads are set to OOM_DISABLE
|
|
||||||
*/
|
|
||||||
do_each_thread(g, q) {
|
|
||||||
if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
|
|
||||||
return 1;
|
|
||||||
} while_each_thread(g, q);
|
|
||||||
|
|
||||||
__oom_kill_task(p, 1);
|
__oom_kill_task(p, 1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -394,8 +389,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
|
|||||||
|
|
||||||
if (printk_ratelimit()) {
|
if (printk_ratelimit()) {
|
||||||
printk(KERN_WARNING "%s invoked oom-killer: "
|
printk(KERN_WARNING "%s invoked oom-killer: "
|
||||||
"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
|
"gfp_mask=0x%x, order=%d, oom_adj=%d\n",
|
||||||
current->comm, gfp_mask, order, current->oomkilladj);
|
current->comm, gfp_mask, order,
|
||||||
|
current->signal->oom_adj);
|
||||||
task_lock(current);
|
task_lock(current);
|
||||||
cpuset_print_task_mems_allowed(current);
|
cpuset_print_task_mems_allowed(current);
|
||||||
task_unlock(current);
|
task_unlock(current);
|
||||||
|
Loading…
Reference in New Issue
Block a user