forked from Minki/linux
do_wait: make PIDTYPE_PID case O(1) instead of O(n)
Add a special-case when waiting on a pid (via waitpid, waitid, wait4, etc) to avoid doing an O(n) scan of children and tracees, and instead do an O(1) lookup. This improves performance when waiting on a pid from a thread group with many children and/or tracees.

Time to fork and then call waitpid on the child, from a task that already has N children [1]:

N    | Before  | After
-----|---------|------
1    | 74 us   | 74 us
20   | 72 us   | 75 us
100  | 83 us   | 77 us
500  | 99 us   | 74 us
1000 | 179 us  | 75 us
5000 | 804 us  | 79 us
8000 | 1268 us | 78 us

[1]: https://lkml.org/lkml/2021/3/12/1567

This can make a substantial performance improvement for applications with a thread that has many children or tracees and frequently needs to wait on them. Tools that use ptrace to intercept syscalls for a large number of processes are likely to fall into this category. In particular this patch was developed while building a ptrace-based second generation of the Shadow emulator [2], for which it allows us to avoid quadratic scaling (without having to use a workaround that introduces a ~40% performance penalty) [3]. Other examples of tools that fall into this category which this patch may help include User Mode Linux [4] and DetTrace [5].

[2]: https://shadow.github.io/
[3]: https://github.com/shadow/shadow/issues/1134#issuecomment-798992292
[4]: https://en.wikipedia.org/wiki/User-mode_Linux
[5]: https://github.com/dettrace/dettrace

Link: https://lkml.kernel.org/r/20210314231544.9379-1-jnewsome@torproject.org
Signed-off-by: James Newsome <jnewsome@torproject.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Christian Brauner <christian@brauner.io>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
c1e4726f46
commit
5449162ac0
@ -1440,9 +1440,48 @@ void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
|
||||
TASK_INTERRUPTIBLE, p);
|
||||
}
|
||||
|
||||
static bool is_effectively_child(struct wait_opts *wo, bool ptrace,
|
||||
struct task_struct *target)
|
||||
{
|
||||
struct task_struct *parent =
|
||||
!ptrace ? target->real_parent : target->parent;
|
||||
|
||||
return current == parent || (!(wo->wo_flags & __WNOTHREAD) &&
|
||||
same_thread_group(current, parent));
|
||||
}
|
||||
|
||||
/*
|
||||
* Optimization for waiting on PIDTYPE_PID. No need to iterate through child
|
||||
* and tracee lists to find the target task.
|
||||
*/
|
||||
static int do_wait_pid(struct wait_opts *wo)
|
||||
{
|
||||
bool ptrace;
|
||||
struct task_struct *target;
|
||||
int retval;
|
||||
|
||||
ptrace = false;
|
||||
target = pid_task(wo->wo_pid, PIDTYPE_TGID);
|
||||
if (target && is_effectively_child(wo, ptrace, target)) {
|
||||
retval = wait_consider_task(wo, ptrace, target);
|
||||
if (retval)
|
||||
return retval;
|
||||
}
|
||||
|
||||
ptrace = true;
|
||||
target = pid_task(wo->wo_pid, PIDTYPE_PID);
|
||||
if (target && target->ptrace &&
|
||||
is_effectively_child(wo, ptrace, target)) {
|
||||
retval = wait_consider_task(wo, ptrace, target);
|
||||
if (retval)
|
||||
return retval;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long do_wait(struct wait_opts *wo)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
int retval;
|
||||
|
||||
trace_sched_process_wait(wo->wo_pid);
|
||||
@ -1464,19 +1503,27 @@ repeat:
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
read_lock(&tasklist_lock);
|
||||
tsk = current;
|
||||
do {
|
||||
retval = do_wait_thread(wo, tsk);
|
||||
|
||||
if (wo->wo_type == PIDTYPE_PID) {
|
||||
retval = do_wait_pid(wo);
|
||||
if (retval)
|
||||
goto end;
|
||||
} else {
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
retval = ptrace_do_wait(wo, tsk);
|
||||
if (retval)
|
||||
goto end;
|
||||
do {
|
||||
retval = do_wait_thread(wo, tsk);
|
||||
if (retval)
|
||||
goto end;
|
||||
|
||||
if (wo->wo_flags & __WNOTHREAD)
|
||||
break;
|
||||
} while_each_thread(current, tsk);
|
||||
retval = ptrace_do_wait(wo, tsk);
|
||||
if (retval)
|
||||
goto end;
|
||||
|
||||
if (wo->wo_flags & __WNOTHREAD)
|
||||
break;
|
||||
} while_each_thread(current, tsk);
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
notask:
|
||||
|
Loading…
Reference in New Issue
Block a user