sched/core: Optimize __schedule()
Oleg noted that by making do_exit() use __schedule() for the TASK_DEAD context switch, we can avoid the TASK_DEAD special case currently in __schedule() because that avoids the extra preempt_disable() from schedule(). In order to facilitate this, create a do_task_dead() helper which we place in the scheduler code, such that it can access __schedule(). Also add some __noreturn annotations to the functions, there's no coming back from do_exit(). Suggested-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Cheng Chao <cs.os.kernel@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: akpm@linux-foundation.org Cc: chris@chris-wilson.co.uk Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20160913163729.GB5012@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									bf89a30472
								
							
						
					
					
						commit
						9af6528ee9
					
				| @ -259,17 +259,14 @@ static inline void might_fault(void) { } | ||||
| extern struct atomic_notifier_head panic_notifier_list; | ||||
| extern long (*panic_blink)(int state); | ||||
| __printf(1, 2) | ||||
| void panic(const char *fmt, ...) | ||||
| 	__noreturn __cold; | ||||
| void panic(const char *fmt, ...) __noreturn __cold; | ||||
| void nmi_panic(struct pt_regs *regs, const char *msg); | ||||
| extern void oops_enter(void); | ||||
| extern void oops_exit(void); | ||||
| void print_oops_end_marker(void); | ||||
| extern int oops_may_print(void); | ||||
| void do_exit(long error_code) | ||||
| 	__noreturn; | ||||
| void complete_and_exit(struct completion *, long) | ||||
| 	__noreturn; | ||||
| void do_exit(long error_code) __noreturn; | ||||
| void complete_and_exit(struct completion *, long) __noreturn; | ||||
| 
 | ||||
| /* Internal, do not use. */ | ||||
| int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); | ||||
|  | ||||
| @ -448,6 +448,8 @@ static inline void io_schedule(void) | ||||
| 	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); | ||||
| } | ||||
| 
 | ||||
| void __noreturn do_task_dead(void); | ||||
| 
 | ||||
| struct nsproxy; | ||||
| struct user_namespace; | ||||
| 
 | ||||
|  | ||||
| @ -725,7 +725,7 @@ static void check_stack_usage(void) | ||||
| static inline void check_stack_usage(void) {} | ||||
| #endif | ||||
| 
 | ||||
| void do_exit(long code) | ||||
| void __noreturn do_exit(long code) | ||||
| { | ||||
| 	struct task_struct *tsk = current; | ||||
| 	int group_dead; | ||||
| @ -882,29 +882,7 @@ void do_exit(long code) | ||||
| 	exit_rcu(); | ||||
| 	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed | ||||
| 	 * when the following two conditions become true. | ||||
| 	 *   - There is race condition of mmap_sem (It is acquired by | ||||
| 	 *     exit_mm()), and | ||||
| 	 *   - SMI occurs before setting TASK_RUNNING. | ||||
| 	 *     (or hypervisor of virtual machine switches to other guest) | ||||
| 	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD | ||||
| 	 * | ||||
| 	 * To avoid it, we have to wait for releasing tsk->pi_lock which | ||||
| 	 * is held by try_to_wake_up() | ||||
| 	 */ | ||||
| 	smp_mb(); | ||||
| 	raw_spin_unlock_wait(&tsk->pi_lock); | ||||
| 
 | ||||
| 	/* causes final put_task_struct in finish_task_switch(). */ | ||||
| 	tsk->state = TASK_DEAD; | ||||
| 	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */ | ||||
| 	schedule(); | ||||
| 	BUG(); | ||||
| 	/* Avoid "noreturn function does return".  */ | ||||
| 	for (;;) | ||||
| 		cpu_relax();	/* For when BUG is null */ | ||||
| 	do_task_dead(); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(do_exit); | ||||
| 
 | ||||
|  | ||||
| @ -3331,17 +3331,6 @@ static void __sched notrace __schedule(bool preempt) | ||||
| 	rq = cpu_rq(cpu); | ||||
| 	prev = rq->curr; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * do_exit() calls schedule() with preemption disabled as an exception; | ||||
| 	 * however we must fix that up, otherwise the next task will see an | ||||
| 	 * inconsistent (higher) preempt count. | ||||
| 	 * | ||||
| 	 * It also avoids the below schedule_debug() test from complaining | ||||
| 	 * about this. | ||||
| 	 */ | ||||
| 	if (unlikely(prev->state == TASK_DEAD)) | ||||
| 		preempt_enable_no_resched_notrace(); | ||||
| 
 | ||||
| 	schedule_debug(prev); | ||||
| 
 | ||||
| 	if (sched_feat(HRTICK)) | ||||
| @ -3409,6 +3398,33 @@ static void __sched notrace __schedule(bool preempt) | ||||
| } | ||||
| STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ | ||||
| 
 | ||||
| void __noreturn do_task_dead(void) | ||||
| { | ||||
| 	/*
 | ||||
| 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed | ||||
| 	 * when the following two conditions become true. | ||||
| 	 *   - There is race condition of mmap_sem (It is acquired by | ||||
| 	 *     exit_mm()), and | ||||
| 	 *   - SMI occurs before setting TASK_RUNNING. | ||||
| 	 *     (or hypervisor of virtual machine switches to other guest) | ||||
| 	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD | ||||
| 	 * | ||||
| 	 * To avoid it, we have to wait for releasing tsk->pi_lock which | ||||
| 	 * is held by try_to_wake_up() | ||||
| 	 */ | ||||
| 	smp_mb(); | ||||
| 	raw_spin_unlock_wait(&current->pi_lock); | ||||
| 
 | ||||
| 	/* causes final put_task_struct in finish_task_switch(). */ | ||||
| 	__set_current_state(TASK_DEAD); | ||||
| 	current->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */ | ||||
| 	__schedule(false); | ||||
| 	BUG(); | ||||
| 	/* Avoid "noreturn function does return".  */ | ||||
| 	for (;;) | ||||
| 		cpu_relax();	/* For when BUG is null */ | ||||
| } | ||||
| 
 | ||||
| static inline void sched_submit_work(struct task_struct *tsk) | ||||
| { | ||||
| 	if (!tsk->state || tsk_is_pi_blocked(tsk)) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user