ring-buffer: Use context bit recursion checking
Using context bit recursion checking, we can help increase the performance of the ring buffer. Before this patch: # echo function > /debug/tracing/current_tracer # for i in `seq 10`; do ./hackbench 50; done Time: 10.285 Time: 10.407 Time: 10.243 Time: 10.372 Time: 10.380 Time: 10.198 Time: 10.272 Time: 10.354 Time: 10.248 Time: 10.253 (average: 10.3012) Now we have: # echo function > /debug/tracing/current_tracer # for i in `seq 10`; do ./hackbench 50; done Time: 9.712 Time: 9.824 Time: 9.861 Time: 9.827 Time: 9.962 Time: 9.905 Time: 9.886 Time: 10.088 Time: 9.861 Time: 9.834 (average: 9.876) a 4% savings! Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
This commit is contained in:
		
							parent
							
								
									897f68a48b
								
							
						
					
					
						commit
						567cd4da54
					
				| @ -2432,41 +2432,76 @@ rb_reserve_next_event(struct ring_buffer *buffer, | ||||
| 
 | ||||
| #ifdef CONFIG_TRACING | ||||
| 
 | ||||
| #define TRACE_RECURSIVE_DEPTH 16 | ||||
| /*
 | ||||
|  * The lock and unlock are done within a preempt disable section. | ||||
|  * The current_context per_cpu variable can only be modified | ||||
|  * by the current task between lock and unlock. But it can | ||||
|  * be modified more than once via an interrupt. To pass this | ||||
|  * information from the lock to the unlock without having to | ||||
|  * access the 'in_interrupt()' functions again (which do show | ||||
|  * a bit of overhead in something as critical as function tracing), | ||||
|  * we use a bitmask trick. | ||||
|  * | ||||
|  *  bit 0 =  NMI context | ||||
|  *  bit 1 =  IRQ context | ||||
|  *  bit 2 =  SoftIRQ context | ||||
|  *  bit 3 =  normal context. | ||||
|  * | ||||
|  * This works because this is the order of contexts that can | ||||
|  * preempt other contexts. A SoftIRQ never preempts an IRQ | ||||
|  * context. | ||||
|  * | ||||
|  * When the context is determined, the corresponding bit is | ||||
|  * checked and set (if it was set, then a recursion of that context | ||||
|  * happened). | ||||
|  * | ||||
|  * On unlock, we need to clear this bit. To do so, just subtract | ||||
|  * 1 from the current_context and AND it to itself. | ||||
|  * | ||||
|  * (binary) | ||||
|  *  101 - 1 = 100 | ||||
|  *  101 & 100 = 100 (clearing bit zero) | ||||
|  * | ||||
|  *  1010 - 1 = 1001 | ||||
|  *  1010 & 1001 = 1000 (clearing bit 1) | ||||
|  * | ||||
|  * The least significant set bit can be cleared this way, and it | ||||
|  * just so happens that it is the same bit corresponding to | ||||
|  * the current context. | ||||
|  */ | ||||
| static DEFINE_PER_CPU(unsigned int, current_context); | ||||
| 
 | ||||
| /* Keep this code out of the fast path cache */ | ||||
| static noinline void trace_recursive_fail(void) | ||||
| static __always_inline int trace_recursive_lock(void) | ||||
| { | ||||
| 	/* Disable all tracing before we do anything else */ | ||||
| 	tracing_off_permanent(); | ||||
| 	unsigned int val = this_cpu_read(current_context); | ||||
| 	int bit; | ||||
| 
 | ||||
| 	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" | ||||
| 		    "HC[%lu]:SC[%lu]:NMI[%lu]\n", | ||||
| 		    trace_recursion_buffer(), | ||||
| 		    hardirq_count() >> HARDIRQ_SHIFT, | ||||
| 		    softirq_count() >> SOFTIRQ_SHIFT, | ||||
| 		    in_nmi()); | ||||
| 	if (in_interrupt()) { | ||||
| 		if (in_nmi()) | ||||
| 			bit = 0; | ||||
| 		else if (in_irq()) | ||||
| 			bit = 1; | ||||
| 		else | ||||
| 			bit = 2; | ||||
| 	} else | ||||
| 		bit = 3; | ||||
| 
 | ||||
| 	WARN_ON_ONCE(1); | ||||
| 	if (unlikely(val & (1 << bit))) | ||||
| 		return 1; | ||||
| 
 | ||||
| 	val |= (1 << bit); | ||||
| 	this_cpu_write(current_context, val); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline int trace_recursive_lock(void) | ||||
| static __always_inline void trace_recursive_unlock(void) | ||||
| { | ||||
| 	trace_recursion_inc(); | ||||
| 	unsigned int val = this_cpu_read(current_context); | ||||
| 
 | ||||
| 	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	trace_recursive_fail(); | ||||
| 
 | ||||
| 	return -1; | ||||
| } | ||||
| 
 | ||||
| static inline void trace_recursive_unlock(void) | ||||
| { | ||||
| 	WARN_ON_ONCE(!trace_recursion_buffer()); | ||||
| 
 | ||||
| 	trace_recursion_dec(); | ||||
| 	val--; | ||||
| 	val &= this_cpu_read(current_context); | ||||
| 	this_cpu_write(current_context, val); | ||||
| } | ||||
| 
 | ||||
| #else | ||||
|  | ||||
| @ -291,11 +291,6 @@ struct tracer { | ||||
| 
 | ||||
| 
 | ||||
| /* Only current can touch trace_recursion */ | ||||
| #define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) | ||||
| #define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) | ||||
| 
 | ||||
| /* Ring buffer has the 10 LSB bits to count */ | ||||
| #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) | ||||
| 
 | ||||
| /*
 | ||||
|  * For function tracing recursion: | ||||
| @ -323,7 +318,13 @@ struct tracer { | ||||
|  * caller, and we can skip the current check. | ||||
|  */ | ||||
| enum { | ||||
| 	TRACE_FTRACE_BIT = 11, | ||||
| 	TRACE_BUFFER_BIT, | ||||
| 	TRACE_BUFFER_NMI_BIT, | ||||
| 	TRACE_BUFFER_IRQ_BIT, | ||||
| 	TRACE_BUFFER_SIRQ_BIT, | ||||
| 
 | ||||
| 	/* Start of function recursion bits */ | ||||
| 	TRACE_FTRACE_BIT, | ||||
| 	TRACE_FTRACE_NMI_BIT, | ||||
| 	TRACE_FTRACE_IRQ_BIT, | ||||
| 	TRACE_FTRACE_SIRQ_BIT, | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user