function-graph: ignore times across schedule

Impact: more accurate timings

The current method of function graph tracing does not take into
account the time spent when a task is not running. This shows functions
that call schedule have increased costs:

 3) + 18.664 us   |      }
 ------------------------------------------
 3)    <idle>-0    =>  kblockd-123
 ------------------------------------------

 3)               |      finish_task_switch() {
 3)   1.441 us    |        _spin_unlock_irq();
 3)   3.966 us    |      }
 3) ! 2959.433 us |    }
 3) ! 2961.465 us |  }

This patch uses the tracepoint in the scheduling context switch to
account for time that has elapsed while a task is scheduled out.
Now we see:

 ------------------------------------------
 3)    <idle>-0    =>  edac-po-1067
 ------------------------------------------

 3)               |      finish_task_switch() {
 3)   0.685 us    |        _spin_unlock_irq();
 3)   2.331 us    |      }
 3) + 41.439 us   |    }
 3) + 42.663 us   |  }

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
This commit is contained in:
Steven Rostedt 2009-03-24 01:10:15 -04:00
parent 05ce5818ad
commit 8aef2d2856
2 changed files with 38 additions and 0 deletions

View File

@ -1409,6 +1409,8 @@ struct task_struct {
int curr_ret_stack;
/* Stack of return addresses for return function tracing */
struct ftrace_ret_stack *ret_stack;
/* time stamp for last schedule */
unsigned long long ftrace_timestamp;
/*
* Number of functions that haven't been traced
* because of depth overrun.

View File

@ -29,6 +29,8 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <trace/sched.h>
#include <asm/ftrace.h>
#include "trace.h"
@ -2590,6 +2592,31 @@ free:
return ret;
}
static void
ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long long timestamp;
int index;
timestamp = trace_clock_local();
prev->ftrace_timestamp = timestamp;
/* only process tasks that we timestamped */
if (!next->ftrace_timestamp)
return;
/*
* Update all the counters in next to make up for the
* time next was sleeping.
*/
timestamp -= next->ftrace_timestamp;
for (index = next->curr_ret_stack; index >= 0; index--)
next->ret_stack[index].calltime += timestamp;
}
/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
@ -2611,6 +2638,13 @@ static int start_graph_tracing(void)
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
if (!ret) {
ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch);
if (ret)
pr_info("ftrace_graph: Couldn't activate tracepoint"
" probe to kernel_sched_switch\n");
}
kfree(ret_stack_list);
return ret;
}
@ -2674,6 +2708,7 @@ void unregister_ftrace_graph(void)
mutex_lock(&ftrace_lock);
atomic_dec(&ftrace_graph_active);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(FTRACE_STOP_FUNC_RET);
@ -2694,6 +2729,7 @@ void ftrace_graph_init_task(struct task_struct *t)
t->curr_ret_stack = -1;
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->ftrace_timestamp = 0;
} else
t->ret_stack = NULL;
}