perf/core: Optimize side-band event delivery
The perf_event_aux() function iterates all PMUs and all events in their respective per-CPU contexts to find the events to deliver side-band records to.

For example, the brk test case in lkp triggers many mmap() operations, which, if we're also running perf, results in many perf_event_aux() invocations.

If we enable uncore PMU support (even when uncore events are not used), dozens of uncore PMUs will be iterated, which can significantly decrease brk_test's throughput.

For example, the brk throughput:

  without uncore PMUs: 2647573 ops_per_sec
  with uncore PMUs:    1768444 ops_per_sec

... a 33% reduction.

To get at the per-CPU events that need side-band records, this patch puts these events on a per-CPU list; this avoids iterating the PMUs and any events that do not need side-band records.

Per-task events are unchanged to avoid extra overhead on the context switch paths.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reported-by: Huang, Ying <ying.huang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1458757477-3781-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
42c4fb7747
commit
f2fb6bef92
@ -517,6 +517,11 @@ struct swevent_hlist {
|
||||
struct perf_cgroup;
|
||||
struct ring_buffer;
|
||||
|
||||
struct pmu_event_list {
|
||||
raw_spinlock_t lock;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct perf_event - performance event kernel representation:
|
||||
*/
|
||||
@ -675,6 +680,7 @@ struct perf_event {
|
||||
int cgrp_defer_enabled;
|
||||
#endif
|
||||
|
||||
struct list_head sb_list;
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
};
|
||||
|
||||
|
@ -335,6 +335,7 @@ static atomic_t perf_sched_count;
|
||||
|
||||
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
|
||||
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
|
||||
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
static atomic_t nr_comm_events __read_mostly;
|
||||
@ -3665,6 +3666,26 @@ static void free_event_rcu(struct rcu_head *head)
|
||||
static void ring_buffer_attach(struct perf_event *event,
|
||||
struct ring_buffer *rb);
|
||||
|
||||
static void detach_sb_event(struct perf_event *event)
|
||||
{
|
||||
struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
|
||||
|
||||
raw_spin_lock(&pel->lock);
|
||||
list_del_rcu(&event->sb_list);
|
||||
raw_spin_unlock(&pel->lock);
|
||||
}
|
||||
|
||||
static void unaccount_pmu_sb_event(struct perf_event *event)
|
||||
{
|
||||
if (event->parent)
|
||||
return;
|
||||
|
||||
if (event->attach_state & PERF_ATTACH_TASK)
|
||||
return;
|
||||
|
||||
detach_sb_event(event);
|
||||
}
|
||||
|
||||
static void unaccount_event_cpu(struct perf_event *event, int cpu)
|
||||
{
|
||||
if (event->parent)
|
||||
@ -3728,6 +3749,8 @@ static void unaccount_event(struct perf_event *event)
|
||||
}
|
||||
|
||||
unaccount_event_cpu(event, event->cpu);
|
||||
|
||||
unaccount_pmu_sb_event(event);
|
||||
}
|
||||
|
||||
static void perf_sched_delayed(struct work_struct *work)
|
||||
@ -5888,13 +5911,25 @@ perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * Walk the current CPU's pmu_sb_events list and call @output(event, @data)
 * for every event whose state is at least PERF_EVENT_STATE_INACTIVE and
 * which passes event_filter_match().
 *
 * The list is traversed with list_for_each_entry_rcu().
 */
static void perf_event_sb_iterate(perf_event_aux_output_cb output, void *data)
{
	struct pmu_event_list *sb_events = this_cpu_ptr(&pmu_sb_events);
	struct perf_event *ev;

	list_for_each_entry_rcu(ev, &sb_events->list, sb_list) {
		/* State is tested first; the filter only runs if it passes. */
		if (ev->state >= PERF_EVENT_STATE_INACTIVE &&
		    event_filter_match(ev))
			output(ev, data);
	}
}
|
||||
|
||||
static void
|
||||
perf_event_aux(perf_event_aux_output_cb output, void *data,
|
||||
struct perf_event_context *task_ctx)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct perf_event_context *ctx;
|
||||
struct pmu *pmu;
|
||||
int ctxn;
|
||||
|
||||
/*
|
||||
@ -5909,20 +5944,15 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pmu, &pmus, entry) {
|
||||
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
|
||||
if (cpuctx->unique_pmu != pmu)
|
||||
goto next;
|
||||
perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
|
||||
ctxn = pmu->task_ctx_nr;
|
||||
if (ctxn < 0)
|
||||
goto next;
|
||||
preempt_disable();
|
||||
perf_event_sb_iterate(output, data);
|
||||
|
||||
for_each_task_context_nr(ctxn) {
|
||||
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
|
||||
if (ctx)
|
||||
perf_event_aux_ctx(ctx, output, data, false);
|
||||
next:
|
||||
put_cpu_ptr(pmu->pmu_cpu_context);
|
||||
}
|
||||
preempt_enable();
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@ -8615,6 +8645,32 @@ unlock:
|
||||
return pmu;
|
||||
}
|
||||
|
||||
static void attach_sb_event(struct perf_event *event)
|
||||
{
|
||||
struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
|
||||
|
||||
raw_spin_lock(&pel->lock);
|
||||
list_add_rcu(&event->sb_list, &pel->list);
|
||||
raw_spin_unlock(&pel->lock);
|
||||
}
|
||||
|
||||
static void account_pmu_sb_event(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_attr *attr = &event->attr;
|
||||
|
||||
if (event->parent)
|
||||
return;
|
||||
|
||||
if (event->attach_state & PERF_ATTACH_TASK)
|
||||
return;
|
||||
|
||||
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
|
||||
attr->comm || attr->comm_exec ||
|
||||
attr->task ||
|
||||
attr->context_switch)
|
||||
attach_sb_event(event);
|
||||
}
|
||||
|
||||
static void account_event_cpu(struct perf_event *event, int cpu)
|
||||
{
|
||||
if (event->parent)
|
||||
@ -8695,6 +8751,8 @@ static void account_event(struct perf_event *event)
|
||||
enabled:
|
||||
|
||||
account_event_cpu(event, event->cpu);
|
||||
|
||||
account_pmu_sb_event(event);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -10203,6 +10261,9 @@ static void __init perf_event_init_all_cpus(void)
|
||||
swhash = &per_cpu(swevent_htable, cpu);
|
||||
mutex_init(&swhash->hlist_mutex);
|
||||
INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
|
||||
|
||||
INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
|
||||
raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user