From 84f2b9b2edc09595569c7397cc3c888764ffd78b Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 2 Feb 2012 12:04:01 +0100 Subject: [PATCH 1/5] perf: Remove deprecated WARN_ON_ONCE() With the new throttling/unthrottling code introduced with commit: e050e3f0a71b ("perf: Fix broken interrupt rate throttling") we occasionally hit two WARN_ON_ONCE() checks in: - intel_pmu_pebs_enable() - intel_pmu_lbr_enable() - x86_pmu_start() The assertions are no longer problematic. There is a valid path where they can trigger but it is harmless. The assertion can be triggered with: $ perf record -e instructions:pp .... Leading to paths: intel_pmu_pebs_enable intel_pmu_enable_event x86_perf_event_set_period x86_pmu_start perf_adjust_freq_unthr_context perf_event_task_tick scheduler_tick And: intel_pmu_lbr_enable intel_pmu_enable_event x86_perf_event_set_period x86_pmu_start perf_adjust_freq_unthr_context. perf_event_task_tick scheduler_tick cpuc->enabled is always on because when we get to perf_adjust_freq_unthr_context() the PMU is not totally disabled. Furthermore when we need to adjust a period, we only stop the event we need to change and not the entire PMU. Thus, when we re-enable, cpuc->enabled is already set. Note that when we stop the event, both pebs and lbr are stopped if necessary (and possible). Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/20120202110401.GA30911@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 1 - arch/x86/kernel/cpu/perf_event_intel_lbr.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce1040b11..2a30e5ae6acf 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -986,9 +986,6 @@ static void x86_pmu_start(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - if (WARN_ON_ONCE(idx == -1)) return; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 73da6b64f5b7..d6bd49faa40c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -439,7 +439,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; - WARN_ON_ONCE(cpuc->enabled); if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) intel_pmu_lbr_enable(event); diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 3fab3de3ce96..47a7e63bfe54 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -72,8 +72,6 @@ void intel_pmu_lbr_enable(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - WARN_ON_ONCE(cpuc->enabled); - /* * Reset the LBR stack if we changed task context to * avoid data leaks. From 7a0153ee15575a4d07b5da8c96b79e0b0fd41a12 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Feb 2012 18:54:06 -0200 Subject: [PATCH 2/5] perf tools: Fix perf stack to non executable on x86_64 By adding following objects: bench/mem-memcpy-x86-64-asm.o the x86_64 perf binary ended up with executable stack. The reason was that above object are assembler sourced and is missing the GNU-stack note section. In such case the linker assumes that the final binary should not be restricted at all and mark the stack as RWX. Adding section ".note.GNU-stack" definition to mentioned object, with all flags disabled, thus omiting this object from linker stack flags decision. Problem introduced in: $ git describe ea7872b v2.6.37-rc2-19-gea7872b Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=783570 Reported-by: Clark Williams Acked-by: Eric Dumazet Cc: Corey Ashford Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1328100848-5630-1-git-send-email-jolsa@redhat.com Signed-off-by: Jiri Olsa [ committer note: Backported fix to perf/urgent (3.3-rc2+) ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/mem-memcpy-x86-64-asm.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index a57b66e853c2..185a96d66dd1 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,2 +1,8 @@ #include "../../../arch/x86/lib/memcpy_64.S" +/* + * We need to provide note.GNU-stack section, saying that we want + * NOT executable stack. Otherwise the final linking will assume that + * the ELF stack should not be restricted at all and set it RWX. + */ +.section .note.GNU-stack,"",@progbits From bf32c9ebc9890764c7a9984e3a0c8a57a059753d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 5 Feb 2012 18:11:05 +0100 Subject: [PATCH 3/5] perf tools: Fix prefix matching for kernel maps In some perf ancient versions we used '[kernel.kallsyms._text]' as the name for the kernel map. This got changed with commit: perf: 'perf kvm' tool for monitoring guest performance from host commit a1645ce12adb6c9cc9e19d7695466204e3f017fe Author: Zhang, Yanmin and we started to use following name '[kernel.kallsyms]_text'. This name change is important for the report code dealing with ancient perf data. When processing the kernel map event, we need to recognize the old naming (dont match the last ']') and initialize the kernel map correctly. The subsequent call to maps__set_kallsyms_ref_reloc_sym deals with the superfluous ']' to get correct symbol name. Cc: Corey Ashford Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1328461865-6127-1-git-send-email-jolsa@redhat.com Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 73ddaf06b8e7..2044324b755a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -554,7 +554,7 @@ static int perf_event__process_kernel_mmap(struct perf_tool *tool __used, is_kernel_mmap = memcmp(event->mmap.filename, kmmap_prefix, - strlen(kmmap_prefix)) == 0; + strlen(kmmap_prefix) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { From a4a03fc7ef89020baca4f19174e6a43767c6d78a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 3 Feb 2012 22:31:13 +0530 Subject: [PATCH 4/5] perf evsel: Fix an issue where perf report fails to show the proper percentage This patch fixes an issue where perf report shows nan% for certain perf.data files. The below is from a report for a do_fork probe: -nan% sshd [kernel.kallsyms] [k] do_fork -nan% packagekitd [kernel.kallsyms] [k] do_fork -nan% dbus-daemon [kernel.kallsyms] [k] do_fork -nan% bash [kernel.kallsyms] [k] do_fork A git bisect shows commit f3bda2c as the cause. However, looking back through the git history, I saw commit 640c03c which seems to have removed the required initialization for perf_sample->period. The problem only started showing after commit f3bda2c. The below patch re-introduces the initialization and it fixes the problem for me. With the below patch, for the same perf.data: 73.08% bash [kernel.kallsyms] [k] do_fork 8.97% 11-dhclient [kernel.kallsyms] [k] do_fork 6.41% sshd [kernel.kallsyms] [k] do_fork 3.85% 20-chrony [kernel.kallsyms] [k] do_fork 2.56% sendmail [kernel.kallsyms] [k] do_fork This patch applies over current linux-tip commit 9949284. Problem introduced in: $ git describe 640c03c v2.6.37-rc3-83-g640c03c Cc: Ananth N Mavinakayanahalli Cc: Ingo Molnar Cc: Robert Richter Cc: Srikar Dronamraju Cc: stable@kernel.org Link: http://lkml.kernel.org/r/20120203170113.5190.25558.stgit@localhost6.localdomain6 Signed-off-by: Naveen N. Rao Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 667f3b78bb2c..7132ee834e0e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -463,6 +463,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, memset(data, 0, sizeof(*data)); data->cpu = data->pid = data->tid = -1; data->stream_id = data->id = data->time = -1ULL; + data->period = 1; if (event->header.type != PERF_RECORD_SAMPLE) { if (!sample_id_all) From f39d47ff819ed52a2afbdbecbe35f23f7755f58d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 7 Feb 2012 14:39:57 +0100 Subject: [PATCH 5/5] perf: Fix double start/stop in x86_pmu_start() The following patch fixes a bug introduced by the following commit: e050e3f0a71b ("perf: Fix broken interrupt rate throttling") The patch caused the following warning to pop up depending on the sampling frequency adjustments: ------------[ cut here ]------------ WARNING: at arch/x86/kernel/cpu/perf_event.c:995 x86_pmu_start+0x79/0xd4() It was caused by the following call sequence: perf_adjust_freq_unthr_context.part() { stop() if (delta > 0) { perf_adjust_period() { if (period > 8*...) { stop() ... start() } } } start() } Which caused a double start and a double stop, thus triggering the assert in x86_pmu_start(). The patch fixes the problem by avoiding the double calls. We pass a new argument to perf_adjust_period() to indicate whether or not the event is already stopped. We can't just remove the start/stop from that function because it's called from __perf_event_overflow where the event needs to be reloaded via a stop/start back-toback call. The patch reintroduces the assertion in x86_pmu_start() which was removed by commit: 84f2b9b ("perf: Remove deprecated WARN_ON_ONCE()") In this second version, we've added calls to disable/enable PMU during unthrottling or frequency adjustment based on bug report of spurious NMI interrupts from Eric Dumazet. Reported-and-tested-by: Eric Dumazet Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: markus@trippelsdorf.de Cc: paulus@samba.org Link: http://lkml.kernel.org/r/20120207133956.GA4932@quad [ Minor edits to the changelog and to the code ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ kernel/events/core.c | 19 ++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2a30e5ae6acf..5adce1040b11 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -986,6 +986,9 @@ static void x86_pmu_start(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + if (WARN_ON_ONCE(idx == -1)) return; diff --git a/kernel/events/core.c b/kernel/events/core.c index ba36013cfb21..1b5c081d8b9f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2303,7 +2303,7 @@ do { \ static DEFINE_PER_CPU(int, perf_throttled_count); static DEFINE_PER_CPU(u64, perf_throttled_seq); -static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable) { struct hw_perf_event *hwc = &event->hw; s64 period, sample_period; @@ -2322,9 +2322,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - event->pmu->stop(event, PERF_EF_UPDATE); + if (disable) + event->pmu->stop(event, PERF_EF_UPDATE); + local64_set(&hwc->period_left, 0); - event->pmu->start(event, PERF_EF_RELOAD); + + if (disable) + event->pmu->start(event, PERF_EF_RELOAD); } } @@ -2350,6 +2354,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, return; raw_spin_lock(&ctx->lock); + perf_pmu_disable(ctx->pmu); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) @@ -2381,13 +2386,17 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx, /* * restart the event * reload only if value has changed + * we have stopped the event so tell that + * to perf_adjust_period() to avoid stopping it + * twice. */ if (delta > 0) - perf_adjust_period(event, period, delta); + perf_adjust_period(event, period, delta, false); event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0); } + perf_pmu_enable(ctx->pmu); raw_spin_unlock(&ctx->lock); } @@ -4562,7 +4571,7 @@ static int __perf_event_overflow(struct perf_event *event, hwc->freq_time_stamp = now; if (delta > 0 && delta < 2*TICK_NSEC) - perf_adjust_period(event, delta, hwc->last_period); + perf_adjust_period(event, delta, hwc->last_period, true); } /*