perf/urgent fixes:

- Fix probing of the precise_ip level for the default cycles event, which
   got broken recently on x86_64 when its arch code started treating
   requests for precise samples as invalid when not sampling
   (i.e. when attr.sample_period == 0).
 
   This also fixes another problem in s/390 where the precision
   probing with sample_period == 0 returned precise_ip > 0, that
   then, when setting up the real cycles event (not probing) would
   return EOPNOTSUPP for precise_ip > 0 (as determined previously
   by probing) and sample_period > 0.
 
   These problems resulted in attr_precise not being set to the
   highest precision available on x86_64 when no event was specified,
   i.e. the canonical:
 
 	perf record ./workload
 
   would end up using attr.precise_ip = 0. As a workaround this would
   need to be done:
 
 	perf record -e cycles:P ./workload
 
   And on s/390 it would plain not work, requiring using:
 
         perf record -e cycles ./workload
 
   as a workaround.  (Arnaldo Carvalho de Melo)
 
 - Fix perf build with ARCH=x86_64, when ARCH should be transformed
   into ARCH=x86, just like with the main kernel Makefile and
   tools/objtool's, i.e. use SRCARCH. (Jiada Wang)
 
 - Avoid accessing uninitialized data structures when unwinding with
   elfutils's libdw, making it more closely mimic libunwind's unwinder.
   (Milian Wolff)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJZRCRCAAoJENZQFvNTUqpAQ/gP/2JLe5ZAIxybFIuCnoiO5uCT
 mJ5R6222pi2Nk21n+ys5Fo0/vp5Tu8eGUj30tFvNJ9jh7Ut1aIJNuxgEy7moJn59
 biniX35ttX/a67WK5wZ1rdH5ATzoZWpSESStiAMiGdNEmUaTGn8k3xA++HflJNq4
 aYCYAD2UabfKo61ZfRzpgtHf5dr6y1WQ3mVS+1CwoezMHxhBEMcyv1hvKO+u1JIW
 n59bgFaVlomg8jaEyRSIZsuvUr2Sy8EUoO6U8IH+ghqcLoiBxxKT4Q9rScKbef0J
 ZRgO01ntuorJSiE55WB4yA0oSmfl7nWdbO2XbDQs/Wp7tjTc+CAmtDGBUxM1cg6t
 5ibuiMRjaA/2zI9qH2LbYLmno/PeS9t6PW9RpTLWIrjv4IF2p3pCJLECUY9GWZqD
 HdUOcxmXxItkeTKxibK4eFg7of07bh/Z9REWxo5+sSyQnb/AJ/kkrGdtWOyeSzhy
 2yKpbt+ZAvsr9KGvLmO0cjMxmeaoFY94SBHVmxto9+87t3X/1Bz/C9r4a34Kz05c
 ia7MNz04je3v4DuauOvB28vxRHUaLNacDGhiH8eKVb/mE51I9yoWFCOmFE70/OUd
 NOPv7/i4CxnrHe8dVQrapm9YY2i9F2BUWEuzseA+kmPAK0So6baMBbmq4T+DfT2I
 kQw3gMUQ9GBWp8STn4Vo
 =vT1k
 -----END PGP SIGNATURE-----

Merge tag 'perf-urgent-for-mingo-4.12-20170616' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/urgent fixes from Arnaldo Carvalho de Melo:

- Fix probing of the precise_ip level for the default cycles event, which
  got broken recently on x86_64 when its arch code started treating
  requests for precise samples as invalid when not sampling
  (i.e. when attr.sample_period == 0).

  This also fixes another problem in s/390 where the precision
  probing with sample_period == 0 returned precise_ip > 0, that
  then, when setting up the real cycles event (not probing) would
  return EOPNOTSUPP for precise_ip > 0 (as determined previously
  by probing) and sample_period > 0.

  These problems resulted in attr_precise not being set to the
  highest precision available on x86_64 when no event was specified,
  i.e. the canonical:

	perf record ./workload

  would end up using attr.precise_ip = 0. As a workaround this would
  need to be done:

	perf record -e cycles:P ./workload

  And on s/390 it would plain not work, requiring using:

        perf record -e cycles ./workload

  as a workaround.  (Arnaldo Carvalho de Melo)

- Fix perf build with ARCH=x86_64, when ARCH should be transformed
  into ARCH=x86, just like with the main kernel Makefile and
  tools/objtool's, i.e. use SRCARCH. (Jiada Wang)

- Avoid accessing uninitialized data structures when unwinding with
  elfutils's libdw, making it more closely mimic libunwind's unwinder.
  (Milian Wolff)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2017-06-16 21:33:48 +02:00
commit 531c221df1
9 changed files with 46 additions and 26 deletions

View File

@ -19,18 +19,18 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
include $(srctree)/tools/scripts/Makefile.arch
$(call detected_var,ARCH)
$(call detected_var,SRCARCH)
NO_PERF_REGS := 1
# Additional ARCH settings for ppc
ifeq ($(ARCH),powerpc)
ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
# Additional ARCH settings for x86
ifeq ($(ARCH),x86)
ifeq ($(SRCARCH),x86)
$(call detected,CONFIG_X86)
ifeq (${IS_64_BIT}, 1)
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
@ -43,12 +43,12 @@ ifeq ($(ARCH),x86)
NO_PERF_REGS := 0
endif
ifeq ($(ARCH),arm)
ifeq ($(SRCARCH),arm)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
ifeq ($(ARCH),arm64)
ifeq ($(SRCARCH),arm64)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
@ -61,7 +61,7 @@ endif
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
NO_LIBDW_DWARF_UNWIND := 1
endif
@ -115,9 +115,9 @@ endif
FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
-include $(src-perf)/arch/$(ARCH)/Makefile
-include $(src-perf)/arch/$(SRCARCH)/Makefile
ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
@ -228,12 +228,12 @@ ifeq ($(DEBUG),0)
endif
INC_FLAGS += -I$(src-perf)/util/include
INC_FLAGS += -I$(src-perf)/arch/$(ARCH)/include
INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/uapi
INC_FLAGS += -I$(srctree)/tools/include/
INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/uapi
INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/include/
INC_FLAGS += -I$(srctree)/tools/arch/$(ARCH)/
INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
# $(obj-perf) for generated common-cmds.h
# $(obj-perf)/util for generated bison/flex headers
@ -355,7 +355,7 @@ ifndef NO_LIBELF
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
NO_DWARF := 1
else
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@ -380,7 +380,7 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_BPF_PROLOGUE
$(call detected,CONFIG_BPF_PROLOGUE)
else
msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset());
msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
endif
else
msg := $(warning DWARF support is off, BPF prologue is disabled);
@ -406,7 +406,7 @@ ifdef PERF_HAVE_JITDUMP
endif
endif
ifeq ($(ARCH),powerpc)
ifeq ($(SRCARCH),powerpc)
ifndef NO_DWARF
CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
endif
@ -487,7 +487,7 @@ else
endif
ifndef NO_LOCAL_LIBUNWIND
ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
$(call feature_check,libunwind-debug-frame)
ifneq ($(feature-libunwind-debug-frame), 1)
msg := $(warning No debug_frame support found in libunwind);
@ -740,7 +740,7 @@ ifeq (${IS_64_BIT}, 1)
NO_PERF_READ_VDSO32 := 1
endif
endif
ifneq ($(ARCH), x86)
ifneq ($(SRCARCH), x86)
NO_PERF_READ_VDSOX32 := 1
endif
ifndef NO_PERF_READ_VDSOX32
@ -769,7 +769,7 @@ ifdef LIBBABELTRACE
endif
ifndef NO_AUXTRACE
ifeq ($(ARCH),x86)
ifeq ($(SRCARCH),x86)
ifeq ($(feature-get_cpuid), 0)
msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
NO_AUXTRACE := 1
@ -872,7 +872,7 @@ sysconfdir = $(prefix)/etc
ETC_PERFCONFIG = etc/perfconfig
endif
ifndef lib
ifeq ($(ARCH)$(IS_64_BIT), x861)
ifeq ($(SRCARCH)$(IS_64_BIT), x861)
lib = lib64
else
lib = lib

View File

@ -226,7 +226,7 @@ endif
ifeq ($(config),0)
include $(srctree)/tools/scripts/Makefile.arch
-include arch/$(ARCH)/Makefile
-include arch/$(SRCARCH)/Makefile
endif
# The FEATURE_DUMP_EXPORT holds location of the actual

View File

@ -1,2 +1,2 @@
libperf-y += common.o
libperf-y += $(ARCH)/
libperf-y += $(SRCARCH)/

View File

@ -2,7 +2,7 @@ hostprogs := jevents
jevents-y += json.o jsmn.o jevents.o
pmu-events-y += pmu-events.o
JDIR = pmu-events/arch/$(ARCH)
JDIR = pmu-events/arch/$(SRCARCH)
JSON = $(shell [ -d $(JDIR) ] && \
find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
#
@ -10,4 +10,4 @@ JSON = $(shell [ -d $(JDIR) ] && \
# directory and create tables in pmu-events.c.
#
$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
$(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
$(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)

View File

@ -75,7 +75,7 @@ $(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/B
$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
$(Q)echo ';' >> $@
ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
endif

View File

@ -83,7 +83,7 @@ int test__task_exit(int subtest __maybe_unused)
evsel = perf_evlist__first(evlist);
evsel->attr.task = 1;
evsel->attr.sample_freq = 0;
evsel->attr.sample_freq = 1;
evsel->attr.inherit = 0;
evsel->attr.watermark = 0;
evsel->attr.wakeup_events = 1;

View File

@ -273,8 +273,20 @@ struct perf_evsel *perf_evsel__new_cycles(void)
struct perf_evsel *evsel;
event_attr_init(&attr);
/*
* Unnamed union member, not supported as struct member named
* initializer in older compilers such as gcc 4.4.7
*
* Just for probing the precise_ip:
*/
attr.sample_period = 1;
perf_event_attr__set_max_precise_ip(&attr);
/*
* Now let the usual logic that sets up the perf_event_attr defaults
* kick in when we return and before perf_evsel__open() is called.
*/
attr.sample_period = 0;
evsel = perf_evsel__new(&attr);
if (evsel == NULL)

View File

@ -841,7 +841,7 @@ static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
/*
* default get_cpuid(): nothing gets recorded
* actual implementation must be in arch/$(ARCH)/util/header.c
* actual implementation must be in arch/$(SRCARCH)/util/header.c
*/
int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
{

View File

@ -178,6 +178,14 @@ frame_callback(Dwfl_Frame *state, void *arg)
Dwarf_Addr pc;
bool isactivation;
if (!dwfl_frame_pc(state, &pc, NULL)) {
pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;
}
// report the module before we query for isactivation
report_module(pc, ui);
if (!dwfl_frame_pc(state, &pc, &isactivation)) {
pr_err("%s", dwfl_errmsg(-1));
return DWARF_CB_ABORT;