perf/core improvements and fixes:

Improvements:
 
 - Improve error message when analyzing file with required events in
   'perf sched timehist' (David Ahern)
 
 Fixes:
 
 - Force fixdep compilation to be done at the start of the build, fixing
   some build race conditions in high core count machines (Jiri Olsa)
 
 - Fix handling a zero sample->tid in 'perf sched timehist', as
   sometimes that isn't the idle thread (Namhyung Kim)
 
 Infrastructure:
 
 - Check minimal accepted LLVM version in its feature check, 3.9 at this
   time (Wang Nan)
 
 Documentation:
 
 - Explicitly document that --children is enabled by default (Yannick Brosseau)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJYSDpEAAoJENZQFvNTUqpAz94QAI7XjpSdVfwzeTB3OaMXQAdE
 wyok0AVA2p6yCzdxkeA06WoBin3clOiv4krBtiOxxyuM1TTaCFlLlLaK8zRfB3Xa
 R4eHMjItSoWqPvj4CYSlqoe+689Bo6yzo2szjPTt5E1exbUsVTfpSb+ALOZBZb26
 W6NOKE5us0JpoLJoESeWmMNZpeG0HMWfgmv7HSjXXecPAqXHgl6V2vNE5DRRJj2q
 2+0saIm1llU8/0OX9dp3cUaiwKjB0ZzyR3u7i4DLBvxrIoNHiv7qq7+iz19hviBV
 sHIndLpjXR5lehMN4GoSZJr/oCAXmGHF4F1dY8SNbHRbhtunKOjyQytPX1w6fw3+
 BVYInQi81CDsyOyvzRVmRzJKpLAJmnfmdBa3kpG3peimH0xwDXhDbYUgnWTW2RTs
 le2ryQLXSnGGLXfbZrLFnJ1UYSgtDwEZDrK6G7619QxllAZUvRN8kUG1kkwXehmG
 O/5N4A54VaEfV/9jszdEUcsHdYPnq1W6q+XQujQDfuiPXw7e86s+HLVhD7WK1YZa
 vD9+zdQw4bpdkst5lM9IKkpWUlwdysxIXDaSZ6w7ciNbGkMJrmX7c8okP/uUhqlb
 zIIqLh/Ny2v+NBKThhmX2bLQFW+L4qx1uZe/6yTd6vMNfslRxURBZgiISw3LEmpg
 VAamH2AxYOnmn4mQaRMJ
 =sqh9
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-20161207' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

Improvements:

 - Improve error message when analyzing a file without the required events
   in 'perf sched timehist' (David Ahern)

Fixes:

 - Force fixdep compilation to be done at the start of the build, fixing
   some build race conditions in high core count machines (Jiri Olsa)

 - Fix handling a zero sample->tid in 'perf sched timehist', as
   sometimes that isn't the idle thread (Namhyung Kim)

Infrastructure changes:

 - Check minimal accepted LLVM version in its feature check, 3.9 at this
   time (Wang Nan)

Documentation changes:

 - Explicitly document that --children is enabled by default (Yannick Brosseau)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2016-12-07 19:16:54 +01:00
commit 080c25914e
10 changed files with 122 additions and 38 deletions

View File

@ -231,14 +231,18 @@ $(OUTPUT)test-jvmti.bin:
$(BUILD)
$(OUTPUT)test-llvm.bin:
$(BUILDXX) -std=gnu++11 \
$(BUILDXX) -std=gnu++11 \
-I$(shell $(LLVM_CONFIG) --includedir) \
-L$(shell $(LLVM_CONFIG) --libdir) \
$(shell $(LLVM_CONFIG) --libs Core BPF) \
$(shell $(LLVM_CONFIG) --system-libs)
$(OUTPUT)test-llvm-version.bin:
$(BUILDXX) -std=gnu++11 \
-I$(shell $(LLVM_CONFIG) --includedir)
$(OUTPUT)test-clang.bin:
$(BUILDXX) -std=gnu++11 \
$(BUILDXX) -std=gnu++11 \
-I$(shell $(LLVM_CONFIG) --includedir) \
-L$(shell $(LLVM_CONFIG) --libdir) \
-Wl,--start-group -lclangBasic -lclangDriver \

View File

@ -0,0 +1,11 @@
/*
 * Feature check for the LLVM version: this file compiles only when the
 * LLVM headers report version 3.9.0 or newer; older versions stop the
 * build with the #error below. When built and run, it prints the packed
 * version number in hexadecimal.
 */
#include <cstdio>
#include "llvm/Config/llvm-config.h"

/* Pack major/minor/patch into one integer so versions compare directly. */
#define NUM_VERSION \
	(((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH)

#if NUM_VERSION < 0x030900
# error This LLVM is not tested yet.
#endif

int main()
{
	printf("%x\n", NUM_VERSION);
	return 0;
}

View File

@ -1,5 +1,10 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH)
#if NUM_VERSION < 0x030900
# error "LLVM version too low"
#endif
int main()
{
llvm::errs() << "Hello World!\n";

View File

@ -239,7 +239,8 @@ OPTIONS
Accumulate callchain of children to parent entry so that they can
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires callchains are recorded.
See the `overhead calculation' section for more details.
See the `overhead calculation' section for more details. Enabled by
default, disable with --no-children.
--max-stack::
Set the stack depth limit when parsing the callchain, anything

View File

@ -170,6 +170,7 @@ Default is to monitor all CPUS.
show up in the output. The output will have a new "Children" column
and will be sorted on the data. It requires -g/--call-graph option
enabled. See the `overhead calculation' section for more details.
Enabled by default, disable with --no-children.
--max-stack::
Set the stack depth limit when parsing the callchain, anything

View File

@ -802,12 +802,13 @@ ifdef LIBCLANGLLVM
msg := $(warning No g++ found, disable clang and llvm support. Please install g++)
else
$(call feature_check,llvm)
$(call feature_check,llvm-version)
ifneq ($(feature-llvm), 1)
msg := $(warning No libLLVM found, disable clang and llvm support. Please install llvm-dev)
msg := $(warning No suitable libLLVM found, disabling builtin clang and LLVM support. Please install llvm-dev(el) (>= 3.9.0))
else
$(call feature_check,clang)
ifneq ($(feature-clang), 1)
msg := $(warning No libclang found, disable clang and llvm support. Please install libclang-dev)
msg := $(warning No suitable libclang found, disabling builtin clang and LLVM support. Please install libclang-dev(el) (>= 3.9.0))
else
CFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT
CXXFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT -I$(shell $(LLVM_CONFIG) --includedir)
@ -816,6 +817,9 @@ ifdef LIBCLANGLLVM
USE_CXX = 1
USE_LLVM = 1
USE_CLANG = 1
ifneq ($(feature-llvm-version),1)
msg := $(warning This version of LLVM is not tested. May cause build errors)
endif
endif
endif
endif

View File

@ -128,10 +128,6 @@ endif
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
$(Q)touch $(OUTPUT)PERF-VERSION-FILE
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
@ -168,11 +164,6 @@ BISON = bison
STRIP = strip
AWK = awk
LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
# include Makefile.config by default and rule out
# non-config cases
config := 1
@ -185,6 +176,40 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
endif
endif
# The fixdep build - we force fixdep tool to be built as
# the first target in the separate make session not to be
# disturbed by any parallel make jobs. Once fixdep is done
# we issue the requested build with FIXDEP=1 variable.
#
# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
# targets, because it's not necessary.
ifdef FIXDEP
force_fixdep := 0
else
force_fixdep := $(config)
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
export HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include
ifeq ($(force_fixdep),1)
goals := $(filter-out all sub-make, $(MAKECMDGOALS))
$(goals) all: sub-make
sub-make: fixdep
$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
else # force_fixdep
LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for
# example, liberty. Select all checkers so we won't get an incomplete feature
@ -369,10 +394,6 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
export HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include
JEVENTS := $(OUTPUT)pmu-events/jevents
JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
@ -491,7 +512,7 @@ $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_L
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
$(GTK_IN): fixdep FORCE
$(GTK_IN): FORCE
$(Q)$(MAKE) $(build)=gtk
$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
@ -505,6 +526,10 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
$(SCRIPTS) : % : %.sh
$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
$(Q)touch $(OUTPUT)PERF-VERSION-FILE
# These can record PERF_VERSION
perf.spec $(SCRIPTS) \
: $(OUTPUT)PERF-VERSION-FILE
@ -536,7 +561,7 @@ endif
__build-dir = $(subst $(OUTPUT),,$(dir $@))
build-dir = $(if $(__build-dir),$(__build-dir),.)
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@ -586,7 +611,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
LIBPERF_IN := $(OUTPUT)libperf-in.o
$(LIBPERF_IN): prepare fixdep FORCE
$(LIBPERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
@ -594,10 +619,10 @@ $(LIB_FILE): $(LIBPERF_IN)
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
$(LIBTRACEEVENT): fixdep FORCE
$(LIBTRACEEVENT): FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
libtraceevent_plugins: fixdep FORCE
libtraceevent_plugins: FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
@ -610,21 +635,21 @@ $(LIBTRACEEVENT)-clean:
install-traceevent-plugins: libtraceevent_plugins
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
$(LIBAPI): fixdep FORCE
$(LIBAPI): FORCE
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
$(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBBPF): fixdep FORCE
$(LIBBPF): FORCE
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
$(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBSUBCMD): fixdep FORCE
$(LIBSUBCMD): FORCE
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
$(LIBSUBCMD)-clean:
@ -832,3 +857,4 @@ FORCE:
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
.PHONY: libtraceevent_plugins archheaders
endif # force_fixdep

View File

@ -2010,7 +2010,7 @@ static int init_idle_threads(int ncpu)
if (!idle_threads)
return -ENOMEM;
idle_max_cpu = ncpu - 1;
idle_max_cpu = ncpu;
/* allocate the actual thread struct if needed */
for (i = 0; i < ncpu; ++i) {
@ -2031,7 +2031,7 @@ static void free_idle_threads(void)
if (idle_threads == NULL)
return;
for (i = 0; i <= idle_max_cpu; ++i) {
for (i = 0; i < idle_max_cpu; ++i) {
if ((idle_threads[i]))
thread__delete(idle_threads[i]);
}
@ -2054,8 +2054,7 @@ static struct thread *get_idle_thread(int cpu)
return NULL;
idle_threads = (struct thread **) p;
i = idle_max_cpu ? idle_max_cpu + 1 : 0;
for (; i < j; ++i)
for (i = idle_max_cpu; i < j; ++i)
idle_threads[i] = NULL;
idle_max_cpu = j;
@ -2118,7 +2117,9 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
} else {
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
/* there were samples with tid 0 but non-zero pid */
thread = machine__findnew_thread(machine, sample->pid,
sample->tid ?: sample->pid);
if (thread == NULL) {
pr_debug("Failed to get thread for tid %d. skipping sample.\n",
sample->tid);
@ -2493,7 +2494,7 @@ static void timehist_print_summary(struct perf_sched *sched,
return;
printf("\nIdle stats:\n");
for (i = 0; i <= idle_max_cpu; ++i) {
for (i = 0; i < idle_max_cpu; ++i) {
t = idle_threads[i];
if (!t)
continue;
@ -2583,6 +2584,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
struct perf_data_file file = {
.path = input_name,
.mode = PERF_DATA_MODE_READ,
.force = sched->force,
};
struct perf_session *session;
@ -2629,8 +2631,12 @@ static int perf_sched__timehist(struct perf_sched *sched)
if (perf_session__set_tracepoints_handlers(session, handlers))
goto out;
if (!perf_session__has_traces(session, "record -R"))
/* sched_switch event at a minimum needs to exist */
if (!perf_evlist__find_tracepoint_by_name(session->evlist,
"sched:sched_switch")) {
pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
goto out;
}
if (sched->show_migrations &&
perf_session__set_tracepoints_handlers(session, migrate_handlers))
@ -2984,6 +2990,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
OPT_END()
};
const struct option latency_options[] = {
@ -2991,8 +2998,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"sort by key(s): runtime, switch, avg, max"),
OPT_INTEGER('C', "CPU", &sched.profile_cpu,
"CPU to profile on"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('p', "pids", &sched.skip_merge,
"latency stats per pid instead of per comm"),
OPT_PARENT(sched_options)
@ -3000,9 +3005,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
const struct option replay_options[] = {
OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
"repeat the workload replay N times (-1: infinite)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
OPT_PARENT(sched_options)
};
const struct option map_options[] = {

View File

@ -1234,3 +1234,30 @@ out:
}
return -ENOMEM;
}
/*
 * Copy every node of the callchain cursor @src into @dst.
 *
 * @dst is reset first and @src is committed before iteration starts
 * (presumably rewinding it to its first node - confirm against
 * callchain_cursor_commit()). Nodes are then appended to @dst one by one,
 * advancing @src after each successful append.
 *
 * Returns 0 when all nodes were copied, or the first non-zero value
 * returned by callchain_cursor_append(), in which case copying stops.
 */
int callchain_cursor__copy(struct callchain_cursor *dst,
			   struct callchain_cursor *src)
{
	struct callchain_cursor_node *entry;
	int err = 0;

	callchain_cursor_reset(dst);
	callchain_cursor_commit(src);

	for (entry = callchain_cursor_current(src); entry != NULL;
	     entry = callchain_cursor_current(src)) {
		err = callchain_cursor_append(dst, entry->ip, entry->map,
					      entry->sym, entry->branch,
					      &entry->branch_flags,
					      entry->nr_loop_iter,
					      entry->samples);
		if (err)
			break;

		/* Only step forward once the current node has been copied. */
		callchain_cursor_advance(src);
	}

	return err;
}

View File

@ -216,6 +216,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
cursor->pos++;
}
/*
 * Copy the nodes of the callchain cursor @src into @dst.
 * Returns an int status; NOTE(review): presumably 0 on success and
 * non-zero on failure - confirm against the definition.
 */
int callchain_cursor__copy(struct callchain_cursor *dst,
struct callchain_cursor *src);
struct option;
struct hist_entry;