diff --git a/tools/build/Build b/tools/build/Build new file mode 100644 index 000000000000..63a6c34c0c88 --- /dev/null +++ b/tools/build/Build @@ -0,0 +1 @@ +fixdep-y := fixdep.o diff --git a/tools/build/Build.include b/tools/build/Build.include index 4c8daaccb82a..4d000bc959b4 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -54,15 +54,26 @@ make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) # PHONY targets skipped in both cases. any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) +### +# Copy dependency data into .cmd file +# - gcc -M dependency info +# - command line to create object 'cmd_object :=' +dep-cmd = $(if $(wildcard $(fixdep)), \ + $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \ + rm -f $(depfile); \ + mv -f $(dot-target).tmp $(dot-target).cmd, \ + printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ + printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ + cat $(depfile) >> $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + ### # if_changed_dep - execute command if any prerequisite is newer than # target, or command line has changed and update # dependencies in the cmd file if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ @set -e; \ - $(echo-cmd) $(cmd_$(1)); \ - cat $(depfile) > $(dot-target).cmd; \ - printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + $(echo-cmd) $(cmd_$(1)) && $(dep-cmd)) # if_changed - execute command if any prerequisite is newer than # target, or command line has changed diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index aa5e092c4352..a47bffbae159 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -11,8 +11,9 @@ Unlike the kernel we don't have a single build object 'obj-y' list that where we setup source objects, but we support more. 
This allows one 'Build' file to carry a sources list for multiple build objects. -a) Build framework makefiles ----------------------------- + +Build framework makefiles +------------------------- The build framework consists of 2 Makefiles: @@ -23,7 +24,7 @@ While the 'Build.include' file contains just some generic definitions, the 'Makefile.build' file is the makefile used from the outside. It's interface/usage is following: - $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + $ make -f tools/build/Makefile.build srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) where: @@ -38,8 +39,9 @@ called $(OBJECT)-in.o: which includes all compiled sources described in 'Build' makefiles. -a) Build makefiles ------------------- + +Build makefiles +--------------- The user supplies 'Build' makefiles that contains a objects list, and connects the build to nested directories. @@ -95,8 +97,31 @@ It's only a matter of 2 single commands to create the final binaries: You can check the 'ex' example in 'tools/build/tests/ex' for more details. -b) Rules --------- + +Makefile.include +---------------- + +The tools/build/Makefile.include makefile can be included +by user makefiles to get useful definitions. + +It defines the following interface: + + - build macro definition: + build := -f $(srctree)/tools/build/Makefile.build dir=. obj + + to make it easier to invoke build like: + make $(build)=ex + + +Fixdep +------ +It is necessary to build the fixdep helper before invoking the build. +The Makefile.include file adds the fixdep target, which can be +invoked by the user. + + +Rules +----- The build framework provides standard compilation rules to handle .S and .c compilation. @@ -104,8 +129,9 @@ compilation. It's possible to include special rule if needed (like we do for flex or bison code generation). 
-c) CFLAGS ---------- + +CFLAGS +------ It's possible to alter the standard object C flags in the following way: @@ -115,8 +141,8 @@ It's possible to alter the standard object C flags in the following way: This C flags changes has the scope of the Build makefile they are defined in. -d) Dependencies ---------------- +Dependencies +------------ For each built object file 'a.o' the '.a.cmd' is created and holds: @@ -130,8 +156,8 @@ All existing '.cmd' files are included in the Build process to follow properly the dependencies and trigger a rebuild when necessary. -e) Single rules ---------------- +Single rules +------------ It's possible to build single object file by choice, like: diff --git a/tools/build/Makefile b/tools/build/Makefile new file mode 100644 index 000000000000..a93036272d43 --- /dev/null +++ b/tools/build/Makefile @@ -0,0 +1,43 @@ +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +include $(srctree)/tools//scripts/Makefile.include + +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + +ifeq ($(V),1) + Q = +else + Q = @ +endif + +export Q srctree CC LD + +MAKEFLAGS := --no-print-directory +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +all: fixdep + +clean: + $(call QUIET_CLEAN, fixdep) + $(Q)find . 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)rm -f fixdep + +$(OUTPUT)fixdep-in.o: FORCE + $(Q)$(MAKE) $(build)=fixdep + +$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o + $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $< + +FORCE: + +.PHONY: FORCE diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 0c5f485521d6..4a96473b180f 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -21,6 +21,13 @@ endif build-dir := $(srctree)/tools/build +# Define $(fixdep) for dep-cmd function +ifeq ($(OUTPUT),) + fixdep := $(build-dir)/fixdep +else + fixdep := $(OUTPUT)/fixdep +endif + # Generic definitions include $(build-dir)/Build.include diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include new file mode 100644 index 000000000000..6572bb023543 --- /dev/null +++ b/tools/build/Makefile.include @@ -0,0 +1,6 @@ +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +fixdep: + $(Q)$(MAKE) -C $(srctree)/tools/build fixdep + +.PHONY: fixdep diff --git a/tools/build/fixdep.c b/tools/build/fixdep.c new file mode 100644 index 000000000000..1521d36cef0d --- /dev/null +++ b/tools/build/fixdep.c @@ -0,0 +1,172 @@ +/* + * "Optimize" a list of dependencies as spit out by gcc -MD + * for the build framework. + * + * Original author: + * Copyright 2002 by Kai Germaschewski + * + * This code has been borrowed from kbuild's fixdep (scripts/basic/fixdep.c), + * Please check it for detailed explanation. This fixdep borrows only the + * base transformation of dependencies without the CONFIG mangle. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> + +char *target; +char *depfile; +char *cmdline; + +static void usage(void) +{ + fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n"); + exit(1); +} + +/* + * Print out the commandline prefixed with cmd_<target> := + */ +static void print_cmdline(void) +{ + printf("cmd_%s := %s\n\n", target, cmdline); +} + +/* + * Important: The below generated source_foo.o and deps_foo.o variable + * assignments are parsed not only by make, but also by the rather simple + * parser in scripts/mod/sumversion.c. + */ +static void parse_dep_file(void *map, size_t len) +{ + char *m = map; + char *end = m + len; + char *p; + char s[PATH_MAX]; + int is_target; + int saw_any_target = 0; + int is_first_dep = 0; + + while (m < end) { + /* Skip any "white space" */ + while (m < end && (*m == ' ' || *m == '\\' || *m == '\n')) + m++; + /* Find next "white space" */ + p = m; + while (p < end && *p != ' ' && *p != '\\' && *p != '\n') + p++; + /* Is the token we found a target name? */ + is_target = (*(p-1) == ':'); + /* Don't write any target names into the dependency file */ + if (is_target) { + /* The /next/ file is the first dependency */ + is_first_dep = 1; + } else { + /* Save this token/filename; reject tokens that overflow s */ + if ((size_t)(p - m) >= sizeof(s)) { + fprintf(stderr, "fixdep: token too long\n"); + exit(1); + } + memcpy(s, m, p-m); + s[p - m] = 0; + + /* + * Do not list the source file as dependency, + * so that kbuild is not confused if a .c file + * is rewritten into .S or vice versa. Storing + * it in source_* is needed for modpost to + * compute srcversions. 
+ */ + if (is_first_dep) { + /* + * If processing the concatenation of + * multiple dependency files, only + * process the first target name, which + * will be the original source name, + * and ignore any other target names, + * which will be intermediate temporary + * files. 
+ */ + if (!saw_any_target) { + saw_any_target = 1; + printf("source_%s := %s\n\n", + target, s); + printf("deps_%s := \\\n", + target); + } + is_first_dep = 0; + } else + printf(" %s \\\n", s); + } + /* + * Start searching for next token immediately after the first + * "whitespace" character that follows this token. + */ + m = p + 1; + } + + if (!saw_any_target) { + fprintf(stderr, "fixdep: parse error; no targets found\n"); + exit(1); + } + + printf("\n%s: $(deps_%s)\n\n", target, target); + printf("$(deps_%s):\n", target); +} + +static void print_deps(void) +{ + struct stat st; + int fd; + void *map; + + fd = open(depfile, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "fixdep: error opening depfile: "); + perror(depfile); + exit(2); + } + if (fstat(fd, &st) < 0) { + fprintf(stderr, "fixdep: error fstat'ing depfile: "); + perror(depfile); + exit(2); + } + if (st.st_size == 0) { + fprintf(stderr, "fixdep: %s is empty\n", depfile); + close(fd); + return; + } + map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) { + perror("fixdep: mmap"); + close(fd); + return; + } + + parse_dep_file(map, st.st_size); + + munmap(map, st.st_size); + + close(fd); +} + +int main(int argc, char **argv) +{ + if (argc != 4) + usage(); + + depfile = argv[1]; + target = argv[2]; + cmdline = argv[3]; + + print_cmdline(); + print_deps(); + + return 0; +} diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build index 429c7d452101..4d502f9b1a50 100644 --- a/tools/build/tests/ex/Build +++ b/tools/build/tests/ex/Build @@ -4,6 +4,7 @@ ex-y += b.o ex-y += b.o ex-y += empty/ ex-y += empty2/ +ex-y += inc.o libex-y += c.o libex-y += d.o diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile index 52d2476073a3..c50d5782ad5a 100644 --- a/tools/build/tests/ex/Makefile +++ b/tools/build/tests/ex/Makefile @@ -1,19 +1,22 @@ -export srctree := ../../../.. +export srctree := $(abspath ../../../..) 
export CC := gcc export LD := ld export AR := ar -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: + +include $(srctree)/tools/build/Makefile.include + ex: ex-in.o libex-in.o gcc -o $@ $^ -ex.%: FORCE +ex.%: fixdep FORCE make -f $(srctree)/tools/build/Makefile.build dir=. $@ -ex-in.o: FORCE +ex-in.o: fixdep FORCE make $(build)=ex -libex-in.o: FORCE +libex-in.o: fixdep FORCE make $(build)=libex clean: diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c index dc42eb2e1a67..57de6074d252 100644 --- a/tools/build/tests/ex/ex.c +++ b/tools/build/tests/ex/ex.c @@ -5,6 +5,7 @@ int c(void); int d(void); int e(void); int f(void); +int inc(void); int main(void) { @@ -14,6 +15,7 @@ int main(void) d(); e(); f(); + inc(); return 0; } diff --git a/tools/build/tests/ex/inc.c b/tools/build/tests/ex/inc.c new file mode 100644 index 000000000000..c20f1e9033a3 --- /dev/null +++ b/tools/build/tests/ex/inc.c @@ -0,0 +1,8 @@ +#ifdef INCLUDE +#include "krava.h" +#endif + +int inc(void) +{ + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh index 5494f8ea7567..44d2a0fade67 100755 --- a/tools/build/tests/run.sh +++ b/tools/build/tests/run.sh @@ -34,9 +34,36 @@ function test_ex_suffix { make -C ex V=1 clean > /dev/null 2>&1 rm -f ex.out } + +function test_ex_include { + make -C ex V=1 clean > ex.out 2>&1 + + # build with krava.h include + touch ex/krava.h + make -C ex V=1 CFLAGS=-DINCLUDE >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + # build without the include + rm -f ex/krava.h ex/ex + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + echo -n Testing.. 
test_ex test_ex_suffix +test_ex_include echo OK diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index fe1b02c2c95b..d85904dc9b38 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -21,12 +21,14 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -build := -f $(srctree)/tools/build/Makefile.build dir=. obj API_IN := $(OUTPUT)libapi-in.o -export srctree OUTPUT CC LD CFLAGS V +all: -all: $(LIBFILE) +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include + +all: fixdep $(LIBFILE) $(API_IN): FORCE @$(MAKE) $(build)=libapi diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 38aca2dd1946..0406a7d5c891 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -12,12 +12,14 @@ #include "tracing_path.h" +char tracing_mnt[PATH_MAX + 1] = "/sys/kernel/debug"; char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing"; char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; static void __tracing_path_set(const char *tracing, const char *mountpoint) { + snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint); snprintf(tracing_path, sizeof(tracing_path), "%s/%s", mountpoint, tracing); snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", @@ -109,19 +111,10 @@ static int strerror_open(int err, char *buf, size_t size, const char *filename) "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); break; case EACCES: { - const char *mountpoint = debugfs__mountpoint(); - - if (!access(mountpoint, R_OK) && strncmp(filename, "tracing/", 8) == 0) { - const char *tracefs_mntpoint = tracefs__mountpoint(); - - if (tracefs_mntpoint) - mountpoint = tracefs__mountpoint(); - } - snprintf(buf, size, "Error:\tNo permissions to read %s/%s\n" "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - tracing_events_path, filename, mountpoint); + tracing_events_path, filename, tracing_mnt); } break; default: diff 
--git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e630f9fc4fb6..fc9af57b666e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -123,8 +123,10 @@ endif # the same command line setup. MAKEOVERRIDES= +all: + export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) @@ -133,7 +135,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: $(VERSION_FILES) all_cmd +all: fixdep $(VERSION_FILES) all_cmd all_cmd: $(CMD_TARGETS) diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 18ffccf00426..7e319afac78a 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -93,8 +93,10 @@ else print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif +all: + export srctree OUTPUT CC LD CFLAGS V -build := -f $(srctree)/tools/build/Makefile.build dir=. obj +include $(srctree)/tools/build/Makefile.include do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -109,7 +111,7 @@ CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) -all: all_cmd +all: fixdep all_cmd all_cmd: $(CMD_TARGETS) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index c94c9de3173e..be764f9ec769 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -671,6 +671,7 @@ The letters are: e synthesize tracing error events d create a debug log g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) "Instructions" events look like they were recorded by "perf record -e instructions". @@ -707,12 +708,26 @@ on the sample is *not* adjusted and reflects the last known value of TSC. For Intel PT, the default period is 100us. +Setting it to a zero period means "as often as possible". 
+ +In the case of Intel PT that is the same as a period of 1 and a unit of +'instructions' (i.e. --itrace=i1i). + Also the call chain size (default 16, max. 1024) for instructions or transactions events can be specified. e.g. --itrace=ig32 --itrace=xg32 +Also the number of last branch entries (default 64, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=il10 + --itrace=xl10 + +Note that last branch entries are cleared for each sample, so there is no overlap +from one sample to the next. + To disable trace decoding entirely, use the option --no-itrace. @@ -749,3 +764,32 @@ perf inject also accepts the --itrace option in which case tracing data is removed and replaced with the synthesized events. e.g. perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. 
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 2ff946677e3b..65453f4c7006 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -6,6 +6,7 @@ e synthesize error events d create a debug log g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) The default is all events i.e. the same as --itrace=ibxe @@ -20,3 +21,6 @@ Also the call chain size (default 16, max. 1024) for instructions or transactions events can be specified. + + Also the number of last branch entries (default 64, max. 1024) for + instructions or transactions events can be specified. diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0c721c3e37e1..0b1cedeef895 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -50,6 +50,9 @@ OPTIONS include::itrace.txt[] +--strip:: + Use with --itrace to strip out non-synthesized events. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index dc3ec783b7bd..b3b42f9285df 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -249,6 +249,9 @@ include::itrace.txt[] --full-source-path:: Show the full path for source files for srcline output. +--ns:: + Use 9 decimal places when displaying time (i.e. show the nanoseconds) + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6c5c699002cb..56517d304772 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -297,16 +297,16 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK -build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj +include $(srctree)/tools/build/Makefile.include -$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE +$(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): fixdep FORCE $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -349,27 +349,27 @@ endif __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) -single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep -$(OUTPUT)%.o: %.c single_dep FORCE +$(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.c single_dep FORCE +$(OUTPUT)%.i: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.s: %.c single_dep FORCE +$(OUTPUT)%.s: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-bison.o: %.c single_dep FORCE +$(OUTPUT)%-bison.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%-flex.o: %.c single_dep FORCE +$(OUTPUT)%-flex.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.o: %.S single_dep FORCE +$(OUTPUT)%.o: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)%.i: %.S single_dep FORCE +$(OUTPUT)%.i: %.S prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) @@ -389,7 +389,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o -$(LIBPERF_IN): FORCE +$(LIBPERF_IN): fixdep FORCE $(Q)$(MAKE) $(build)=libperf $(LIB_FILE): $(LIBPERF_IN) @@ -397,10 +397,10 
@@ $(LIB_FILE): $(LIBPERF_IN) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): FORCE +$(LIBTRACEEVENT): fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a -libtraceevent_plugins: FORCE +libtraceevent_plugins: fixdep FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins @@ -413,7 +413,7 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -$(LIBAPI): FORCE +$(LIBAPI): fixdep FORCE $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a $(LIBAPI)-clean: @@ -591,6 +591,6 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare .PHONY: libtraceevent_plugins diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f62c49b35be0..0a945d2e8ca5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -28,9 +28,11 @@ struct perf_inject { bool build_ids; bool sched_stat; bool have_auxtrace; + bool strip; const char *input_name; struct perf_data_file output; u64 bytes_written; + u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; }; @@ -176,6 +178,27 @@ static int perf_event__repipe(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__drop(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int perf_event__drop_aux(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + struct 
perf_inject *inject = container_of(tool, struct perf_inject, tool); + + if (!inject->aux_id) + inject->aux_id = sample->id; + + return 0; +} + typedef int (*inject_handler)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -466,6 +489,78 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel, return 0; } +static int drop_sample(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static void strip_init(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel; + + inject->tool.context_switch = perf_event__drop; + + evlist__for_each(evlist, evsel) + evsel->handler = drop_sample; +} + +static bool has_tracking(struct perf_evsel *evsel) +{ + return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm || + evsel->attr.task; +} + +#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER) + +/* + * In order that the perf.data file is parsable, tracking events like MMAP need + * their selected event to exist, except if there is only 1 selected event left + * and it has a compatible sample type. 
+ */ +static bool ok_to_remove(struct perf_evlist *evlist, + struct perf_evsel *evsel_to_remove) +{ + struct perf_evsel *evsel; + int cnt = 0; + bool ok = false; + + if (!has_tracking(evsel_to_remove)) + return true; + + evlist__for_each(evlist, evsel) { + if (evsel->handler != drop_sample) { + cnt += 1; + if ((evsel->attr.sample_type & COMPAT_MASK) == + (evsel_to_remove->attr.sample_type & COMPAT_MASK)) + ok = true; + } + } + + return ok && cnt == 1; +} + +static void strip_fini(struct perf_inject *inject) +{ + struct perf_evlist *evlist = inject->session->evlist; + struct perf_evsel *evsel, *tmp; + + /* Remove non-synthesized evsels if possible */ + evlist__for_each_safe(evlist, tmp, evsel) { + if (evsel->handler == drop_sample && + ok_to_remove(evlist, evsel)) { + pr_debug("Deleting %s\n", perf_evsel__name(evsel)); + perf_evlist__remove(evlist, evsel); + perf_evsel__delete(evsel); + } + } +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; @@ -512,10 +607,14 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.id_index = perf_event__repipe_id_index; inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.aux = perf_event__drop_aux; + inject->tool.itrace_start = perf_event__drop_aux; inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ output_data_offset = 4096; + if (inject->strip) + strip_init(inject); } if (!inject->itrace_synth_opts.set) @@ -535,11 +634,28 @@ static int __cmd_inject(struct perf_inject *inject) } /* * The AUX areas have been removed and replaced with - * synthesized hardware events, so clear the feature flag. + * synthesized hardware events, so clear the feature flag and + * remove the evsel. 
*/ - if (inject->itrace_synth_opts.set) + if (inject->itrace_synth_opts.set) { + struct perf_evsel *evsel; + perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (inject->itrace_synth_opts.last_branch) + perf_header__set_feat(&session->header, + HEADER_BRANCH_STACK); + evsel = perf_evlist__id2evsel_strict(session->evlist, + inject->aux_id); + if (evsel) { + pr_debug("Deleting %s\n", + perf_evsel__name(evsel)); + perf_evlist__remove(session->evlist, evsel); + perf_evsel__delete(evsel); + } + if (inject->strip) + strip_fini(inject); + } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__write_header(session, session->evlist, fd, true); @@ -604,6 +720,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "strip", &inject.strip, + "strip non-synthesized events (use with --itrace)"), OPT_END() }; const char * const inject_usage[] = { @@ -619,6 +737,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(inject_usage, options); + if (inject.strip && !inject.itrace_synth_opts.set) { + pr_err("--strip option requires --itrace option\n"); + return -1; + } + if (perf_data_file__open(&inject.output)) { perror("failed to create output file"); return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e4e3f1432622..37c9f5125887 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -163,14 +163,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) goto out_put; - if (sort__mode == SORT_MODE__BRANCH) + if (sort__mode == SORT_MODE__BRANCH) { + /* + * A non-synthesized event might not have a branch stack if + * branch stacks have been synthesized (using 
itrace options). + */ + if (!sample->branch_stack) + goto out_put; iter.ops = &hist_iter_branch; - else if (rep->mem_mode) + } else if (rep->mem_mode) { iter.ops = &hist_iter_mem; - else if (symbol_conf.cumulate_callchain) + } else if (symbol_conf.cumulate_callchain) { iter.ops = &hist_iter_cumulative; - else + } else { iter.ops = &hist_iter_normal; + } if (al.map != NULL) al.map->dso->hit = 1; @@ -214,6 +221,15 @@ static int report__setup_sample_type(struct report *rep) u64 sample_type = perf_evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data_file__is_pipe(session->file); + if (session->itrace_synth_opts->callchain || + (!is_pipe && + perf_header__has_feat(&session->header, HEADER_AUXTRACE) && + !session->itrace_synth_opts->set)) + sample_type |= PERF_SAMPLE_CALLCHAIN; + + if (session->itrace_synth_opts->last_branch) + sample_type |= PERF_SAMPLE_BRANCH_STACK; + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " @@ -793,6 +809,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; + if (itrace_synth_opts.callchain && + (int)itrace_synth_opts.callchain_sz > report.max_stack) + report.max_stack = itrace_synth_opts.callchain_sz; + if (!input_name || !strlen(input_name)) { if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) input_name = "-"; @@ -820,6 +840,9 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); + if (itrace_synth_opts.last_branch) + has_br_stack = true; + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 284a76e04628..8ce1c6bbfa45 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,9 +29,12 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; +static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -415,7 +418,10 @@ static void print_sample_start(struct perf_sample *sample, secs = nsecs / NSECS_PER_SEC; nsecs -= secs * NSECS_PER_SEC; usecs = nsecs / NSECS_PER_USEC; - printf("%5lu.%06lu: ", secs, usecs); + if (nanosecs) + printf("%5lu.%09llu: ", secs, nsecs); + else + printf("%5lu.%06lu: ", secs, usecs); } } @@ -471,7 +477,7 @@ static void print_sample_bts(union perf_event *event, } } perf_evsel__print_ip(evsel, sample, al, print_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } /* print branch_to information */ @@ -548,7 +554,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, perf_evsel__print_ip(evsel, sample, al, output[attr->type].print_ip_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } if (PRINT_FIELD(IREGS)) @@ -1695,6 +1701,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN(0, "ns", &nanosecs, + "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), @@ -1740,6 +1748,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (itrace_synth_opts.callchain && + itrace_synth_opts.callchain_sz > 
scripting_max_stack) + scripting_max_stack = itrace_synth_opts.callchain_sz; + /* make sure PERF_EXEC_PATH is set for scripts */ perf_set_argv_exec_path(perf_exec_path()); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bdaf44f24d5d..38d4d6cac823 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -655,7 +655,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) { const char *name = sym->name; - if (!map->dso->kernel) + if (!__map__is_kernel(map)) return 0; /* * ppc64 uses function descriptors and appends a '.' to the diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 84a32037a80f..1b02cdc0cab6 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -61,6 +61,142 @@ import datetime # # An example of using the database is provided by the script # call-graph-from-postgresql.py. Refer to that script for details. +# +# Tables: +# +# The tables largely correspond to perf tools' data structures. They are largely self-explanatory. +# +# samples +# +# 'samples' is the main table. It represents what instruction was executing at a point in time +# when something (a selected event) happened. The memory address is the instruction pointer or 'ip'. +# +# calls +# +# 'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'. +# 'calls' is only created when the 'calls' option to this script is specified. +# +# call_paths +# +# 'call_paths' represents all the call stacks. Each 'call' has an associated record in 'call_paths'. +# 'call_paths' is only created when the 'calls' option to this script is specified. +# +# branch_types +# +# 'branch_types' provides descriptions for each type of branch. +# +# comm_threads +# +# 'comm_threads' shows how 'comms' relates to 'threads'. 
+# +# comms +# +# 'comms' contains a record for each 'comm' - the name given to the executable that is running. +# +# dsos +# +# 'dsos' contains a record for each executable file or library. +# +# machines +# +# 'machines' can be used to distinguish virtual machines if virtualization is supported. +# +# selected_events +# +# 'selected_events' contains a record for each kind of event that has been sampled. +# +# symbols +# +# 'symbols' contains a record for each symbol. Only symbols that have samples are present. +# +# threads +# +# 'threads' contains a record for each thread. +# +# Views: +# +# Most of the tables have views for more friendly display. The views are: +# +# calls_view +# call_paths_view +# comm_threads_view +# dsos_view +# machines_view +# samples_view +# symbols_view +# threads_view +# +# More examples of browsing the database with psql: +# Note that some of the examples are not the most optimal SQL query. +# Note that call information is only available if the script's 'calls' option has been used. +# +# Top 10 function calls (not aggregated by symbol): +# +# SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10; +# +# Top 10 function calls (aggregated by symbol): +# +# SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol, +# SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count +# FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10; +# +# Note that the branch count gives a rough estimation of cpu usage, so functions +# that took a long time but have a relatively low branch count must have spent time +# waiting. +# +# Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'): +# +# SELECT * FROM symbols_view WHERE name LIKE '%alloc%'; +# +# Top 10 function calls for a specific symbol (e.g. 
whose symbol_id is 187): +# +# SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10; +# +# Show function calls made by function in the same context (i.e. same call path) (e.g. one with call_path_id 254): +# +# SELECT * FROM calls_view WHERE parent_call_path_id = 254; +# +# Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670) +# +# SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%'; +# +# Show transactions: +# +# SELECT * FROM samples_view WHERE event = 'transactions'; +# +# Note transaction start has 'in_tx' true, whereas transaction end has 'in_tx' false. +# Transaction aborts have branch_type_name 'transaction abort' +# +# Show transaction aborts: +# +# SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort'; +# +# To print a call stack requires walking the call_paths table. For example this python script: +# #!/usr/bin/python2 +# +# import sys +# from PySide.QtSql import * +# +# if __name__ == '__main__': +# if (len(sys.argv) < 3): +# print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>" +# raise Exception("Too few arguments") +# dbname = sys.argv[1] +# call_path_id = sys.argv[2] +# db = QSqlDatabase.addDatabase('QPSQL') +# db.setDatabaseName(dbname) +# if not db.open(): +# raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) +# query = QSqlQuery(db) +# print " id ip symbol_id symbol dso_id dso_short_name" +# while call_path_id != 0 and call_path_id != 1: +# ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id)) +# if not ret: +# raise Exception("Query failed: " + query.lastError().text()) +# if not query.next(): +# raise Exception("Query failed") +# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) +# call_path_id 
= query.value(6) from PySide.QtSql import * @@ -244,6 +380,91 @@ if perf_db_export_calls: 'parent_call_path_id bigint,' 'flags integer)') +do_query(query, 'CREATE VIEW machines_view AS ' + 'SELECT ' + 'id,' + 'pid,' + 'root_dir,' + 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest' + ' FROM machines') + +do_query(query, 'CREATE VIEW dsos_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'short_name,' + 'long_name,' + 'build_id' + ' FROM dsos') + +do_query(query, 'CREATE VIEW symbols_view AS ' + 'SELECT ' + 'id,' + 'name,' + '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,' + 'dso_id,' + 'sym_start,' + 'sym_end,' + 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding' + ' FROM symbols') + +do_query(query, 'CREATE VIEW threads_view AS ' + 'SELECT ' + 'id,' + 'machine_id,' + '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,' + 'process_id,' + 'pid,' + 'tid' + ' FROM threads') + +do_query(query, 'CREATE VIEW comm_threads_view AS ' + 'SELECT ' + 'comm_id,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid' + ' FROM comm_threads') + +if perf_db_export_calls: + do_query(query, 'CREATE VIEW call_paths_view AS ' + 'SELECT ' + 'c.id,' + 'to_hex(c.ip) AS ip,' + 'c.symbol_id,' + '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,' + '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,' + '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,' + 'c.parent_id,' + 'to_hex(p.ip) AS parent_ip,' + 'p.symbol_id AS parent_symbol_id,' + '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,' + '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,' + '(SELECT dso FROM 
symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name' + ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id') + do_query(query, 'CREATE VIEW calls_view AS ' + 'SELECT ' + 'calls.id,' + 'thread_id,' + '(SELECT pid FROM threads WHERE id = thread_id) AS pid,' + '(SELECT tid FROM threads WHERE id = thread_id) AS tid,' + '(SELECT comm FROM comms WHERE id = comm_id) AS command,' + 'call_path_id,' + 'to_hex(ip) AS ip,' + 'symbol_id,' + '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,' + 'call_time,' + 'return_time,' + 'return_time - call_time AS elapsed_time,' + 'branch_count,' + 'call_id,' + 'return_id,' + 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'parent_call_path_id' + ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') + do_query(query, 'CREATE VIEW samples_view AS ' 'SELECT ' 'id,' diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e1f28f4cdc8e..a4e9b370c037 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1527,7 +1527,7 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, static int do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { - struct dso *dso = act->dso; + struct map *map = act->ms.map; if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); @@ -1535,11 +1535,11 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) browser->hists->dso_filter = NULL; ui_helpline__pop(); } else { - if (dso == NULL) + if (map == NULL) return 0; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", - dso->kernel ? "the Kernel" : dso->short_name); - browser->hists->dso_filter = dso; + __map__is_kernel(map) ? 
"the Kernel" : map->dso->short_name); + browser->hists->dso_filter = map->dso; perf_hpp__set_elide(HISTC_DSO, true); pstack__push(browser->pstack, &browser->hists->dso_filter); } @@ -1551,17 +1551,18 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, - char **optstr, struct dso *dso) + char **optstr, struct map *map) { - if (dso == NULL) + if (map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", browser->hists->dso_filter ? "out of" : "into", - dso->kernel ? "the Kernel" : dso->short_name) < 0) + __map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0) return 0; - act->dso = dso; + act->ms.map = map; + act->dso = map->dso; act->fn = do_zoom_dso; return 1; } @@ -1814,6 +1815,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; struct dso *dso = NULL; + struct map *map = NULL; int choice = 0; int socked_id = -1; @@ -1823,7 +1825,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); - dso = browser->selection->map ? browser->selection->map->dso : NULL; + map = browser->selection->map; + if (map) + dso = map->dso; socked_id = browser->he_selection->socket; } switch (key) { @@ -2014,7 +2018,7 @@ skip_annotation: nr_options += add_thread_opt(browser, &actions[nr_options], &options[nr_options], thread); nr_options += add_dso_opt(browser, &actions[nr_options], - &options[nr_options], dso); + &options[nr_options], map); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], browser->selection ? 
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a980e7c50ee0..7f10430af39c 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -926,6 +926,8 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, #define PERF_ITRACE_DEFAULT_PERIOD 100000 #define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16 #define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024 +#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 +#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) { @@ -936,6 +938,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; + synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; } /* @@ -950,6 +953,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, const char *p; char *endptr; bool period_type_set = false; + bool period_set = false; synth_opts->set = true; @@ -971,6 +975,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, p += 1; if (isdigit(*p)) { synth_opts->period = strtoull(p, &endptr, 10); + period_set = true; p = endptr; while (*p == ' ' || *p == ',') p += 1; @@ -1041,6 +1046,23 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->callchain_sz = val; } break; + case 'l': + synth_opts->last_branch = true; + synth_opts->last_branch_sz = + PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + while (*p == ' ' || *p == ',') + p += 1; + if (isdigit(*p)) { + unsigned int val; + + val = strtoul(p, &endptr, 10); + p = endptr; + if (!val || + val > PERF_ITRACE_MAX_LAST_BRANCH_SZ) + goto out_err; + synth_opts->last_branch_sz = val; + } + break; case ' ': case ',': break; @@ -1053,7 +1075,7 @@ out: if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - if (!synth_opts->period) + if 
(!period_set) synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; } diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index bf72b77a588a..b86f90db1352 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -63,7 +63,9 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @last_branch: add branch context to 'instruction' events * @callchain_sz: maximum callchain size + * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type */ @@ -79,7 +81,9 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool last_branch; unsigned int callchain_sz; + unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; }; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6214ad47d554..b1bb348ec3b6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -378,7 +378,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - if (pos->dso->kernel) + if (__map__is_kernel(pos)) continue; size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index be5cbc7be889..a0dbcbd4f6d8 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -257,6 +257,7 @@ struct events_stats { u64 total_non_filtered_period; u64 total_lost; u64 total_lost_samples; + u64 total_aux_lost; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a8643735dcea..89546228b8ed 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -165,6 +165,13 @@ void perf_evlist__add(struct 
perf_evlist *evlist, struct perf_evsel *entry) __perf_evlist__propagate_maps(evlist, entry); } +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) +{ + evsel->evlist = NULL; + list_del_init(&evsel->node); + evlist->nr_entries -= 1; +} + void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list) { @@ -617,6 +624,21 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id) +{ + struct perf_sample_id *sid; + + if (!id) + return NULL; + + sid = perf_evlist__id2sid(evlist, id); + if (sid) + return sid->evsel; + + return NULL; +} + static int perf_evlist__event2id(struct perf_evlist *evlist, union perf_event *event, u64 *id) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 115d8b53c601..66bc9d4c0869 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -73,6 +73,7 @@ void perf_evlist__exit(struct perf_evlist *evlist); void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); +void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel); int perf_evlist__add_default(struct perf_evlist *evlist); int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); @@ -104,6 +105,8 @@ int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mas int perf_evlist__poll(struct perf_evlist *evlist, int timeout); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); +struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, + u64 id); struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b3567a25f0c4..0cad9e07c5b4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c 
@@ -695,7 +695,7 @@ iter_finish_normal_entry(struct hist_entry_iter *iter, } static int -iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, +iter_prepare_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { struct hist_entry **he_cache; @@ -707,7 +707,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, * cumulated only one time to prevent entries more than 100% * overhead. */ - he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); if (he_cache == NULL) return -ENOMEM; @@ -868,6 +868,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, if (err) return err; + iter->max_stack = max_stack_depth; + err = iter->ops->prepare_entry(iter, al); if (err) goto out; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4d6aa1dbdaee..8c20a8f6b214 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -90,6 +90,7 @@ struct hist_entry_iter { int curr; bool hide_unresolved; + int max_stack; struct perf_evsel *evsel; struct perf_sample *sample; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 22ba50224319..9409d014b46c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -650,7 +650,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) if (data->from_mtc && timestamp < data->timestamp && data->timestamp - timestamp < decoder->tsc_slip) return 1; - while (timestamp < data->timestamp) + if (timestamp < data->timestamp) timestamp += (1ULL << 56); if (pkt_info->last_packet_type != INTEL_PT_CYC) { if (data->from_mtc) @@ -1191,7 +1191,7 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) timestamp); timestamp = decoder->timestamp; } - while (timestamp < decoder->timestamp) { + 
if (timestamp < decoder->timestamp) { intel_pt_log_to("Wraparound timestamp", timestamp); timestamp += (1ULL << 56); decoder->tsc_timestamp = timestamp; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index d09c7d9f9050..319bef33a64b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -29,18 +29,18 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; -static bool enable_logging; +bool intel_pt_enable_logging; void intel_pt_log_enable(void) { - enable_logging = true; + intel_pt_enable_logging = true; } void intel_pt_log_disable(void) { if (f) fflush(f); - enable_logging = false; + intel_pt_enable_logging = false; } void intel_pt_log_set_name(const char *name) @@ -80,7 +80,7 @@ static void intel_pt_print_no_data(uint64_t pos, int indent) static int intel_pt_log_open(void) { - if (!enable_logging) + if (!intel_pt_enable_logging) return -1; if (f) @@ -91,15 +91,15 @@ static int intel_pt_log_open(void) f = fopen(log_name, "w+"); if (!f) { - enable_logging = false; + intel_pt_enable_logging = false; return -1; } return 0; } -void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, - uint64_t pos, const unsigned char *buf) +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf) { char desc[INTEL_PT_PKT_DESC_MAX]; @@ -111,7 +111,7 @@ void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, fprintf(f, "%s\n", desc); } -void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) { char desc[INTEL_PT_INSN_DESC_MAX]; size_t len = intel_pt_insn->length; @@ -128,7 +128,8 @@ void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) fprintf(f, "Bad instruction!\n"); } -void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) +void 
__intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip) { char desc[INTEL_PT_INSN_DESC_MAX]; @@ -142,7 +143,7 @@ void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) fprintf(f, "Bad instruction!\n"); } -void intel_pt_log(const char *fmt, ...) +void __intel_pt_log(const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index db3942f83677..debe751dc3d6 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -25,20 +25,46 @@ void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); -void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, - uint64_t pos, const unsigned char *buf); +void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, + uint64_t pos, const unsigned char *buf); struct intel_pt_insn; -void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); -void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, - uint64_t ip); +void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); +void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, + uint64_t ip); __attribute__((format(printf, 1, 2))) -void intel_pt_log(const char *fmt, ...); +void __intel_pt_log(const char *fmt, ...); + +#define intel_pt_log(fmt, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log(fmt, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_packet(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_packet(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn(arg, ...) \ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn(arg, ##__VA_ARGS__); \ + } while (0) + +#define intel_pt_log_insn_no_data(arg, ...) 
\ + do { \ + if (intel_pt_enable_logging) \ + __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \ + } while (0) #define x64_fmt "0x%" PRIx64 +extern bool intel_pt_enable_logging; + static inline void intel_pt_log_at(const char *msg, uint64_t u) { intel_pt_log("%s at " x64_fmt "\n", msg, u); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 38942e1eac8f..03ff072b5993 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -22,6 +22,7 @@ #include "../perf.h" #include "session.h" #include "machine.h" +#include "sort.h" #include "tool.h" #include "event.h" #include "evlist.h" @@ -63,6 +64,7 @@ struct intel_pt { bool data_queued; bool est_tsc; bool sync_switch; + bool mispred_all; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -115,6 +117,9 @@ struct intel_pt_queue { void *decoder; const struct intel_pt_state *state; struct ip_callchain *chain; + struct branch_stack *last_branch; + struct branch_stack *last_branch_rb; + size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -675,6 +680,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, goto out_free; } + if (pt->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + sz += pt->synth_opts.last_branch_sz * + sizeof(struct branch_entry); + ptq->last_branch = zalloc(sz); + if (!ptq->last_branch) + goto out_free; + ptq->last_branch_rb = zalloc(sz); + if (!ptq->last_branch_rb) + goto out_free; + } + ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); if (!ptq->event_buf) goto out_free; @@ -720,7 +738,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (!params.period) { params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; - params.period = 1000; + params.period = 1; } } @@ -732,6 +750,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); 
return NULL; @@ -746,6 +766,8 @@ static void intel_pt_free_queue(void *priv) thread__zput(ptq->thread); intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); + zfree(&ptq->last_branch); + zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -876,6 +898,58 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } +static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) +{ + struct branch_stack *bs_src = ptq->last_branch_rb; + struct branch_stack *bs_dst = ptq->last_branch; + size_t nr = 0; + + bs_dst->nr = bs_src->nr; + + if (!bs_src->nr) + return; + + nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; + memcpy(&bs_dst->entries[0], + &bs_src->entries[ptq->last_branch_pos], + sizeof(struct branch_entry) * nr); + + if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { + memcpy(&bs_dst->entries[nr], + &bs_src->entries[0], + sizeof(struct branch_entry) * ptq->last_branch_pos); + } +} + +static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) +{ + ptq->last_branch_pos = 0; + ptq->last_branch_rb->nr = 0; +} + +static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) +{ + const struct intel_pt_state *state = ptq->state; + struct branch_stack *bs = ptq->last_branch_rb; + struct branch_entry *be; + + if (!ptq->last_branch_pos) + ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; + + ptq->last_branch_pos -= 1; + + be = &bs->entries[ptq->last_branch_pos]; + be->from = state->from_ip; + be->to = state->to_ip; + be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); + be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ptq->pt->mispred_all; + + if (bs->nr < ptq->pt->synth_opts.last_branch_sz) + bs->nr += 1; +} + static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type, bool swapped) @@ -890,6 +964,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct 
intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + struct dummy_branch_stack { + u64 nr; + struct branch_entry entries; + } dummy_bs; + + if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) + return 0; event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; @@ -909,8 +990,20 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.flags = ptq->flags; sample.insn_len = ptq->insn_len; - if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) - return 0; + /* + * perf report cannot handle events without a branch stack when using + * SORT_MODE__BRANCH so make a dummy one. + */ + if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { + dummy_bs = (struct dummy_branch_stack){ + .nr = 1, + .entries = { + .from = sample.ip, + .to = sample.addr, + }, + }; + sample.branch_stack = (struct branch_stack *)&dummy_bs; + } if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, @@ -961,6 +1054,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.callchain = ptq->chain; } + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, pt->instructions_sample_type, @@ -974,6 +1072,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", ret); + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); + return ret; } @@ -1008,6 +1109,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.callchain = ptq->chain; } + if (pt->synth_opts.last_branch) { + intel_pt_copy_last_branch_rb(ptq); + sample.branch_stack = ptq->last_branch; + } + if (pt->synth_opts.inject) { ret = intel_pt_inject_event(event, &sample, 
pt->transactions_sample_type, @@ -1021,6 +1127,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); + if (pt->synth_opts.last_branch) + intel_pt_reset_last_branch_rb(ptq); + return ret; } @@ -1116,6 +1225,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } + if (pt->synth_opts.last_branch) + intel_pt_update_last_branch_rb(ptq); + if (!pt->sync_switch) return 0; @@ -1763,6 +1875,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, pt->instructions_sample_period = attr.sample_period; if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1782,6 +1896,8 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_period = 1; if (pt->synth_opts.callchain) attr.sample_type |= PERF_SAMPLE_CALLCHAIN; + if (pt->synth_opts.last_branch) + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1808,6 +1924,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_period = 1; attr.sample_type |= PERF_SAMPLE_ADDR; attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; + attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); err = intel_pt_synth_event(session, &attr, id); @@ -1852,6 +1969,16 @@ static bool intel_pt_find_switch(struct perf_evlist *evlist) return false; } +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt *pt = data; + + if 
(!strcmp(var, "intel-pt.mispred-all")) + pt->mispred_all = perf_config_bool(var, value); + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -1896,6 +2023,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + perf_config(intel_pt_perf_config, pt); + err = auxtrace_queues__init(&pt->queues); if (err) goto err_free; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 61c2bc20926d..5ffb356cbcc6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -27,6 +27,8 @@ extern int parse_events_debug; #endif int parse_events_parse(void *data, void *scanner); +static int get_config_terms(struct list_head *head_config, + struct list_head *head_terms __maybe_unused); static struct perf_pmu_event_symbol *perf_pmu_events_list; /* @@ -416,7 +418,8 @@ static void tracepoint_error(struct parse_events_error *error, int err, static int add_tracepoint(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error __maybe_unused) + struct parse_events_error *error __maybe_unused, + struct list_head *head_config) { struct perf_evsel *evsel; @@ -426,13 +429,22 @@ static int add_tracepoint(struct list_head *list, int *idx, return PTR_ERR(evsel); } + if (head_config) { + LIST_HEAD(config_terms); + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + list_splice(&config_terms, &evsel->config_terms); + } + list_add_tail(&evsel->node, list); return 0; } static int add_tracepoint_multi_event(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error) + struct parse_events_error *error, + struct list_head *head_config) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; @@ -456,7 +468,8 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, if 
(!strglobmatch(evt_ent->d_name, evt_name)) continue; - ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, error); + ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, + error, head_config); } closedir(evt_dir); @@ -465,16 +478,20 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, static int add_tracepoint_event(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error) + struct parse_events_error *error, + struct list_head *head_config) { return strpbrk(evt_name, "*?") ? - add_tracepoint_multi_event(list, idx, sys_name, evt_name, error) : - add_tracepoint(list, idx, sys_name, evt_name, error); + add_tracepoint_multi_event(list, idx, sys_name, evt_name, + error, head_config) : + add_tracepoint(list, idx, sys_name, evt_name, + error, head_config); } static int add_tracepoint_multi_sys(struct list_head *list, int *idx, char *sys_name, char *evt_name, - struct parse_events_error *error) + struct parse_events_error *error, + struct list_head *head_config) { struct dirent *events_ent; DIR *events_dir; @@ -498,23 +515,13 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, continue; ret = add_tracepoint_event(list, idx, events_ent->d_name, - evt_name, error); + evt_name, error, head_config); } closedir(events_dir); return ret; } -int parse_events_add_tracepoint(struct list_head *list, int *idx, - char *sys, char *event, - struct parse_events_error *error) -{ - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(list, idx, sys, event, error); - else - return add_tracepoint_event(list, idx, sys, event, error); -} - static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) { @@ -599,9 +606,13 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -static int config_term(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err) +typedef int config_term_func_t(struct perf_event_attr 
*attr, + struct parse_events_term *term, + struct parse_events_error *err); + +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) { #define CHECK_TYPE_VAL(type) \ do { \ @@ -610,12 +621,6 @@ do { \ } while (0) switch (term->type_term) { - case PARSE_EVENTS__TERM_TYPE_USER: - /* - * Always succeed for sysfs terms, as we dont know - * at this point what type they need to have. - */ - return 0; case PARSE_EVENTS__TERM_TYPE_CONFIG: CHECK_TYPE_VAL(NUM); attr->config = term->val.num; @@ -658,6 +663,11 @@ do { \ CHECK_TYPE_VAL(STR); break; default: + if (err) { + err->str = strdup("unknown term"); + err->idx = term->err_term; + err->help = parse_events_formats_error_string(NULL); + } return -EINVAL; } @@ -665,9 +675,44 @@ do { \ #undef CHECK_TYPE_VAL } +static int config_term_pmu(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) + /* + * Always succeed for sysfs terms, as we don't know + * at this point what type they need to have. 
+ */ + return 0; + else + return config_term_common(attr, term, err); +} + +static int config_term_tracepoint(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + return config_term_common(attr, term, err); + default: + if (err) { + err->idx = term->err_term; + err->str = strdup("unknown term"); + err->help = strdup("valid terms: call-graph,stack-size\n"); + } + return -EINVAL; + } + + return 0; +} + static int config_attr(struct perf_event_attr *attr, struct list_head *head, - struct parse_events_error *err) + struct parse_events_error *err, + config_term_func_t config_term) { struct parse_events_term *term; @@ -722,6 +765,27 @@ do { \ return 0; } +int parse_events_add_tracepoint(struct list_head *list, int *idx, + char *sys, char *event, + struct parse_events_error *error, + struct list_head *head_config) +{ + if (head_config) { + struct perf_event_attr attr; + + if (config_attr(&attr, head_config, error, + config_term_tracepoint)) + return -EINVAL; + } + + if (strpbrk(sys, "*?")) + return add_tracepoint_multi_sys(list, idx, sys, event, + error, head_config); + else + return add_tracepoint_event(list, idx, sys, event, + error, head_config); +} + int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, @@ -735,7 +799,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data, attr.config = config; if (head_config) { - if (config_attr(&attr, head_config, data->error)) + if (config_attr(&attr, head_config, data->error, + config_term_common)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -795,7 +860,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) */ - if (config_attr(&attr, head_config, data->error)) + if 
(config_attr(&attr, head_config, data->error, config_term_pmu)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -1861,3 +1926,29 @@ void parse_events_evlist_error(struct parse_events_evlist *data, err->str = strdup(str); WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } + +/* + * Return allocated string of valid config terms, NULL if asprintf() fails. + * @additional_terms: Optional extra terms (e.g. PMU sysfs terms); may be NULL. + */ +char *parse_events_formats_error_string(char *additional_terms) +{ + char *str; + static const char *static_terms = "config,config1,config2,name," + "period,freq,branch_type,time," + "call-graph,stack-size\n"; + + /* valid terms; asprintf() returns -1 on failure, never 0 */ + if (additional_terms) { + if (asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms) < 0) + goto fail; + } else { + if (asprintf(&str, "valid terms: %s", static_terms) < 0) + goto fail; + } + return str; + +fail: + return NULL; +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ffee7ece75a6..f13d3ccda444 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -119,7 +119,8 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event, - struct parse_events_error *error); + struct parse_events_error *error, + struct list_head *head_config); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, @@ -156,5 +157,6 @@ int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); +char *parse_events_formats_error_string(char *additional_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 936d566f48d8..c29832bce496 100644 --- 
a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -174,7 +174,7 @@ modifier_bp [rwx]{1,3} { /* - * Please update formats_error_string any time + * Please update parse_events_formats_error_string any time * new static term is added. */ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 8bcc45868457..ae6af269f9c9 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -67,6 +67,7 @@ static inc_group_count(struct list_head *list, %type event_legacy_cache %type event_legacy_mem %type event_legacy_tracepoint +%type tracepoint_name %type event_legacy_numeric %type event_legacy_raw %type event_def @@ -84,6 +85,10 @@ static inc_group_count(struct list_head *list, u64 num; struct list_head *head; struct parse_events_term *term; + struct tracepoint_name { + char *sys; + char *event; + } tracepoint_name; } %% @@ -368,36 +373,58 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -PE_NAME '-' PE_NAME ':' PE_NAME +tracepoint_name { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; struct list_head *list; - char sys_name[128]; - snprintf(&sys_name, 128, "%s-%s", $1, $3); ALLOC_LIST(list); - if (parse_events_add_tracepoint(list, &data->idx, &sys_name, $5, error)) { - if (error) - error->idx = @1.first_column; + if (error) + error->idx = @1.first_column; + + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error, NULL)) return -1; - } + $$ = list; } | +tracepoint_name '/' event_config '/' +{ + struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; + struct list_head *list; + + ALLOC_LIST(list); + if (error) + error->idx = @1.first_column; + + if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, + error, $3)) + return -1; + + $$ = list; +} + +tracepoint_name: +PE_NAME '-' PE_NAME ':' PE_NAME +{ + char sys_name[128]; 
+ struct tracepoint_name tracepoint; + + snprintf(&sys_name, 128, "%s-%s", $1, $3); + tracepoint.sys = &sys_name; + tracepoint.event = $5; + + $$ = tracepoint; +} +| PE_NAME ':' PE_NAME { - struct parse_events_evlist *data = _data; - struct parse_events_error *error = data->error; - struct list_head *list; + struct tracepoint_name tracepoint = {$1, $3}; - ALLOC_LIST(list); - if (parse_events_add_tracepoint(list, &data->idx, $1, $3, error)) { - if (error) - error->idx = @1.first_column; - return -1; - } - $$ = list; + $$ = tracepoint; } event_legacy_numeric: diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 89c91a1a67e7..ac42c97be9e4 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -626,38 +626,26 @@ static int pmu_resolve_param_term(struct parse_events_term *term, return -1; } -static char *formats_error_string(struct list_head *formats) +static char *pmu_formats_string(struct list_head *formats) { struct perf_pmu_format *format; - char *err, *str; - static const char *static_terms = "config,config1,config2,name," - "period,freq,branch_type,time," - "call-graph,stack-size\n"; + char *str; + struct strbuf buf; unsigned i = 0; - if (!asprintf(&str, "valid terms:")) + if (!formats) return NULL; + strbuf_init(&buf, 0); /* sysfs exported terms */ - list_for_each_entry(format, formats, list) { - char c = i++ ? ',' : ' '; + list_for_each_entry(format, formats, list) + strbuf_addf(&buf, i++ ? 
",%s" : "%s", + format->name); - err = str; - if (!asprintf(&str, "%s%c%s", err, c, format->name)) - goto fail; - free(err); - } + str = strbuf_detach(&buf, NULL); + strbuf_release(&buf); - /* static terms */ - err = str; - if (!asprintf(&str, "%s,%s", err, static_terms)) - goto fail; - - free(err); return str; -fail: - free(err); - return NULL; } /* @@ -693,9 +681,12 @@ static int pmu_config_term(struct list_head *formats, if (verbose) printf("Invalid event/parameter '%s'\n", term->config); if (err) { + char *pmu_term = pmu_formats_string(formats); + err->idx = term->err_term; err->str = strdup("unknown term"); - err->help = formats_error_string(formats); + err->help = parse_events_formats_error_string(pmu_term); + free(pmu_term); } return -EINVAL; } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index aa9e1257c1ee..a8e825fca42a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -319,7 +319,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f5e000030a5e..84a02eae4394 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1101,6 +1101,9 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); case PERF_RECORD_AUX: + if (tool->aux == perf_event__process_aux && + (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)) + evlist->stats.total_aux_lost += 1; return tool->aux(tool, event, sample, machine); case PERF_RECORD_ITRACE_START: return tool->itrace_start(tool, event, sample, machine); @@ -1346,6 +1349,13 @@ static void perf_session__warn_about_errors(const struct perf_session *session) } } + if (session->tool->aux == perf_event__process_aux && + stats->total_aux_lost != 0) { + ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n", + stats->total_aux_lost, + stats->nr_events[PERF_RECORD_AUX]); + } + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " @@ -1790,7 +1800,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, if (thread__resolve_callchain(al->thread, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + stack_depth) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index da6cc4cc2a4f..b85ee55cca0c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -78,6 +78,8 @@ struct scripting_ops { int (*generate_script) (struct pevent *pevent, const char *outfile); }; +extern unsigned int scripting_max_stack; + int script_spec_register(const char *spec, struct scripting_ops *ops); void setup_perl_scripting(void);