From 259d40775ee11dbeba022514815df08c57994ef3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Sep 2017 10:52:02 -0300 Subject: [PATCH 1/9] tools include linux: Guard against redefinition of some macros When cross building to android r15c (and older versions) on Fedora 26 we notice these: /opt/android-ndk-r15c/platforms/android-24/arch-arm/usr/include/sys/cdefs.h:332:0: note: this is the location of the previous definition For __aligned, __packed and __noreturn, so guard those with ifdefs to avoid drowning useful warnings in these. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d7w3fa9c22dtmrwbedos6ie1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler-gcc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index bd39b2090ad1..3723b9f8f964 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -21,11 +21,14 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #define noinline __attribute__((noinline)) - +#ifndef __packed #define __packed __attribute__((packed)) - +#endif +#ifndef __noreturn #define __noreturn __attribute__((noreturn)) - +#endif +#ifndef __aligned #define __aligned(x) __attribute__((aligned(x))) +#endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) From df90cc41d662ad5f700afc042df43e57ce1ed0a4 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Wed, 6 Sep 2017 17:02:09 +0200 Subject: [PATCH 2/9] perf tests: Fix compile when libunwind's unwind.h is available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When cross compiling perf and I want to link against a self-compiled libunwind, I usually make the custom path where the libunwind headers exist visible by adding the libunwind prefix to the include path when compiling perf, i.e.: ~~~~~ $ ls $HOME/projects/compiled/other/include/ libunwind-coredump.h libunwind.h libunwind-x86_64.h libunwind-common.h libunwind-dynamic.h libunwind-ptrace.h unwind.h $ make EXTRA_CFLAGS="-I$HOME/projects/compiled/other/include/ ~~~~~~ Note the `unwind.h` header from libunwind which leads to compile errors when compiling tests/dwarf-unwind.c, since it shadows perf's util/unwind.h: ~~~~~ tests/dwarf-unwind.c:41:32: error: ‘struct unwind_entry’ declared inside parameter list will not be visible outside of this definition or declaration [-Werror] static int unwind_entry(struct unwind_entry *entry, void *arg) ^~~~~~~~~~~~ tests/dwarf-unwind.c: In function ‘unwind_entry’: tests/dwarf-unwind.c:44:22: error: dereferencing pointer to incomplete type ‘struct unwind_entry’ char *symbol = entry->sym ? entry->sym->name : NULL; ^~ tests/dwarf-unwind.c: In function ‘unwind_thread’: tests/dwarf-unwind.c:92:8: error: implicit declaration of function ‘unwind__get_entries’; did you mean ‘unwind_entry’? [-Werror=implicit-function-declaration] err = unwind__get_entries(unwind_entry, &cnt, thread, ^~~~~~~~~~~~~~~~~~~ unwind_entry tests/dwarf-unwind.c:92:8: error: nested extern declaration of ‘unwind__get_entries’ [-Werror=nested-externs] ~~~~~~ Fix this compile error by specificing an explicit include of perf's unwind.h in the util folder. Signed-off-by: Milian Wolff Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20170906150209.12579-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2a7b9b47bbcb..9ba1d216a89f 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -6,7 +6,7 @@ #include "debug.h" #include "machine.h" #include "event.h" -#include "unwind.h" +#include "../util/unwind.h" #include "perf_regs.h" #include "map.h" #include "thread.h" From 58b79186c34306f4a14e98119afc10744a42fa40 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 10:46:19 +0200 Subject: [PATCH 3/9] tools lib api: Fix make DEBUG=1 build Do not use -D_FORTIFY_SOURCE=2 for DEBUG build as it seems to mess up with debuginfo, which results in bad gdb experience. We already do that for tools/perf/. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908084621.31595-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 4563ba7ede6f..1e83e3c07448 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -17,13 +17,19 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC +ifeq ($(DEBUG),0) ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 endif +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif # Treat warnings as errors unless directed not to ifneq ($(WERROR),0) From cd6379ebb55ae53e77f17e22ce830bf3fe826736 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 10:46:20 +0200 Subject: [PATCH 4/9] perf tools: Open perf.data with O_CLOEXEC flag Do not carry the perf.data file descriptor into the workload process and close it when perf executes the workload. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908084621.31595-2-jolsa@kernel.org [ Add definitions for O_CLOEXEC for older systems ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e84bbc8ec058..263f5a906ba5 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -10,6 +10,16 @@ #include "util.h" #include "debug.h" +#ifndef O_CLOEXEC +#ifdef __sparc__ +#define O_CLOEXEC 0x400000 +#elif defined(__alpha__) || defined(__hppa__) +#define O_CLOEXEC 010000000 +#else +#define O_CLOEXEC 02000000 +#endif +#endif + static bool check_pipe(struct perf_data_file *file) { struct stat st; @@ -96,7 +106,8 @@ static int open_file_write(struct perf_data_file *file) if (check_backup(file)) return -1; - fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR); + fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, + S_IRUSR|S_IWUSR); if (fd < 0) pr_err("failed to open %s : %s\n", file->path, From 4d286c89e412fc2eaa1b8988481d2f32b5e3826f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 14:05:07 +0200 Subject: [PATCH 5/9] perf ui progress: Make sure we always define step value Unlikely, but we could have ui_progress__init being called with total < 16, which would set the next and step variables to 0. That would force unnecessary ui_progress__ops->update calls because 'next' would never raise. Forcing the next and step values to be always > 0. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908120510.22515-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/progress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index a0f24c7115c5..a9c15804b1f6 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -25,7 +25,7 @@ void ui_progress__update(struct ui_progress *p, u64 adv) void ui_progress__init(struct ui_progress *p, u64 total, const char *title) { p->curr = 0; - p->next = p->step = total / 16; + p->next = p->step = total / 16 ?: 1; p->total = total; p->title = title; From a82bfd041d0ec41861a7adda4c078993f2f9c452 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 14:05:08 +0200 Subject: [PATCH 6/9] perf ui progress: Fix progress update We currently update the 'next' variable only with a single step value. But it's possible the 'adv' update is bigger than single 'step' value. This would leave 'next' value under counted and force unnecessary ui_progress__ops->update calls. Calculate the amount of steps we need for 'adv' update and increase the 'next' with that amounts of steps. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908120510.22515-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/progress.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index a9c15804b1f6..ae91c8148edf 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -1,3 +1,4 @@ +#include #include "../cache.h" #include "progress.h" @@ -14,10 +15,14 @@ struct ui_progress_ops *ui_progress__ops = &null_progress__ops; void ui_progress__update(struct ui_progress *p, u64 adv) { + u64 last = p->curr; + p->curr += adv; if (p->curr >= p->next) { - p->next += p->step; + u64 nr = DIV_ROUND_UP(p->curr - last, p->step); + + p->next += nr * p->step; ui_progress__ops->update(p); } } From cba225d6eeaf00bd8181a851fbaa7b8716337e0b Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 7 Sep 2017 12:18:45 +0900 Subject: [PATCH 7/9] perf config: Check not only section->from_system_config but also item's Currently section->from_system_config is being checked multiple times. item->from_system_config should be checked instead, when iterating thru the items in a section. Fix it. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504754325-9724-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 3ddcc6e2abeb..a1d82e33282c 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -59,7 +59,7 @@ static int set_config(struct perf_config_set *set, const char *file_name, fprintf(fp, "[%s]\n", section->name); perf_config_items__for_each_entry(§ion->items, item) { - if (!use_system_config && section->from_system_config) + if (!use_system_config && item->from_system_config) continue; if (item->value) fprintf(fp, "\t%s = %s\n", From 3192f1ed3dd3a6883d5ae31bf2ff69984ea0fd54 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 11 Sep 2017 13:14:22 +0200 Subject: [PATCH 8/9] perf tools: Support running perf binaries with a dash in their name Previously the part behind "perf-" was interpreted as an internal perf command. If the suffix could not be handled, the execution was stopped. This makes it impossible to launch perf binaries that got renamed to have the `perf-` prefix. This is e.g. the case for appimages (e.g. "perf-x86_64.AppImage"), but would also apply to all other scenarios where users symlink or rename perf themselves: Status quo with the broken behavior: $ ln -s ./perf ./perf-custom-suffix $ ./perf-custom-suffix list cannot handle custom-suffix internally$ Also note the missing newline at the end of the error message. With this patch applied, the above works properly: $ ./perf-custom-suffix list List of pre-defined events (to be used in -e): ... Signed-off-by: Milian Wolff Acked-by: David Ahern Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Link: http://lkml.kernel.org/r/20170911111422.31903-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index e0279babe0c0..2f19e03c5c40 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -467,15 +467,21 @@ int main(int argc, const char **argv) * - cannot execute it externally (since it would just do * the same thing over again) * - * So we just directly call the internal command handler, and - * die if that one cannot handle it. + * So we just directly call the internal command handler. If that one + * fails to handle this, then maybe we just run a renamed perf binary + * that contains a dash in its name. To handle this scenario, we just + * fall through and ignore the "xxxx" part of the command string. */ if (strstarts(cmd, "perf-")) { cmd += 5; argv[0] = cmd; handle_internal_command(argc, argv); - fprintf(stderr, "cannot handle %s internally", cmd); - goto out; + /* + * If the command is handled, the above function does not + * return undo changes and fall through in such a case. + */ + cmd -= 5; + argv[0] = cmd; } if (strstarts(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT From dfc9eec7716cc0a9f7eb743c703d74cd2d6085a0 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Tue, 12 Sep 2017 17:25:23 +0200 Subject: [PATCH 9/9] perf stat: Wait for the correct child When packaging the perf userland application into an AppImage, the wait() call in perf stat returned too early. It turned out that some other child process exited, but not the one perf stat launched: $ sudo strace -e fork,execve,clone,wait4 -f ./perf-x86_64.AppImage stat sleep 1 execve("./perf-git.3a73b7f9-x86_64.AppImage", ["./perf-git.3a73b7f9-x86_64.AppIm"..., "stat", "sleep", "1"], 0x7ffec1bbf050 /* 18 vars */) = 0 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f6a6e7efe50) = 3912 strace: Process 3912 attached [pid 3912] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f6a6e7efe50) = 3914 strace: Process 3914 attached [pid 3912] +++ exited with 0 +++ [pid 3911] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=3912, si_uid=0, si_status=0, si_utime=0, si_stime=0} --- [pid 3914] clone(strace: Process 3915 attached child_stack=0x7f6a6d9fefb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f6a6d9ff9d0, tls=0x7f6a6d9ff700, child_tidptr=0x7f6a6d9ff9d0) = 3915 [pid 3911] execve("/tmp/.mount_perf-g6VYMpl/AppRun", ["./perf-git.3a73b7f9-x86_64.AppIm"..., "stat", "sleep", "1"], 0x14aab70 /* 21 vars */) = 0 [pid 3911] clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f4ae113c4d0) = 3916 strace: Process 3916 attached [pid 3911] wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 3912 [pid 3916] execve("/usr/libexec/perf-core/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/tmp/./sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/home/milian/.bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/usr/lib/icecream/libexec/icecc/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/ssd2/milian/projects/compiled/other/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/home/milian/.bin/kf5/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/ssd2/milian/projects/compiled/kf5/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/home/milian/projects/compiled/other/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/home/milian/projects/compiled/kf5/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/usr/local/sbin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/usr/local/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */) = -1 ENOENT (No such file or directory) [pid 3916] execve("/usr/bin/sleep", ["sleep", "1"], 0x27d3650 /* 22 vars */ Performance counter stats for 'sleep 1': task-clock context-switches cpu-migrations page-faults cycles instructions branches branch-misses 0.000047194 seconds time elapsed [pid 3916] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=3911, si_uid=0} --- [pid 3916] +++ killed by SIGTERM +++ [pid 3911] --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=3916, si_uid=0, si_status=SIGTERM, si_utime=0, si_stime=0} --- [pid 3915] --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3914, si_uid=0} --- [pid 3911] +++ exited with 0 +++ [pid 3915] --- SIGHUP {si_signo=SIGHUP, si_code=SI_USER, si_pid=3914, si_uid=0} --- [pid 3915] +++ exited with 0 +++ +++ exited with 0 +++ This patch uses waitpid instead to ensure the call waits for the debuggee application launched by 'perf stat'. This fixes 'perf stat' when launched from an AppImage: $ ./perf-x86_64.AppImage stat sleep 1 Performance counter stats for 'sleep 1': 0.357235 task-clock (msec) # 0.000 CPUs utilized 1 context-switches # 0.003 M/sec 0 cpu-migrations # 0.000 K/sec 50 page-faults # 0.140 M/sec 1269602 cycles # 3.554 GHz 654278 instructions # 0.52 insn per cycle 129963 branches # 363.803 M/sec 7082 branch-misses # 5.45% of all branches 1.000633420 seconds time elapsed Signed-off-by: Milian Wolff Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170912152523.4497-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 85e992d9215b..69523ed55894 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -707,7 +707,7 @@ try_again: process_interval(); } } - wait(&status); + waitpid(child_pid, &status, 0); if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));