2014-06-25 15:49:03 +00:00
|
|
|
/*
|
|
|
|
* Use DWARF Debug information to skip unnecessary callchain entries.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2014 Sukadev Bhattiprolu, IBM Corporation.
|
|
|
|
* Copyright (C) 2014 Ulrich Weigand, IBM Corporation.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
#include <inttypes.h>
#include <stdlib.h>
|
|
|
|
#include <dwarf.h>
|
|
|
|
#include <elfutils/libdwfl.h>
|
|
|
|
|
|
|
|
#include "util/thread.h"
|
|
|
|
#include "util/callchain.h"
|
2014-08-07 07:27:00 +00:00
|
|
|
#include "util/debug.h"
|
2014-06-25 15:49:03 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* When saving the callchain on Power, the kernel conservatively saves
|
|
|
|
* excess entries in the callchain. A few of these entries are needed
|
|
|
|
* in some cases but not others. If the unnecessary entries are not
|
|
|
|
* ignored, we end up with duplicate arcs in the call-graphs. Use
|
|
|
|
* DWARF debug information to skip over any unnecessary callchain
|
|
|
|
* entries.
|
|
|
|
*
|
|
|
|
* See function header for arch_skip_callchain_idx() below for more details.
|
|
|
|
*
|
|
|
|
* The libdwfl code in this file is based on code from elfutils
|
|
|
|
* (libdwfl/argp-std.c, libdwfl/tests/addrcfi.c, etc).
|
|
|
|
*/
|
|
|
|
static char *debuginfo_path;
|
|
|
|
|
|
|
|
static const Dwfl_Callbacks offline_callbacks = {
|
|
|
|
.debuginfo_path = &debuginfo_path,
|
|
|
|
.find_debuginfo = dwfl_standard_find_debuginfo,
|
|
|
|
.section_address = dwfl_offline_section_address,
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use the DWARF expression for the Call-frame-address and determine
|
|
|
|
* if return address is in LR and if a new frame was allocated.
|
|
|
|
*/
|
|
|
|
static int check_return_reg(int ra_regno, Dwarf_Frame *frame)
|
|
|
|
{
|
|
|
|
Dwarf_Op ops_mem[2];
|
|
|
|
Dwarf_Op dummy;
|
|
|
|
Dwarf_Op *ops = &dummy;
|
|
|
|
size_t nops;
|
|
|
|
int result;
|
|
|
|
|
|
|
|
result = dwarf_frame_register(frame, ra_regno, ops_mem, &ops, &nops);
|
|
|
|
if (result < 0) {
|
|
|
|
pr_debug("dwarf_frame_register() %s\n", dwarf_errmsg(-1));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
perf powerpc: Fix callchain ip filtering when return address is in a register
For powerpc64, perf will filter out the second entry in the callchain,
i.e. the LR value, if the return address of the function corresponding
to the probed location has already been saved on its caller's stack.
The state of the return address is determined using debug information.
At any point within a function, if the return address is already saved
somewhere, a DWARF expression can tell us about its location. If the
return address in still in LR only, no DWARF expression would exist.
Typically, the instructions in a function's prologue first copy the LR
value to R0 and then pushes R0 on to the stack. If LR has already been
copied to R0 but R0 is yet to be pushed to the stack, we can still get a
DWARF expression that says that the return address is in R0. This is
indicating that getting a DWARF expression for the return address does
not guarantee the fact that it has already been saved on the stack.
This can be observed on a powerpc64le system running Fedora 27 as shown
below.
# objdump -d /usr/lib64/libc-2.26.so | less
...
000000000015af20 <inet_pton>:
15af20: 0b 00 4c 3c addis r2,r12,11
15af24: e0 c1 42 38 addi r2,r2,-15904
15af28: a6 02 08 7c mflr r0
15af2c: f0 ff c1 fb std r30,-16(r1)
15af30: f8 ff e1 fb std r31,-8(r1)
15af34: 78 1b 7f 7c mr r31,r3
15af38: 78 23 83 7c mr r3,r4
15af3c: 78 2b be 7c mr r30,r5
15af40: 10 00 01 f8 std r0,16(r1)
15af44: c1 ff 21 f8 stdu r1,-64(r1)
15af48: 28 00 81 f8 std r4,40(r1)
...
# readelf --debug-dump=frames-interp /usr/lib64/libc-2.26.so | less
...
00027024 0000000000000024 00027028 FDE cie=00000000 pc=000000000015af20..000000000015af88
LOC CFA r30 r31 ra
000000000015af20 r1+0 u u u
000000000015af34 r1+0 c-16 c-8 r0
000000000015af48 r1+64 c-16 c-8 c+16
000000000015af5c r1+0 c-16 c-8 c+16
000000000015af78 r1+0 u u
...
# perf probe -x /usr/lib64/libc-2.26.so -a inet_pton+0x18
# perf record -e probe_libc:inet_pton -g ping -6 -c 1 ::1
# perf script
Before:
ping 2829 [005] 512917.460174: probe_libc:inet_pton: (7fff7e2baf38)
7fff7e2baf38 __GI___inet_pton+0x18 (/usr/lib64/libc-2.26.so)
7fff7e2705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so)
12f152d70 _init+0xbfc (/usr/bin/ping)
7fff7e1836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fff7e183898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
After:
ping 2829 [005] 512917.460174: probe_libc:inet_pton: (7fff7e2baf38)
7fff7e2baf38 __GI___inet_pton+0x18 (/usr/lib64/libc-2.26.so)
7fff7e26fa54 gaih_inet.constprop.7+0xf44 (/usr/lib64/libc-2.26.so)
7fff7e2705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so)
12f152d70 _init+0xbfc (/usr/bin/ping)
7fff7e1836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fff7e183898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
Reported-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Maynard Johnson <maynard@us.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/66e848a7bdf2d43b39210a705ff6d828a0865661.1530724939.git.sandipan@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-07-10 13:58:14 +00:00
|
|
|
* Check if return address is on the stack. If return address
|
|
|
|
* is in a register (typically R0), it is yet to be saved on
|
|
|
|
* the stack.
|
2014-06-25 15:49:03 +00:00
|
|
|
*/
|
perf powerpc: Fix callchain ip filtering when return address is in a register
For powerpc64, perf will filter out the second entry in the callchain,
i.e. the LR value, if the return address of the function corresponding
to the probed location has already been saved on its caller's stack.
The state of the return address is determined using debug information.
At any point within a function, if the return address is already saved
somewhere, a DWARF expression can tell us about its location. If the
return address in still in LR only, no DWARF expression would exist.
Typically, the instructions in a function's prologue first copy the LR
value to R0 and then pushes R0 on to the stack. If LR has already been
copied to R0 but R0 is yet to be pushed to the stack, we can still get a
DWARF expression that says that the return address is in R0. This is
indicating that getting a DWARF expression for the return address does
not guarantee the fact that it has already been saved on the stack.
This can be observed on a powerpc64le system running Fedora 27 as shown
below.
# objdump -d /usr/lib64/libc-2.26.so | less
...
000000000015af20 <inet_pton>:
15af20: 0b 00 4c 3c addis r2,r12,11
15af24: e0 c1 42 38 addi r2,r2,-15904
15af28: a6 02 08 7c mflr r0
15af2c: f0 ff c1 fb std r30,-16(r1)
15af30: f8 ff e1 fb std r31,-8(r1)
15af34: 78 1b 7f 7c mr r31,r3
15af38: 78 23 83 7c mr r3,r4
15af3c: 78 2b be 7c mr r30,r5
15af40: 10 00 01 f8 std r0,16(r1)
15af44: c1 ff 21 f8 stdu r1,-64(r1)
15af48: 28 00 81 f8 std r4,40(r1)
...
# readelf --debug-dump=frames-interp /usr/lib64/libc-2.26.so | less
...
00027024 0000000000000024 00027028 FDE cie=00000000 pc=000000000015af20..000000000015af88
LOC CFA r30 r31 ra
000000000015af20 r1+0 u u u
000000000015af34 r1+0 c-16 c-8 r0
000000000015af48 r1+64 c-16 c-8 c+16
000000000015af5c r1+0 c-16 c-8 c+16
000000000015af78 r1+0 u u
...
# perf probe -x /usr/lib64/libc-2.26.so -a inet_pton+0x18
# perf record -e probe_libc:inet_pton -g ping -6 -c 1 ::1
# perf script
Before:
ping 2829 [005] 512917.460174: probe_libc:inet_pton: (7fff7e2baf38)
7fff7e2baf38 __GI___inet_pton+0x18 (/usr/lib64/libc-2.26.so)
7fff7e2705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so)
12f152d70 _init+0xbfc (/usr/bin/ping)
7fff7e1836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fff7e183898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
After:
ping 2829 [005] 512917.460174: probe_libc:inet_pton: (7fff7e2baf38)
7fff7e2baf38 __GI___inet_pton+0x18 (/usr/lib64/libc-2.26.so)
7fff7e26fa54 gaih_inet.constprop.7+0xf44 (/usr/lib64/libc-2.26.so)
7fff7e2705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so)
12f152d70 _init+0xbfc (/usr/bin/ping)
7fff7e1836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fff7e183898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
Reported-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Maynard Johnson <maynard@us.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/66e848a7bdf2d43b39210a705ff6d828a0865661.1530724939.git.sandipan@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-07-10 13:58:14 +00:00
|
|
|
if ((nops != 0 || ops != NULL) &&
|
|
|
|
!(nops == 1 && ops[0].atom == DW_OP_regx &&
|
|
|
|
ops[0].number2 == 0 && ops[0].offset == 0))
|
2014-06-25 15:49:03 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return address is in LR. Check if a frame was allocated
|
|
|
|
* but not-yet used.
|
|
|
|
*/
|
|
|
|
result = dwarf_frame_cfa(frame, &ops, &nops);
|
|
|
|
if (result < 0) {
|
|
|
|
pr_debug("dwarf_frame_cfa() returns %d, %s\n", result,
|
|
|
|
dwarf_errmsg(-1));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If call frame address is in r1, no new frame was allocated.
|
|
|
|
*/
|
|
|
|
if (nops == 1 && ops[0].atom == DW_OP_bregx && ops[0].number == 1 &&
|
|
|
|
ops[0].number2 == 0)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A new frame was allocated but has not yet been used.
|
|
|
|
*/
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the DWARF frame from the .eh_frame section.
|
|
|
|
*/
|
|
|
|
static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc)
|
|
|
|
{
|
|
|
|
int result;
|
|
|
|
Dwarf_Addr bias;
|
|
|
|
Dwarf_CFI *cfi;
|
|
|
|
Dwarf_Frame *frame;
|
|
|
|
|
|
|
|
cfi = dwfl_module_eh_cfi(mod, &bias);
|
|
|
|
if (!cfi) {
|
|
|
|
pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-11-22 01:33:53 +00:00
|
|
|
result = dwarf_cfi_addrframe(cfi, pc-bias, &frame);
|
2014-06-25 15:49:03 +00:00
|
|
|
if (result) {
|
|
|
|
pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return frame;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the DWARF frame from the .debug_frame section.
|
|
|
|
*/
|
|
|
|
static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
|
|
|
|
{
|
|
|
|
Dwarf_CFI *cfi;
|
|
|
|
Dwarf_Addr bias;
|
|
|
|
Dwarf_Frame *frame;
|
|
|
|
int result;
|
|
|
|
|
|
|
|
cfi = dwfl_module_dwarf_cfi(mod, &bias);
|
|
|
|
if (!cfi) {
|
|
|
|
pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-11-22 01:33:53 +00:00
|
|
|
result = dwarf_cfi_addrframe(cfi, pc-bias, &frame);
|
2014-06-25 15:49:03 +00:00
|
|
|
if (result) {
|
|
|
|
pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return frame;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return:
|
|
|
|
* 0 if return address for the program counter @pc is on stack
|
|
|
|
* 1 if return address is in LR and no new stack frame was allocated
|
|
|
|
* 2 if return address is in LR and a new frame was allocated (but not
|
|
|
|
* yet used)
|
|
|
|
* -1 in case of errors
|
|
|
|
*/
|
2014-11-22 01:33:53 +00:00
|
|
|
static int check_return_addr(struct dso *dso, u64 map_start, Dwarf_Addr pc)
|
2014-06-25 15:49:03 +00:00
|
|
|
{
|
|
|
|
int rc = -1;
|
|
|
|
Dwfl *dwfl;
|
|
|
|
Dwfl_Module *mod;
|
|
|
|
Dwarf_Frame *frame;
|
|
|
|
int ra_regno;
|
|
|
|
Dwarf_Addr start = pc;
|
|
|
|
Dwarf_Addr end = pc;
|
|
|
|
bool signalp;
|
2014-11-22 01:33:53 +00:00
|
|
|
const char *exec_file = dso->long_name;
|
2014-06-25 15:49:03 +00:00
|
|
|
|
perf tools powerpc: Cache the DWARF debug info
Cache the DWARF debug info for DSO so we don't have to rebuild it for each
address in the DSO.
Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL.
$ /tmp/perf.orig --version
perf version 3.18.rc1.gc2661b8
$ /tmp/perf.new --version
perf version 3.18.rc1.g402d62
$ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig
Performance counter stats for '/tmp/perf.orig report -g':
6,428,177,183 cycles # 0.000 GHz
4,176,288,391 instructions # 0.65 insns per cycle
1.840666132 seconds time elapsed
$ perf stat -e cycles,instructions /tmp/perf.new report -g > new
Performance counter stats for '/tmp/perf.new report -g':
305,773,142 cycles # 0.000 GHz
276,048,272 instructions # 0.90 insns per cycle
0.087693543 seconds time elapsed
$ diff orig new
$
Changelog[v2]:
[Arnaldo Carvalho] Cache in existing global objects rather than create
new static/globals in functions.
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20141022000958.GB2228@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2014-10-22 00:09:58 +00:00
|
|
|
dwfl = dso->dwfl;
|
2014-06-25 15:49:03 +00:00
|
|
|
|
perf tools powerpc: Cache the DWARF debug info
Cache the DWARF debug info for DSO so we don't have to rebuild it for each
address in the DSO.
Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL.
$ /tmp/perf.orig --version
perf version 3.18.rc1.gc2661b8
$ /tmp/perf.new --version
perf version 3.18.rc1.g402d62
$ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig
Performance counter stats for '/tmp/perf.orig report -g':
6,428,177,183 cycles # 0.000 GHz
4,176,288,391 instructions # 0.65 insns per cycle
1.840666132 seconds time elapsed
$ perf stat -e cycles,instructions /tmp/perf.new report -g > new
Performance counter stats for '/tmp/perf.new report -g':
305,773,142 cycles # 0.000 GHz
276,048,272 instructions # 0.90 insns per cycle
0.087693543 seconds time elapsed
$ diff orig new
$
Changelog[v2]:
[Arnaldo Carvalho] Cache in existing global objects rather than create
new static/globals in functions.
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20141022000958.GB2228@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2014-10-22 00:09:58 +00:00
|
|
|
if (!dwfl) {
|
|
|
|
dwfl = dwfl_begin(&offline_callbacks);
|
|
|
|
if (!dwfl) {
|
|
|
|
pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2014-11-22 01:33:53 +00:00
|
|
|
mod = dwfl_report_elf(dwfl, exec_file, exec_file, -1,
|
|
|
|
map_start, false);
|
|
|
|
if (!mod) {
|
|
|
|
pr_debug("dwfl_report_elf() failed %s\n",
|
perf tools powerpc: Cache the DWARF debug info
Cache the DWARF debug info for DSO so we don't have to rebuild it for each
address in the DSO.
Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL.
$ /tmp/perf.orig --version
perf version 3.18.rc1.gc2661b8
$ /tmp/perf.new --version
perf version 3.18.rc1.g402d62
$ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig
Performance counter stats for '/tmp/perf.orig report -g':
6,428,177,183 cycles # 0.000 GHz
4,176,288,391 instructions # 0.65 insns per cycle
1.840666132 seconds time elapsed
$ perf stat -e cycles,instructions /tmp/perf.new report -g > new
Performance counter stats for '/tmp/perf.new report -g':
305,773,142 cycles # 0.000 GHz
276,048,272 instructions # 0.90 insns per cycle
0.087693543 seconds time elapsed
$ diff orig new
$
Changelog[v2]:
[Arnaldo Carvalho] Cache in existing global objects rather than create
new static/globals in functions.
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20141022000958.GB2228@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2014-10-22 00:09:58 +00:00
|
|
|
dwarf_errmsg(-1));
|
|
|
|
/*
|
|
|
|
* We normally cache the DWARF debug info and never
|
|
|
|
* call dwfl_end(). But to prevent fd leak, free in
|
|
|
|
* case of error.
|
|
|
|
*/
|
|
|
|
dwfl_end(dwfl);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
dso->dwfl = dwfl;
|
2014-06-25 15:49:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
mod = dwfl_addrmodule(dwfl, pc);
|
|
|
|
if (!mod) {
|
|
|
|
pr_debug("dwfl_addrmodule() failed, %s\n", dwarf_errmsg(-1));
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* To work with split debug info files (eg: glibc), check both
|
|
|
|
* .eh_frame and .debug_frame sections of the ELF header.
|
|
|
|
*/
|
|
|
|
frame = get_eh_frame(mod, pc);
|
|
|
|
if (!frame) {
|
|
|
|
frame = get_dwarf_frame(mod, pc);
|
|
|
|
if (!frame)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
ra_regno = dwarf_frame_info(frame, &start, &end, &signalp);
|
|
|
|
if (ra_regno < 0) {
|
|
|
|
pr_debug("Return address register unavailable: %s\n",
|
|
|
|
dwarf_errmsg(-1));
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = check_return_reg(ra_regno, frame);
|
|
|
|
|
|
|
|
out:
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The callchain saved by the kernel always includes the link register (LR).
|
|
|
|
*
|
|
|
|
* 0: PERF_CONTEXT_USER
|
|
|
|
* 1: Program counter (Next instruction pointer)
|
|
|
|
* 2: LR value
|
|
|
|
* 3: Caller's caller
|
|
|
|
* 4: ...
|
|
|
|
*
|
|
|
|
* The value in LR is only needed when it holds a return address. If the
|
|
|
|
* return address is on the stack, we should ignore the LR value.
|
|
|
|
*
|
|
|
|
* Further, when the return address is in the LR, if a new frame was just
|
|
|
|
* allocated but the LR was not saved into it, then the LR contains the
|
|
|
|
* caller, slot 4: contains the caller's caller and the contents of slot 3:
|
|
|
|
* (chain->ips[3]) is undefined and must be ignored.
|
|
|
|
*
|
|
|
|
* Use DWARF debug information to determine if any entries need to be skipped.
|
|
|
|
*
|
|
|
|
* Return:
|
|
|
|
* index: of callchain entry that needs to be ignored (if any)
|
|
|
|
* -1 if no entry needs to be ignored or in case of errors
|
|
|
|
*/
|
2014-10-23 15:50:25 +00:00
|
|
|
int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
|
2014-06-25 15:49:03 +00:00
|
|
|
{
|
|
|
|
struct addr_location al;
|
|
|
|
struct dso *dso = NULL;
|
|
|
|
int rc;
|
|
|
|
u64 ip;
|
|
|
|
u64 skip_slot = -1;
|
|
|
|
|
2018-06-11 10:40:49 +00:00
|
|
|
if (!chain || chain->nr < 3)
|
2014-06-25 15:49:03 +00:00
|
|
|
return skip_slot;
|
|
|
|
|
perf powerpc: Fix callchain ip filtering
For powerpc64, redundant entries in the callchain are filtered out by
determining the state of the return address and the stack frame using
DWARF debug information.
For making these filtering decisions we must analyze the debug
information for the location corresponding to the program counter value,
i.e. the first entry in the callchain, and not the LR value; otherwise,
perf may filter out either the second or the third entry in the
callchain incorrectly.
This can be observed on a powerpc64le system running Fedora 27 as shown
below.
Case 1 - Attaching a probe at inet_pton+0x8 (binary offset 0x15af28).
Return address is still in LR and a new stack frame is not yet
allocated. The LR value, i.e. the second entry, should not be
filtered out.
# objdump -d /usr/lib64/libc-2.26.so | less
...
000000000010eb10 <gaih_inet.constprop.7>:
...
10fa48: 78 bb e4 7e mr r4,r23
10fa4c: 0a 00 60 38 li r3,10
10fa50: d9 b4 04 48 bl 15af28 <inet_pton+0x8>
10fa54: 00 00 00 60 nop
10fa58: ac f4 ff 4b b 10ef04 <gaih_inet.constprop.7+0x3f4>
...
0000000000110450 <getaddrinfo>:
...
1105a8: 54 00 ff 38 addi r7,r31,84
1105ac: 58 00 df 38 addi r6,r31,88
1105b0: 69 e5 ff 4b bl 10eb18 <gaih_inet.constprop.7+0x8>
1105b4: 78 1b 71 7c mr r17,r3
1105b8: 50 01 7f e8 ld r3,336(r31)
...
000000000015af20 <inet_pton>:
15af20: 0b 00 4c 3c addis r2,r12,11
15af24: e0 c1 42 38 addi r2,r2,-15904
15af28: a6 02 08 7c mflr r0
15af2c: f0 ff c1 fb std r30,-16(r1)
15af30: f8 ff e1 fb std r31,-8(r1)
...
# perf probe -x /usr/lib64/libc-2.26.so -a inet_pton+0x8
# perf record -e probe_libc:inet_pton -g ping -6 -c 1 ::1
# perf script
Before:
ping 4507 [002] 514985.546540: probe_libc:inet_pton: (7fffa7dbaf28)
7fffa7dbaf28 __GI___inet_pton+0x8 (/usr/lib64/libc-2.26.so)
7fffa7d705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so)
13fb52d70 _init+0xbfc (/usr/bin/ping)
7fffa7c836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fffa7c83898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
After:
ping 4507 [002] 514985.546540: probe_libc:inet_pton: (7fffa7dbaf28)
7fffa7dbaf28 __GI___inet_pton+0x8 (/usr/lib64/libc-2.26.so)
7fffa7d6fa54 gaih_inet.constprop.7+0xf44 (/usr/lib64/libc-2.26.so)
7fffa7d705b4 getaddrinfo+0x164 (/usr/lib64/libc-2.26.so)
13fb52d70 _init+0xbfc (/usr/bin/ping)
7fffa7c836a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fffa7c83898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
Case 2 - Attaching a probe at _int_malloc+0x180 (binary offset 0x9cf10).
Return address in still in LR and a new stack frame has already
been allocated but not used. The caller's caller, i.e. the third
entry, is invalid and should be filtered out and not the second
one.
# objdump -d /usr/lib64/libc-2.26.so | less
...
000000000009cd90 <_int_malloc>:
9cd90: 17 00 4c 3c addis r2,r12,23
9cd94: 70 a3 42 38 addi r2,r2,-23696
9cd98: 26 00 80 7d mfcr r12
9cd9c: f8 ff e1 fb std r31,-8(r1)
9cda0: 17 00 e4 3b addi r31,r4,23
9cda4: d8 ff 61 fb std r27,-40(r1)
9cda8: 78 23 9b 7c mr r27,r4
9cdac: 1f 00 bf 2b cmpldi cr7,r31,31
9cdb0: f0 ff c1 fb std r30,-16(r1)
9cdb4: b0 ff c1 fa std r22,-80(r1)
9cdb8: 78 1b 7e 7c mr r30,r3
9cdbc: 08 00 81 91 stw r12,8(r1)
9cdc0: 11 ff 21 f8 stdu r1,-240(r1)
9cdc4: 4c 01 9d 41 bgt cr7,9cf10 <_int_malloc+0x180>
9cdc8: 20 00 a4 2b cmpldi cr7,r4,32
...
9cf08: 00 00 00 60 nop
9cf0c: 00 00 42 60 ori r2,r2,0
9cf10: e4 06 ff 7b rldicr r31,r31,0,59
9cf14: 40 f8 a4 7f cmpld cr7,r4,r31
9cf18: 68 05 9d 41 bgt cr7,9d480 <_int_malloc+0x6f0>
...
000000000009e3c0 <tcache_init.part.4>:
...
9e420: 40 02 80 38 li r4,576
9e424: 78 fb e3 7f mr r3,r31
9e428: 71 e9 ff 4b bl 9cd98 <_int_malloc+0x8>
9e42c: 00 00 a3 2f cmpdi cr7,r3,0
9e430: 78 1b 7e 7c mr r30,r3
...
000000000009f7a0 <__libc_malloc>:
...
9f8f8: 00 00 89 2f cmpwi cr7,r9,0
9f8fc: 1c ff 9e 40 bne cr7,9f818 <__libc_malloc+0x78>
9f900: c9 ea ff 4b bl 9e3c8 <tcache_init.part.4+0x8>
9f904: 00 00 00 60 nop
9f908: e8 90 22 e9 ld r9,-28440(r2)
...
# perf probe -x /usr/lib64/libc-2.26.so -a _int_malloc+0x180
# perf record -e probe_libc:_int_malloc -g ./test-malloc
# perf script
Before:
test-malloc 6554 [009] 515975.797403: probe_libc:_int_malloc: (7fffa6e6cf10)
7fffa6e6cf10 _int_malloc+0x180 (/usr/lib64/libc-2.26.so)
7fffa6dd0000 [unknown] (/usr/lib64/libc-2.26.so)
7fffa6e6f904 malloc+0x164 (/usr/lib64/libc-2.26.so)
7fffa6e6f9fc malloc+0x25c (/usr/lib64/libc-2.26.so)
100006b4 main+0x38 (/home/testuser/test-malloc)
7fffa6df36a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fffa6df3898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
After:
test-malloc 6554 [009] 515975.797403: probe_libc:_int_malloc: (7fffa6e6cf10)
7fffa6e6cf10 _int_malloc+0x180 (/usr/lib64/libc-2.26.so)
7fffa6e6e42c tcache_init.part.4+0x6c (/usr/lib64/libc-2.26.so)
7fffa6e6f904 malloc+0x164 (/usr/lib64/libc-2.26.so)
7fffa6e6f9fc malloc+0x25c (/usr/lib64/libc-2.26.so)
100006b4 main+0x38 (/home/sandipan/test-malloc)
7fffa6df36a0 generic_start_main.isra.0+0x140 (/usr/lib64/libc-2.26.so)
7fffa6df3898 __libc_start_main+0xb8 (/usr/lib64/libc-2.26.so)
0 [unknown] ([unknown])
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Maynard Johnson <maynard@us.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Fixes: a60335ba3298 ("perf tools powerpc: Adjust callchain based on DWARF debug info")
Link: http://lkml.kernel.org/r/24bb726d91ed173aebc972ec3f41a2ef2249434e.1530724939.git.sandipan@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-07-10 13:58:13 +00:00
|
|
|
ip = chain->ips[1];
|
2014-06-25 15:49:03 +00:00
|
|
|
|
2018-04-24 14:24:49 +00:00
|
|
|
thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
|
2014-06-25 15:49:03 +00:00
|
|
|
|
|
|
|
if (al.map)
|
|
|
|
dso = al.map->dso;
|
|
|
|
|
|
|
|
if (!dso) {
|
|
|
|
pr_debug("%" PRIx64 " dso is NULL\n", ip);
|
|
|
|
return skip_slot;
|
|
|
|
}
|
|
|
|
|
2014-11-22 01:33:53 +00:00
|
|
|
rc = check_return_addr(dso, al.map->start, ip);
|
2014-06-25 15:49:03 +00:00
|
|
|
|
2014-11-22 01:33:53 +00:00
|
|
|
pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n",
|
|
|
|
dso->long_name, al.sym->name, ip, rc);
|
2014-06-25 15:49:03 +00:00
|
|
|
|
|
|
|
if (rc == 0) {
|
|
|
|
/*
|
|
|
|
* Return address on stack. Ignore LR value in callchain
|
|
|
|
*/
|
|
|
|
skip_slot = 2;
|
|
|
|
} else if (rc == 2) {
|
|
|
|
/*
|
|
|
|
* New frame allocated but return address still in LR.
|
|
|
|
* Ignore the caller's caller entry in callchain.
|
|
|
|
*/
|
|
|
|
skip_slot = 3;
|
|
|
|
}
|
|
|
|
return skip_slot;
|
|
|
|
}
|