forked from Minki/linux
perf/core improvements and fixes:
- Support Intel PT in several tools, enabling the use of the processor trace feature introduced in Intel Broadwell processors: (Adrian Hunter) # dmesg | grep Performance # [0.188477] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver. # perf record -e intel_pt//u -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.216 MB perf.data ] # perf script # then navigate in the tool output to some area, like this one: 184 1030 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba661440 dl_main (/usr/lib64/ld-2.17.so) 185 1457 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba669f10 _dl_new_object (/usr/lib64/ld-2.17.so) 186 9f37 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba677b90 strlen (/usr/lib64/ld-2.17.so) 187 7ba3 strlen (/usr/lib64/ld-2.17.so) => 7f21ba677c75 strlen (/usr/lib64/ld-2.17.so) 188 7c78 strlen (/usr/lib64/ld-2.17.so) => 7f21ba669f3c _dl_new_object (/usr/lib64/ld-2.17.so) 189 9f8a _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba65fab0 calloc@plt (/usr/lib64/ld-2.17.so) 190 fab0 calloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e70 calloc (/usr/lib64/ld-2.17.so) 191 5e87 calloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa90 malloc@plt (/usr/lib64/ld-2.17.so) 192 fa90 malloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e60 malloc (/usr/lib64/ld-2.17.so) 193 5e68 malloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) 194 fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) => 7f21ba675d50 __libc_memalign (/usr/lib64/ld-2.17.so) 195 5d63 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e20 __libc_memalign (/usr/lib64/ld-2.17.so) 196 5e40 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675d73 __libc_memalign (/usr/lib64/ld-2.17.so) 197 5d97 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e18 __libc_memalign (/usr/lib64/ld-2.17.so) 198 5e1e __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675df9 __libc_memalign (/usr/lib64/ld-2.17.so) 199 5e10 __libc_memalign 
(/usr/lib64/ld-2.17.so) => 7f21ba669f8f _dl_new_object (/usr/lib64/ld-2.17.so) 200 9fc2 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba678e70 memcpy (/usr/lib64/ld-2.17.so) 201 8e8c memcpy (/usr/lib64/ld-2.17.so) => 7f21ba678ea0 memcpy (/usr/lib64/ld-2.17.so) - Fix annotation of vdso (Adrian Hunter) - Fix DWARF callchains in 'perf script' (Jiri Olsa) - Fix adding probes in kernel syscalls and listing which variables can be collected at kernel syscall function lines (Masami Hiramatsu) Build Fixes: - Fix 32-bit compilation error in util/annotate.c (Adrian Hunter) - Support static linking with libdw on Fedora 22 (Andi Kleen) Infrastructure: - Add a helper function to probe whether cpu-wide tracing is possible (Adrian Hunter) - Move vfs_getname storage to per thread area in 'perf trace' (Arnaldo Carvalho de Melo) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJV0jGMAAoJENZQFvNTUqpAZbUQAIiL/kt1TtOYwF1sXHJ9sUCZ vwAnk40Iqfd1AbXeIo4rUfrSufxi2AA5vvAIo7rNtS9sjAVnDAtqzqaNZ5Z5RP8m cbUcF8CUcoShdYSVbuEY2D0EF++E+xMny54FEmU8i4D5yXCiuIC1LGauWifJF+po wEk/r4hb8HbTMy0oKNZMcTp5lIc0zeUqRVLWzSehMHcAk5wERLhLWm2nBvWzH8gU ERDVYxSUcv0j+CaLRxUPyE1V3TTqjjY5YN2MiqndTaBFFnHwJhxoxNUK6P2RmEJd PA9OA7cLUUTGvG+u1kLZj3L5fjcOOPJvvU0gYDZUJNMX+6E9QGcqr9CfpDHsGzu4 UsIsQcmlZ/n3DbVrS5i1XOFmWu/f34v916zhm1fBqJmVuu3EmtHBuvSyjmxzzezY 2rwtrD0Z2k9xixAtZ+opFQb6tOKOrdl2TyVzlKdMslrWYRMAxeoSTAIZbynlFkO1 U/G2edOb0RZo3WZmNwvDxKKOh1Uv+kI9vosEnS36dl2Y9u9joF4UtlIX2jG7Kb3b 0QVb/r4DkKmtywW+CIXtPO4b6/I1NLD3HZRs0KnsVxicoo3B3hR2ej/0Ik+TW6hJ 0E41hiHyzx41ppzBDfyfrUdcgi1yum/R2aeob7XLCVD7b1Qv/cIOk/+85NAXC113 bb4zjKQLx4vLGVYDLZe+ =ZmA9 -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Support Intel PT in several tools, enabling the use of the processor trace feature introduced in Intel Broadwell 
processors: (Adrian Hunter) # dmesg | grep Performance # [0.188477] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver. # perf record -e intel_pt//u -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.216 MB perf.data ] # perf script # then navigate in the tool output to some area, like this one: 184 1030 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba661440 dl_main (/usr/lib64/ld-2.17.so) 185 1457 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba669f10 _dl_new_object (/usr/lib64/ld-2.17.so) 186 9f37 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba677b90 strlen (/usr/lib64/ld-2.17.so) 187 7ba3 strlen (/usr/lib64/ld-2.17.so) => 7f21ba677c75 strlen (/usr/lib64/ld-2.17.so) 188 7c78 strlen (/usr/lib64/ld-2.17.so) => 7f21ba669f3c _dl_new_object (/usr/lib64/ld-2.17.so) 189 9f8a _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba65fab0 calloc@plt (/usr/lib64/ld-2.17.so) 190 fab0 calloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e70 calloc (/usr/lib64/ld-2.17.so) 191 5e87 calloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa90 malloc@plt (/usr/lib64/ld-2.17.so) 192 fa90 malloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e60 malloc (/usr/lib64/ld-2.17.so) 193 5e68 malloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) 194 fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) => 7f21ba675d50 __libc_memalign (/usr/lib64/ld-2.17.so) 195 5d63 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e20 __libc_memalign (/usr/lib64/ld-2.17.so) 196 5e40 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675d73 __libc_memalign (/usr/lib64/ld-2.17.so) 197 5d97 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e18 __libc_memalign (/usr/lib64/ld-2.17.so) 198 5e1e __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675df9 __libc_memalign (/usr/lib64/ld-2.17.so) 199 5e10 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba669f8f _dl_new_object (/usr/lib64/ld-2.17.so) 200 9fc2 _dl_new_object 
(/usr/lib64/ld-2.17.so) => 7f21ba678e70 memcpy (/usr/lib64/ld-2.17.so) 201 8e8c memcpy (/usr/lib64/ld-2.17.so) => 7f21ba678ea0 memcpy (/usr/lib64/ld-2.17.so) - Fix annotation of vdso (Adrian Hunter) - Fix DWARF callchains in 'perf script' (Jiri Olsa) - Fix adding probes in kernel syscalls and listing which variables can be collected at kernel syscall function lines (Masami Hiramatsu) Build Fixes: - Fix 32-bit compilation error in util/annotate.c (Adrian Hunter) - Support static linking with libdw on Fedora 22 (Andi Kleen) Infrastructure changes: - Add a helper function to probe whether cpu-wide tracing is possible (Adrian Hunter) - Move vfs_getname storage to per thread area in 'perf trace' (Arnaldo Carvalho de Melo) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
dd2281be03
@ -57,6 +57,8 @@ quiet_cmd_cc_i_c = CPP $@
|
||||
quiet_cmd_cc_s_c = AS $@
|
||||
cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
|
||||
|
||||
quiet_cmd_gen = GEN $@
|
||||
|
||||
# Link aggregate command
|
||||
# If there's nothing to link, create empty $@ object.
|
||||
quiet_cmd_ld_multi = LD $@
|
||||
|
@ -70,8 +70,13 @@ test-libelf.bin:
|
||||
test-glibc.bin:
|
||||
$(BUILD)
|
||||
|
||||
DWARFLIBS := -ldw
|
||||
ifeq ($(findstring -static,${LDFLAGS}),-static)
|
||||
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
|
||||
endif
|
||||
|
||||
test-dwarf.bin:
|
||||
$(BUILD) -ldw
|
||||
$(BUILD) $(DWARFLIBS)
|
||||
|
||||
test-libelf-mmap.bin:
|
||||
$(BUILD) -lelf
|
||||
|
1
tools/perf/.gitignore
vendored
1
tools/perf/.gitignore
vendored
@ -29,3 +29,4 @@ config.mak.autogen
|
||||
*.pyc
|
||||
*.pyo
|
||||
.config-detected
|
||||
util/intel-pt-decoder/inat-tables.c
|
||||
|
588
tools/perf/Documentation/intel-pt.txt
Normal file
588
tools/perf/Documentation/intel-pt.txt
Normal file
@ -0,0 +1,588 @@
|
||||
Intel Processor Trace
|
||||
=====================
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
|
||||
collects information about software execution such as control flow, execution
|
||||
modes and timings and formats it into highly compressed binary packets.
|
||||
Technical details are documented in the Intel 64 and IA-32 Architectures
|
||||
Software Developer Manuals, Chapter 36 Intel Processor Trace.
|
||||
|
||||
Intel PT is first supported in Intel Core M and 5th generation Intel Core
|
||||
processors that are based on the Intel micro-architecture code name Broadwell.
|
||||
|
||||
Trace data is collected by 'perf record' and stored within the perf.data file.
|
||||
See below for options to 'perf record'.
|
||||
|
||||
Trace data must be 'decoded' which involves walking the object code and matching
|
||||
the trace data packets. For example a TNT packet only tells whether a
|
||||
conditional branch was taken or not taken, so to make use of that packet the
|
||||
decoder must know precisely which instruction was being executed.
|
||||
|
||||
Decoding is done on-the-fly. The decoder outputs samples in the same format as
|
||||
samples output by perf hardware events, for example as though the "instructions"
|
||||
or "branches" events had been recorded. Presently 3 tools support this:
|
||||
'perf script', 'perf report' and 'perf inject'. See below for more information
|
||||
on using those tools.
|
||||
|
||||
The main distinguishing feature of Intel PT is that the decoder can determine
|
||||
the exact flow of software execution. Intel PT can be used to understand why
|
||||
and how software got to a certain point, or behaved a certain way. The
|
||||
software does not have to be recompiled, so Intel PT works with debug or release
|
||||
builds, however the executed images are needed - which makes use in JIT-compiled
|
||||
environments, or with self-modified code, a challenge. Also symbols need to be
|
||||
provided to make sense of addresses.
|
||||
|
||||
A limitation of Intel PT is that it produces huge amounts of trace data
|
||||
(hundreds of megabytes per second per core) which takes a long time to decode,
|
||||
for example two or three orders of magnitude longer than it took to collect.
|
||||
Another limitation is the performance impact of tracing, something that will
|
||||
vary depending on the use-case and architecture.
|
||||
|
||||
|
||||
Quickstart
|
||||
==========
|
||||
|
||||
It is important to start small. That is because it is easy to capture vastly
|
||||
more data than can possibly be processed.
|
||||
|
||||
The simplest thing to do with Intel PT is userspace profiling of small programs.
|
||||
Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
|
||||
|
||||
perf record -e intel_pt//u ls
|
||||
|
||||
And profiled with 'perf report' e.g.
|
||||
|
||||
perf report
|
||||
|
||||
To also trace kernel space presents a problem, namely kernel self-modifying
|
||||
code. A fairly good kernel image is available in /proc/kcore but to get an
|
||||
accurate image a copy of /proc/kcore needs to be made under the same conditions
|
||||
as the data capture. A script perf-with-kcore can do that, but beware that the
|
||||
script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
|
||||
locally from the source tree you can do:
|
||||
|
||||
~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
|
||||
|
||||
which will create a directory named 'pt_ls' and put the perf.data file and
|
||||
copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
|
||||
'perf report' becomes:
|
||||
|
||||
~/libexec/perf-core/perf-with-kcore report pt_ls
|
||||
|
||||
Because samples are synthesized after-the-fact, the sampling period can be
|
||||
selected for reporting. e.g. sample every microsecond
|
||||
|
||||
~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
|
||||
|
||||
See the sections below for more information about the --itrace option.
|
||||
|
||||
Beware that the smaller the period, the more samples are produced, and the
|
||||
longer it takes to process them.
|
||||
|
||||
Also note that the coarseness of Intel PT timing information will start to
|
||||
distort the statistical value of the sampling as the sampling period becomes
|
||||
smaller.
|
||||
|
||||
To represent software control flow, "branches" samples are produced. By default
|
||||
a branch sample is synthesized for every single branch. To get an idea what
|
||||
data is available you can use the 'perf script' tool with no parameters, which
|
||||
will list all the samples.
|
||||
|
||||
perf record -e intel_pt//u ls
|
||||
perf script
|
||||
|
||||
An interesting field that is not printed by default is 'flags' which can be
|
||||
displayed as follows:
|
||||
|
||||
perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
|
||||
|
||||
The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
|
||||
system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
|
||||
in transaction, respectively.
|
||||
|
||||
While it is possible to create scripts to analyze the data, an alternative
|
||||
approach is available to export the data to a postgresql database. Refer to
|
||||
script export-to-postgresql.py for more details, and to script
|
||||
call-graph-from-postgresql.py for an example of using the database.
|
||||
|
||||
As mentioned above, it is easy to capture too much data. One way to limit the
|
||||
data captured is to use 'snapshot' mode which is explained further below.
|
||||
Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
|
||||
|
||||
Another problem that will be experienced is decoder errors. They can be caused
|
||||
by inability to access the executed image, self-modified or JIT-ed code, or the
|
||||
inability to match side-band information (such as context switches and mmaps)
|
||||
which results in the decoder not knowing what code was executed.
|
||||
|
||||
There is also the problem of perf not being able to copy the data fast enough,
|
||||
resulting in data being lost because the buffer was full. See 'Buffer handling' below
|
||||
for more details.
|
||||
|
||||
|
||||
perf record
|
||||
===========
|
||||
|
||||
new event
|
||||
---------
|
||||
|
||||
The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
|
||||
selected by providing the PMU name followed by the "config" separated by slashes.
|
||||
An enhancement has been made to allow default "config" e.g. the option
|
||||
|
||||
-e intel_pt//
|
||||
|
||||
will use a default config value. Currently that is the same as
|
||||
|
||||
-e intel_pt/tsc,noretcomp=0/
|
||||
|
||||
which is the same as
|
||||
|
||||
-e intel_pt/tsc=1,noretcomp=0/
|
||||
|
||||
The config terms are listed in /sys/devices/intel_pt/format. They are bit
|
||||
fields within the config member of the struct perf_event_attr which is
|
||||
passed to the kernel by the perf_event_open system call. They correspond to bit
|
||||
fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
|
||||
|
||||
$ for f in `ls /sys/devices/intel_pt/format`;do
|
||||
> echo $f
|
||||
> cat /sys/devices/intel_pt/format/$f
|
||||
> done
|
||||
noretcomp
|
||||
config:11
|
||||
tsc
|
||||
config:10
|
||||
|
||||
Note that the default config must be overridden for each term i.e.
|
||||
|
||||
-e intel_pt/noretcomp=0/
|
||||
|
||||
is the same as:
|
||||
|
||||
-e intel_pt/tsc=1,noretcomp=0/
|
||||
|
||||
So, to disable TSC packets use:
|
||||
|
||||
-e intel_pt/tsc=0/
|
||||
|
||||
It is also possible to specify the config value explicitly:
|
||||
|
||||
-e intel_pt/config=0x400/
|
||||
|
||||
Note that, as with all events, the event is suffixed with event modifiers:
|
||||
|
||||
u userspace
|
||||
k kernel
|
||||
h hypervisor
|
||||
G guest
|
||||
H host
|
||||
p precise ip
|
||||
|
||||
'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
|
||||
'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
|
||||
meaningful for Intel PT.
|
||||
|
||||
perf_event_attr is displayed if the -vv option is used e.g.
|
||||
|
||||
------------------------------------------------------------
|
||||
perf_event_attr:
|
||||
type 6
|
||||
size 112
|
||||
config 0x400
|
||||
{ sample_period, sample_freq } 1
|
||||
sample_type IP|TID|TIME|CPU|IDENTIFIER
|
||||
read_format ID
|
||||
disabled 1
|
||||
inherit 1
|
||||
exclude_kernel 1
|
||||
exclude_hv 1
|
||||
enable_on_exec 1
|
||||
sample_id_all 1
|
||||
------------------------------------------------------------
|
||||
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
|
||||
------------------------------------------------------------
|
||||
|
||||
|
||||
new snapshot option
|
||||
-------------------
|
||||
|
||||
To select snapshot mode a new option has been added:
|
||||
|
||||
-S
|
||||
|
||||
Optionally it can be followed by the snapshot size e.g.
|
||||
|
||||
-S0x100000
|
||||
|
||||
The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
|
||||
nor snapshot size is specified, then the default is 4MiB for privileged users
|
||||
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
|
||||
If an unprivileged user does not specify mmap pages, the mmap pages will be
|
||||
reduced as described in the 'new auxtrace mmap size option' section below.
|
||||
|
||||
The snapshot size is displayed if the option -vv is used e.g.
|
||||
|
||||
Intel PT snapshot size: %zu
|
||||
|
||||
|
||||
new auxtrace mmap size option
|
||||
-----------------------------
|
||||
|
||||
Intel PT buffer size is specified by an addition to the -m option e.g.
|
||||
|
||||
-m,16
|
||||
|
||||
selects a buffer size of 16 pages i.e. 64KiB.
|
||||
|
||||
Note that the existing functionality of -m is unchanged. The auxtrace mmap size
|
||||
is specified by the optional addition of a comma and the value.
|
||||
|
||||
The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
|
||||
(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
|
||||
If an unprivileged user does not specify mmap pages, the mmap pages will be
|
||||
reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
|
||||
user is likely to get an error as they exceed their mlock limit (Max locked
|
||||
memory as shown in /proc/self/limits). Note that perf does not count the first
|
||||
512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
|
||||
against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
|
||||
their mlock limit (which defaults to 64KiB but is not multiplied by the number
|
||||
of cpus).
|
||||
|
||||
In full-trace mode, powers of two are allowed for buffer size, with a minimum
|
||||
size of 2 pages. In snapshot mode, it is the same but the minimum size is
|
||||
1 page.
|
||||
|
||||
The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
|
||||
|
||||
mmap length 528384
|
||||
auxtrace mmap length 4198400
|
||||
|
||||
|
||||
Intel PT modes of operation
|
||||
---------------------------
|
||||
|
||||
Intel PT can be used in 2 modes:
|
||||
full-trace mode
|
||||
snapshot mode
|
||||
|
||||
Full-trace mode traces continuously e.g.
|
||||
|
||||
perf record -e intel_pt//u uname
|
||||
|
||||
Snapshot mode captures the available data when a signal is sent e.g.
|
||||
|
||||
perf record -v -e intel_pt//u -S ./loopy 1000000000 &
|
||||
[1] 11435
|
||||
kill -USR2 11435
|
||||
Recording AUX area tracing snapshot
|
||||
|
||||
Note that the signal sent is SIGUSR2.
|
||||
Note that "Recording AUX area tracing snapshot" is displayed because the -v
|
||||
option is used.
|
||||
|
||||
The 2 modes cannot be used together.
|
||||
|
||||
|
||||
Buffer handling
|
||||
---------------
|
||||
|
||||
There may be buffer limitations (i.e. single ToPa entry) which means that actual
|
||||
buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
|
||||
provide other sizes, and in particular an arbitrarily large size, multiple
|
||||
buffers are logically concatenated. However an interrupt must be used to switch
|
||||
between buffers. That has two potential problems:
|
||||
a) the interrupt may not be handled in time so that the current buffer
|
||||
becomes full and some trace data is lost.
|
||||
b) the interrupts may slow the system and affect the performance
|
||||
results.
|
||||
|
||||
If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
|
||||
which the tools report as an error.
|
||||
|
||||
In full-trace mode, the driver waits for data to be copied out before allowing
|
||||
the (logical) buffer to wrap-around. If data is not copied out quickly enough,
|
||||
again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
|
||||
wait, the intel_pt event gets disabled. Because it is difficult to know when
|
||||
that happens, perf tools always re-enable the intel_pt event after copying out
|
||||
data.
|
||||
|
||||
|
||||
Intel PT and build ids
|
||||
----------------------
|
||||
|
||||
By default "perf record" post-processes the event stream to find all build ids
|
||||
for executables for all addresses sampled. Deliberately, Intel PT is not
|
||||
decoded for that purpose (it would take too long). Instead the build ids for
|
||||
all executables encountered (due to mmap, comm or task events) are included
|
||||
in the perf.data file.
|
||||
|
||||
To see buildids included in the perf.data file use the command:
|
||||
|
||||
perf buildid-list
|
||||
|
||||
If the perf.data file contains Intel PT data, that is the same as:
|
||||
|
||||
perf buildid-list --with-hits
|
||||
|
||||
|
||||
Snapshot mode and event disabling
|
||||
---------------------------------
|
||||
|
||||
In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
|
||||
namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
|
||||
collection of side-band information. In order to prevent that, a dummy
|
||||
software event has been introduced that permits tracking events (like mmaps) to
|
||||
continue to be recorded while intel_pt is disabled. That is important to ensure
|
||||
there is complete side-band information to allow the decoding of subsequent
|
||||
snapshots.
|
||||
|
||||
A test has been created for that. To find the test:
|
||||
|
||||
perf test list
|
||||
...
|
||||
23: Test using a dummy software event to keep tracking
|
||||
|
||||
To run the test:
|
||||
|
||||
perf test 23
|
||||
23: Test using a dummy software event to keep tracking : Ok
|
||||
|
||||
|
||||
perf record modes (nothing new here)
|
||||
------------------------------------
|
||||
|
||||
perf record essentially operates in one of three modes:
|
||||
per thread
|
||||
per cpu
|
||||
workload only
|
||||
|
||||
"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
|
||||
workload).
|
||||
"per cpu" is selected by -C or -a.
|
||||
"workload only" mode is selected by not using the other options but providing a
|
||||
command to run (i.e. the workload).
|
||||
|
||||
In per-thread mode an exact list of threads is traced. There is no inheritance.
|
||||
Each thread has its own event buffer.
|
||||
|
||||
In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
|
||||
option, or processes selected with -p or -u) are traced. Each cpu has its own
|
||||
buffer. Inheritance is allowed.
|
||||
|
||||
In workload-only mode, the workload is traced but with per-cpu buffers.
|
||||
Inheritance is allowed. Note that you can now trace a workload in per-thread
|
||||
mode by using the --per-thread option.
|
||||
|
||||
|
||||
Privileged vs non-privileged users
|
||||
----------------------------------
|
||||
|
||||
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
|
||||
have memory limits imposed upon them. That affects what buffer sizes they can
|
||||
have as outlined above.
|
||||
|
||||
Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
|
||||
not permitted to use tracepoints which means there is insufficient side-band
|
||||
information to decode Intel PT in per-cpu mode, and potentially workload-only
|
||||
mode too if the workload creates new processes.
|
||||
|
||||
Note also, that to use tracepoints, read-access to debugfs is required. So if
|
||||
debugfs is not mounted or the user does not have read-access, it will again not
|
||||
be possible to decode Intel PT in per-cpu mode.
|
||||
|
||||
|
||||
sched_switch tracepoint
|
||||
-----------------------
|
||||
|
||||
The sched_switch tracepoint is used to provide side-band data for Intel PT
|
||||
decoding. sched_switch events are automatically added. e.g. the second event
|
||||
shown below
|
||||
|
||||
$ perf record -vv -e intel_pt//u uname
|
||||
------------------------------------------------------------
|
||||
perf_event_attr:
|
||||
type 6
|
||||
size 112
|
||||
config 0x400
|
||||
{ sample_period, sample_freq } 1
|
||||
sample_type IP|TID|TIME|CPU|IDENTIFIER
|
||||
read_format ID
|
||||
disabled 1
|
||||
inherit 1
|
||||
exclude_kernel 1
|
||||
exclude_hv 1
|
||||
enable_on_exec 1
|
||||
sample_id_all 1
|
||||
------------------------------------------------------------
|
||||
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
|
||||
------------------------------------------------------------
|
||||
perf_event_attr:
|
||||
type 2
|
||||
size 112
|
||||
config 0x108
|
||||
{ sample_period, sample_freq } 1
|
||||
sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
|
||||
read_format ID
|
||||
inherit 1
|
||||
sample_id_all 1
|
||||
exclude_guest 1
|
||||
------------------------------------------------------------
|
||||
sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
|
||||
------------------------------------------------------------
|
||||
perf_event_attr:
|
||||
type 1
|
||||
size 112
|
||||
config 0x9
|
||||
{ sample_period, sample_freq } 1
|
||||
sample_type IP|TID|TIME|IDENTIFIER
|
||||
read_format ID
|
||||
disabled 1
|
||||
inherit 1
|
||||
exclude_kernel 1
|
||||
exclude_hv 1
|
||||
mmap 1
|
||||
comm 1
|
||||
enable_on_exec 1
|
||||
task 1
|
||||
sample_id_all 1
|
||||
mmap2 1
|
||||
comm_exec 1
|
||||
------------------------------------------------------------
|
||||
sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
|
||||
sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
|
||||
mmap size 528384B
|
||||
AUX area mmap length 4194304
|
||||
perf event ring buffer mmapped per cpu
|
||||
Synthesizing auxtrace information
|
||||
Linux
|
||||
[ perf record: Woken up 1 times to write data ]
|
||||
[ perf record: Captured and wrote 0.042 MB perf.data ]
|
||||
|
||||
Note, the sched_switch event is only added if the user is permitted to use it
|
||||
and only in per-cpu mode.
|
||||
|
||||
Note also, the sched_switch event is only added if TSC packets are requested.
|
||||
That is because, in the absence of timing information, the sched_switch events
|
||||
cannot be matched against the Intel PT trace.
|
||||
|
||||
|
||||
perf script
|
||||
===========
|
||||
|
||||
By default, perf script will decode trace data found in the perf.data file.
|
||||
This can be further controlled by the new option --itrace.
|
||||
|
||||
|
||||
New --itrace option
|
||||
-------------------
|
||||
|
||||
Having no option is the same as
|
||||
|
||||
--itrace
|
||||
|
||||
which, in turn, is the same as
|
||||
|
||||
--itrace=ibxe
|
||||
|
||||
The letters are:
|
||||
|
||||
i synthesize "instructions" events
|
||||
b synthesize "branches" events
|
||||
x synthesize "transactions" events
|
||||
c synthesize branches events (calls only)
|
||||
r synthesize branches events (returns only)
|
||||
e synthesize tracing error events
|
||||
d create a debug log
|
||||
g synthesize a call chain (use with i or x)
|
||||
|
||||
"Instructions" events look like they were recorded by "perf record -e
|
||||
instructions".
|
||||
|
||||
"Branches" events look like they were recorded by "perf record -e branches". "c"
|
||||
and "r" can be combined to get calls and returns.
|
||||
|
||||
"Transactions" events correspond to the start or end of transactions. The
|
||||
'flags' field can be used in perf script to determine whether the event is a
|
||||
transaction start, commit or abort.
|
||||
|
||||
Error events are new. They show where the decoder lost the trace. Error events
|
||||
are quite important. Users must know if what they are seeing is a complete
|
||||
picture or not.
|
||||
|
||||
The "d" option will cause the creation of a file "intel_pt.log" containing all
|
||||
decoded packets and instructions. Note that this option slows down the decoder
|
||||
and that the resulting file may be very large.
|
||||
|
||||
In addition, the period of the "instructions" event can be specified. e.g.
|
||||
|
||||
--itrace=i10us
|
||||
|
||||
sets the period to 10us i.e. one instruction sample is synthesized for each 10
|
||||
microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
|
||||
"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
|
||||
|
||||
"ms", "us" and "ns" are converted to TSC ticks.
|
||||
|
||||
The timing information included with Intel PT does not give the time of every
|
||||
instruction. Consequently, for the purpose of sampling, the decoder estimates
|
||||
the time since the last timing packet based on 1 tick per instruction. The time
|
||||
on the sample is *not* adjusted and reflects the last known value of TSC.
|
||||
|
||||
For Intel PT, the default period is 100us.
|
||||
|
||||
Also the call chain size (default 16, max. 1024) for instructions or
|
||||
transactions events can be specified. e.g.
|
||||
|
||||
--itrace=ig32
|
||||
--itrace=xg32
|
||||
|
||||
To disable trace decoding entirely, use the option --no-itrace.
|
||||
|
||||
|
||||
dump option
|
||||
-----------
|
||||
|
||||
perf script has an option (-D) to "dump" the events i.e. display the binary
|
||||
data.
|
||||
|
||||
When -D is used, Intel PT packets are displayed. The packet decoder does not
|
||||
pay attention to PSB packets, but just decodes the bytes - so the packets seen
|
||||
by the actual decoder may not be identical in places where the data is corrupt.
|
||||
One example of that would be when the buffer-switching interrupt has been too
|
||||
slow, and the buffer has been filled completely. In that case, the last packet
|
||||
in the buffer might be truncated and immediately followed by a PSB as the trace
|
||||
continues in the next buffer.
|
||||
|
||||
To disable the display of Intel PT packets, combine the -D option with
|
||||
--no-itrace.
|
||||
|
||||
|
||||
perf report
|
||||
===========
|
||||
|
||||
By default, perf report will decode trace data found in the perf.data file.
|
||||
This can be further controlled by the new option --itrace exactly the same as
|
||||
perf script, with the exception that the default is --itrace=igxe.
|
||||
|
||||
|
||||
perf inject
|
||||
===========
|
||||
|
||||
perf inject also accepts the --itrace option in which case tracing data is
|
||||
removed and replaced with the synthesized events. e.g.
|
||||
|
||||
perf inject --itrace -i perf.data -o perf.data.new
|
@ -76,6 +76,12 @@ include config/utilities.mak
|
||||
#
|
||||
# Define NO_AUXTRACE if you do not want AUX area tracing support
|
||||
|
||||
# As per kernel Makefile, avoid funny character set dependencies
|
||||
unexport LC_ALL
|
||||
LC_COLLATE=C
|
||||
LC_NUMERIC=C
|
||||
export LC_COLLATE LC_NUMERIC
|
||||
|
||||
ifeq ($(srctree),)
|
||||
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
|
||||
srctree := $(patsubst %/,%,$(dir $(srctree)))
|
||||
@ -135,6 +141,7 @@ INSTALL = install
|
||||
FLEX = flex
|
||||
BISON = bison
|
||||
STRIP = strip
|
||||
AWK = awk
|
||||
|
||||
LIB_DIR = $(srctree)/tools/lib/api/
|
||||
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
|
||||
@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf
|
||||
|
||||
PERF_IN := $(OUTPUT)perf-in.o
|
||||
|
||||
export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX
|
||||
export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
|
||||
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
|
||||
|
||||
$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
|
||||
@ -565,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean
|
||||
$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
|
||||
$(Q)$(RM) $(OUTPUT).config-detected
|
||||
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
|
||||
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
|
||||
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
|
||||
$(OUTPUT)util/intel-pt-decoder/inat-tables.c
|
||||
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
|
||||
$(python-clean)
|
||||
|
||||
|
@ -1,8 +1,12 @@
|
||||
libperf-y += header.o
|
||||
libperf-y += tsc.o
|
||||
libperf-y += pmu.o
|
||||
libperf-y += kvm-stat.o
|
||||
|
||||
libperf-$(CONFIG_DWARF) += dwarf-regs.o
|
||||
|
||||
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
|
||||
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
|
||||
|
||||
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
|
||||
|
38
tools/perf/arch/x86/util/auxtrace.c
Normal file
38
tools/perf/arch/x86/util/auxtrace.c
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* auxtrace.c: AUX area tracing support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "../../util/header.h"
|
||||
#include "../../util/auxtrace.h"
|
||||
#include "../../util/intel-pt.h"
|
||||
|
||||
/*
 * Create the AUX area tracing recorder for the CPU perf is running on.
 *
 * *@err is set to the result of get_cpuid() (0 on success). NULL is returned
 * when the CPUID string cannot be read or does not start with
 * "GenuineIntel,"; otherwise the result of intel_pt_recording_init() is
 * returned, which may itself update *@err.
 */
struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
					      int *err)
{
	char cpuid[64];
	int rc = get_cpuid(cpuid, sizeof(cpuid));

	*err = rc;
	if (rc != 0)
		return NULL;

	/* 13 == strlen("GenuineIntel,") - only the vendor prefix is compared */
	if (strncmp(cpuid, "GenuineIntel,", 13) != 0)
		return NULL;

	return intel_pt_recording_init(err);
}
|
752
tools/perf/arch/x86/util/intel-pt.c
Normal file
752
tools/perf/arch/x86/util/intel-pt.c
Normal file
@ -0,0 +1,752 @@
|
||||
/*
|
||||
* intel-pt.c: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2015, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/log2.h>
|
||||
|
||||
#include "../../perf.h"
|
||||
#include "../../util/session.h"
|
||||
#include "../../util/event.h"
|
||||
#include "../../util/evlist.h"
|
||||
#include "../../util/evsel.h"
|
||||
#include "../../util/cpumap.h"
|
||||
#include "../../util/parse-options.h"
|
||||
#include "../../util/parse-events.h"
|
||||
#include "../../util/pmu.h"
|
||||
#include "../../util/debug.h"
|
||||
#include "../../util/auxtrace.h"
|
||||
#include "../../util/tsc.h"
|
||||
#include "../../util/intel-pt.h"
|
||||
|
||||
#define KiB(x) ((x) * 1024)
|
||||
#define MiB(x) ((x) * 1024 * 1024)
|
||||
#define KiB_MASK(x) (KiB(x) - 1)
|
||||
#define MiB_MASK(x) (MiB(x) - 1)
|
||||
|
||||
#define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4)
|
||||
|
||||
#define INTEL_PT_MAX_SAMPLE_SIZE KiB(60)
|
||||
|
||||
#define INTEL_PT_PSB_PERIOD_NEAR 256
|
||||
|
||||
/*
 * Per-mmap state used in snapshot mode to decide whether the AUX buffer has
 * wrapped around.
 */
struct intel_pt_snapshot_ref {
	/* Copy of trace bytes taken at the previous snapshot (NULL until allocated) */
	void *ref_buf;
	/* Offset in the AUX buffer that @ref_buf is compared against */
	size_t ref_offset;
	/* Latched once a wrap has been detected without using @ref_buf */
	bool wrapped;
};
|
||||
|
||||
struct intel_pt_recording {
|
||||
struct auxtrace_record itr;
|
||||
struct perf_pmu *intel_pt_pmu;
|
||||
int have_sched_switch;
|
||||
struct perf_evlist *evlist;
|
||||
bool snapshot_mode;
|
||||
bool snapshot_init_done;
|
||||
size_t snapshot_size;
|
||||
size_t snapshot_ref_buf_size;
|
||||
int snapshot_ref_cnt;
|
||||
struct intel_pt_snapshot_ref *snapshot_refs;
|
||||
};
|
||||
|
||||
static int intel_pt_parse_terms_with_default(struct list_head *formats,
|
||||
const char *str,
|
||||
u64 *config)
|
||||
{
|
||||
struct list_head *terms;
|
||||
struct perf_event_attr attr = { .size = 0, };
|
||||
int err;
|
||||
|
||||
terms = malloc(sizeof(struct list_head));
|
||||
if (!terms)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(terms);
|
||||
|
||||
err = parse_events_terms(terms, str);
|
||||
if (err)
|
||||
goto out_free;
|
||||
|
||||
attr.config = *config;
|
||||
err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
|
||||
if (err)
|
||||
goto out_free;
|
||||
|
||||
*config = attr.config;
|
||||
out_free:
|
||||
parse_events__free_terms(terms);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int intel_pt_parse_terms(struct list_head *formats, const char *str,
|
||||
u64 *config)
|
||||
{
|
||||
*config = 0;
|
||||
return intel_pt_parse_terms_with_default(formats, str, config);
|
||||
}
|
||||
|
||||
/*
 * Return the PSB period used for sizing snapshot buffers. Neither argument is
 * consulted; the value is a fixed 256.
 */
static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu __maybe_unused,
				  struct perf_evlist *evlist __maybe_unused)
{
	const size_t psb_period = 256;

	return psb_period;
}
|
||||
|
||||
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
|
||||
{
|
||||
u64 config;
|
||||
|
||||
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &config);
|
||||
return config;
|
||||
}
|
||||
|
||||
static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
|
||||
struct record_opts *opts,
|
||||
const char *str)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
unsigned long long snapshot_size = 0;
|
||||
char *endptr;
|
||||
|
||||
if (str) {
|
||||
snapshot_size = strtoull(str, &endptr, 0);
|
||||
if (*endptr || snapshot_size > SIZE_MAX)
|
||||
return -1;
|
||||
}
|
||||
|
||||
opts->auxtrace_snapshot_mode = true;
|
||||
opts->auxtrace_snapshot_size = snapshot_size;
|
||||
|
||||
ptr->snapshot_size = snapshot_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct perf_event_attr *
|
||||
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
|
||||
{
|
||||
struct perf_event_attr *attr;
|
||||
|
||||
attr = zalloc(sizeof(struct perf_event_attr));
|
||||
if (!attr)
|
||||
return NULL;
|
||||
|
||||
attr->config = intel_pt_default_config(intel_pt_pmu);
|
||||
|
||||
intel_pt_pmu->selectable = true;
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
/* Report the size of the private area that intel_pt_info_fill() expects. */
static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
{
	const size_t priv_size = INTEL_PT_AUXTRACE_PRIV_SIZE;

	return priv_size;
}
|
||||
|
||||
static int intel_pt_info_fill(struct auxtrace_record *itr,
|
||||
struct perf_session *session,
|
||||
struct auxtrace_info_event *auxtrace_info,
|
||||
size_t priv_size)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
|
||||
struct perf_event_mmap_page *pc;
|
||||
struct perf_tsc_conversion tc = { .time_mult = 0, };
|
||||
bool cap_user_time_zero = false, per_cpu_mmaps;
|
||||
u64 tsc_bit, noretcomp_bit;
|
||||
int err;
|
||||
|
||||
if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
|
||||
intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
|
||||
&noretcomp_bit);
|
||||
|
||||
if (!session->evlist->nr_mmaps)
|
||||
return -EINVAL;
|
||||
|
||||
pc = session->evlist->mmap[0].base;
|
||||
if (pc) {
|
||||
err = perf_read_tsc_conversion(pc, &tc);
|
||||
if (err) {
|
||||
if (err != -EOPNOTSUPP)
|
||||
return err;
|
||||
} else {
|
||||
cap_user_time_zero = tc.time_mult != 0;
|
||||
}
|
||||
if (!cap_user_time_zero)
|
||||
ui__warning("Intel Processor Trace: TSC not available\n");
|
||||
}
|
||||
|
||||
per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
|
||||
|
||||
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
|
||||
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
|
||||
auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
|
||||
auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
|
||||
auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
|
||||
auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
|
||||
auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
|
||||
auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
|
||||
auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
|
||||
auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
|
||||
auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int intel_pt_track_switches(struct perf_evlist *evlist)
|
||||
{
|
||||
const char *sched_switch = "sched:sched_switch";
|
||||
struct perf_evsel *evsel;
|
||||
int err;
|
||||
|
||||
if (!perf_evlist__can_select_event(evlist, sched_switch))
|
||||
return -EPERM;
|
||||
|
||||
err = parse_events(evlist, sched_switch, NULL);
|
||||
if (err) {
|
||||
pr_debug2("%s: failed to parse %s, error %d\n",
|
||||
__func__, sched_switch, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
evsel = perf_evlist__last(evlist);
|
||||
|
||||
perf_evsel__set_sample_bit(evsel, CPU);
|
||||
perf_evsel__set_sample_bit(evsel, TIME);
|
||||
|
||||
evsel->system_wide = true;
|
||||
evsel->no_aux_samples = true;
|
||||
evsel->immediate = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int intel_pt_recording_options(struct auxtrace_record *itr,
|
||||
struct perf_evlist *evlist,
|
||||
struct record_opts *opts)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
|
||||
bool have_timing_info;
|
||||
struct perf_evsel *evsel, *intel_pt_evsel = NULL;
|
||||
const struct cpu_map *cpus = evlist->cpus;
|
||||
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
|
||||
u64 tsc_bit;
|
||||
|
||||
ptr->evlist = evlist;
|
||||
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
|
||||
|
||||
evlist__for_each(evlist, evsel) {
|
||||
if (evsel->attr.type == intel_pt_pmu->type) {
|
||||
if (intel_pt_evsel) {
|
||||
pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
evsel->attr.freq = 0;
|
||||
evsel->attr.sample_period = 1;
|
||||
intel_pt_evsel = evsel;
|
||||
opts->full_auxtrace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
|
||||
pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (opts->use_clockid) {
|
||||
pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!opts->full_auxtrace)
|
||||
return 0;
|
||||
|
||||
/* Set default sizes for snapshot mode */
|
||||
if (opts->auxtrace_snapshot_mode) {
|
||||
size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
|
||||
|
||||
if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
|
||||
if (privileged) {
|
||||
opts->auxtrace_mmap_pages = MiB(4) / page_size;
|
||||
} else {
|
||||
opts->auxtrace_mmap_pages = KiB(128) / page_size;
|
||||
if (opts->mmap_pages == UINT_MAX)
|
||||
opts->mmap_pages = KiB(256) / page_size;
|
||||
}
|
||||
} else if (!opts->auxtrace_mmap_pages && !privileged &&
|
||||
opts->mmap_pages == UINT_MAX) {
|
||||
opts->mmap_pages = KiB(256) / page_size;
|
||||
}
|
||||
if (!opts->auxtrace_snapshot_size)
|
||||
opts->auxtrace_snapshot_size =
|
||||
opts->auxtrace_mmap_pages * (size_t)page_size;
|
||||
if (!opts->auxtrace_mmap_pages) {
|
||||
size_t sz = opts->auxtrace_snapshot_size;
|
||||
|
||||
sz = round_up(sz, page_size) / page_size;
|
||||
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
|
||||
}
|
||||
if (opts->auxtrace_snapshot_size >
|
||||
opts->auxtrace_mmap_pages * (size_t)page_size) {
|
||||
pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
|
||||
opts->auxtrace_snapshot_size,
|
||||
opts->auxtrace_mmap_pages * (size_t)page_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
|
||||
pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
pr_debug2("Intel PT snapshot size: %zu\n",
|
||||
opts->auxtrace_snapshot_size);
|
||||
if (psb_period &&
|
||||
opts->auxtrace_snapshot_size <= psb_period +
|
||||
INTEL_PT_PSB_PERIOD_NEAR)
|
||||
ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
|
||||
opts->auxtrace_snapshot_size, psb_period);
|
||||
}
|
||||
|
||||
/* Set default sizes for full trace mode */
|
||||
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
|
||||
if (privileged) {
|
||||
opts->auxtrace_mmap_pages = MiB(4) / page_size;
|
||||
} else {
|
||||
opts->auxtrace_mmap_pages = KiB(128) / page_size;
|
||||
if (opts->mmap_pages == UINT_MAX)
|
||||
opts->mmap_pages = KiB(256) / page_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate auxtrace_mmap_pages */
|
||||
if (opts->auxtrace_mmap_pages) {
|
||||
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
|
||||
size_t min_sz;
|
||||
|
||||
if (opts->auxtrace_snapshot_mode)
|
||||
min_sz = KiB(4);
|
||||
else
|
||||
min_sz = KiB(8);
|
||||
|
||||
if (sz < min_sz || !is_power_of_2(sz)) {
|
||||
pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
|
||||
min_sz / 1024);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
|
||||
|
||||
if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
|
||||
have_timing_info = true;
|
||||
else
|
||||
have_timing_info = false;
|
||||
|
||||
/*
|
||||
* Per-cpu recording needs sched_switch events to distinguish different
|
||||
* threads.
|
||||
*/
|
||||
if (have_timing_info && !cpu_map__empty(cpus)) {
|
||||
int err;
|
||||
|
||||
err = intel_pt_track_switches(evlist);
|
||||
if (err == -EPERM)
|
||||
pr_debug2("Unable to select sched:sched_switch\n");
|
||||
else if (err)
|
||||
return err;
|
||||
else
|
||||
ptr->have_sched_switch = 1;
|
||||
}
|
||||
|
||||
if (intel_pt_evsel) {
|
||||
/*
|
||||
* To obtain the auxtrace buffer file descriptor, the auxtrace
|
||||
* event must come first.
|
||||
*/
|
||||
perf_evlist__to_front(evlist, intel_pt_evsel);
|
||||
/*
|
||||
* In the case of per-cpu mmaps, we need the CPU on the
|
||||
* AUX event.
|
||||
*/
|
||||
if (!cpu_map__empty(cpus))
|
||||
perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
|
||||
}
|
||||
|
||||
/* Add dummy event to keep tracking */
|
||||
if (opts->full_auxtrace) {
|
||||
struct perf_evsel *tracking_evsel;
|
||||
int err;
|
||||
|
||||
err = parse_events(evlist, "dummy:u", NULL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
tracking_evsel = perf_evlist__last(evlist);
|
||||
|
||||
perf_evlist__set_tracking_event(evlist, tracking_evsel);
|
||||
|
||||
tracking_evsel->attr.freq = 0;
|
||||
tracking_evsel->attr.sample_period = 1;
|
||||
|
||||
/* In per-cpu case, always need the time of mmap events etc */
|
||||
if (!cpu_map__empty(cpus))
|
||||
perf_evsel__set_sample_bit(tracking_evsel, TIME);
|
||||
}
|
||||
|
||||
/*
|
||||
* Warn the user when we do not have enough information to decode i.e.
|
||||
* per-cpu with no sched_switch (except workload-only).
|
||||
*/
|
||||
if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
|
||||
!target__none(&opts->target))
|
||||
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int intel_pt_snapshot_start(struct auxtrace_record *itr)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
struct perf_evsel *evsel;
|
||||
|
||||
evlist__for_each(ptr->evlist, evsel) {
|
||||
if (evsel->attr.type == ptr->intel_pt_pmu->type)
|
||||
return perf_evlist__disable_event(ptr->evlist, evsel);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
struct perf_evsel *evsel;
|
||||
|
||||
evlist__for_each(ptr->evlist, evsel) {
|
||||
if (evsel->attr.type == ptr->intel_pt_pmu->type)
|
||||
return perf_evlist__enable_event(ptr->evlist, evsel);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
|
||||
{
|
||||
const size_t sz = sizeof(struct intel_pt_snapshot_ref);
|
||||
int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
|
||||
struct intel_pt_snapshot_ref *refs;
|
||||
|
||||
if (!new_cnt)
|
||||
new_cnt = 16;
|
||||
|
||||
while (new_cnt <= idx)
|
||||
new_cnt *= 2;
|
||||
|
||||
refs = calloc(new_cnt, sz);
|
||||
if (!refs)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(refs, ptr->snapshot_refs, cnt * sz);
|
||||
|
||||
ptr->snapshot_refs = refs;
|
||||
ptr->snapshot_ref_cnt = new_cnt;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ptr->snapshot_ref_cnt; i++)
|
||||
zfree(&ptr->snapshot_refs[i].ref_buf);
|
||||
zfree(&ptr->snapshot_refs);
|
||||
}
|
||||
|
||||
static void intel_pt_recording_free(struct auxtrace_record *itr)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
|
||||
intel_pt_free_snapshot_refs(ptr);
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
|
||||
size_t snapshot_buf_size)
|
||||
{
|
||||
size_t ref_buf_size = ptr->snapshot_ref_buf_size;
|
||||
void *ref_buf;
|
||||
|
||||
ref_buf = zalloc(ref_buf_size);
|
||||
if (!ref_buf)
|
||||
return -ENOMEM;
|
||||
|
||||
ptr->snapshot_refs[idx].ref_buf = ref_buf;
|
||||
ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
|
||||
size_t snapshot_buf_size)
|
||||
{
|
||||
const size_t max_size = 256 * 1024;
|
||||
size_t buf_size = 0, psb_period;
|
||||
|
||||
if (ptr->snapshot_size <= 64 * 1024)
|
||||
return 0;
|
||||
|
||||
psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
|
||||
if (psb_period)
|
||||
buf_size = psb_period * 2;
|
||||
|
||||
if (!buf_size || buf_size > max_size)
|
||||
buf_size = max_size;
|
||||
|
||||
if (buf_size >= snapshot_buf_size)
|
||||
return 0;
|
||||
|
||||
if (buf_size >= ptr->snapshot_size / 2)
|
||||
return 0;
|
||||
|
||||
return buf_size;
|
||||
}
|
||||
|
||||
static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
|
||||
size_t snapshot_buf_size)
|
||||
{
|
||||
if (ptr->snapshot_init_done)
|
||||
return 0;
|
||||
|
||||
ptr->snapshot_init_done = true;
|
||||
|
||||
ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
|
||||
snapshot_buf_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous. It is assumed that @compare_size <=
 * @buf2_size and that @offs2 <= @buf2_size. This function returns %false if
 * the bytes are identical, %true otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	const unsigned char *p1 = buf1;
	const unsigned char *p2 = buf2;
	size_t end2 = offs2 + compare_size, part_size;

	/* Contiguous case: the whole range fits before the end of @buf2 */
	if (end2 <= buf2_size)
		return memcmp(p1, p2 + offs2, compare_size) != 0;

	/*
	 * Wrapped case. The first part runs from @offs2 to the physical end of
	 * the circular buffer, so its length is what is left after @offs2 -
	 * not the amount by which the range overshoots the end.
	 */
	part_size = buf2_size - offs2;
	if (memcmp(p1, p2 + offs2, part_size))
		return true;

	/* The remainder continues from the start of the circular buffer */
	return memcmp(p1 + part_size, p2, compare_size - part_size) != 0;
}
|
||||
|
||||
/*
 * Compare the saved reference bytes against the current contents of the AUX
 * buffer.
 *
 * Returns %true if @head lies strictly inside the reference region
 * [@ref_offset, @ref_offset + @ref_size), taking wrap-around at @buf_size
 * into account, or if the bytes at the reference region differ from
 * @ref_buf. Returns %false only when the bytes are identical.
 */
static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	const size_t ref_end = ref_offset + ref_size;
	bool head_in_ref;

	if (ref_end > buf_size)
		/* Region wraps: it covers the tail and the start of the buffer */
		head_in_ref = head > ref_offset || head < ref_end - buf_size;
	else
		head_in_ref = head > ref_offset && head < ref_end;

	if (head_in_ref)
		return true;

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}
|
||||
|
||||
/*
 * Save @ref_size bytes of the AUX buffer @data (of @buf_size bytes) into
 * @ref_buf, relative to the current @head.
 *
 * If at least @ref_size bytes precede @head they are copied as-is. Otherwise
 * the @head bytes at the start of @data are placed first in @ref_buf,
 * followed by the bytes taken from the very end of @data that make up the
 * remainder.
 */
static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	char *dst = ref_buf;
	const char *src = data;
	size_t tail_len;

	if (head >= ref_size) {
		memcpy(dst, src + head - ref_size, ref_size);
		return;
	}

	memcpy(dst, src, head);

	tail_len = ref_size - head;
	memcpy(dst + head, src + buf_size - tail_len, tail_len);
}
|
||||
|
||||
static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
|
||||
struct auxtrace_mmap *mm, unsigned char *data,
|
||||
u64 head)
|
||||
{
|
||||
struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
|
||||
bool wrapped;
|
||||
|
||||
wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
|
||||
ptr->snapshot_ref_buf_size, mm->len,
|
||||
data, head);
|
||||
|
||||
intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
|
||||
data, head);
|
||||
|
||||
return wrapped;
|
||||
}
|
||||
|
||||
/*
 * Report whether any of the last 512 64-bit words (or all words, if the
 * buffer is smaller) of @data is non-zero. The caller treats a non-zero
 * result as evidence that the buffer has wrapped.
 */
static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
	/* Buffer length in 64-bit words */
	const int end = buf_size >> 3;
	int pos = end - 512;

	if (pos < 0)
		pos = 0;

	while (pos < end) {
		if (data[pos] != 0)
			return true;
		pos++;
	}

	return false;
}
|
||||
|
||||
/*
 * Work out which part of the AUX buffer of mmap @idx makes up the snapshot,
 * adjusting *@old and *@head in place.
 *
 * Wrap-around is detected either with a reference buffer (when
 * snapshot_ref_buf_size is non-zero) or by checking for non-zero data at the
 * end of the buffer, in which case the result is latched per mmap.
 *
 * Returns 0 on success, or the error from the setup/allocation helpers (also
 * reported with pr_err()).
 */
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
				  struct auxtrace_mmap *mm, unsigned char *data,
				  u64 *head, u64 *old)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct intel_pt_snapshot_ref *ref;
	bool wrapped;
	int err;

	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head);

	err = intel_pt_snapshot_init(ptr, mm->len);
	if (err)
		goto out_err;

	/* Make sure there is a slot for this mmap index */
	if (idx >= ptr->snapshot_ref_cnt) {
		err = intel_pt_alloc_snapshot_refs(ptr, idx);
		if (err)
			goto out_err;
	}

	ref = &ptr->snapshot_refs[idx];

	if (ptr->snapshot_ref_buf_size != 0) {
		if (ref->ref_buf == NULL) {
			err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
			if (err)
				goto out_err;
		}
		wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
	} else {
		/* Once seen, a wrap stays recorded for this mmap */
		if (!ref->wrapped && intel_pt_first_wrap((u64 *)data, mm->len))
			ref->wrapped = true;
		wrapped = ref->wrapped;
	}

	/*
	 * In full trace mode 'head' continually increases. However in snapshot
	 * mode 'head' is an offset within the buffer. Here 'old' and 'head'
	 * are adjusted to match the full trace case which expects that 'old' is
	 * always less than 'head'.
	 */
	if (wrapped) {
		*old = *head;
		*head += mm->len;
	} else {
		if (mm->mask)
			*old &= mm->mask;
		else
			*old %= mm->len;
		if (*old > *head)
			*head += mm->len;
	}

	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

	return 0;

out_err:
	pr_err("%s: failed, error %d\n", __func__, err);
	return err;
}
|
||||
|
||||
/* Return the current rdtsc() value as the reference for the auxtrace code. */
static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
	const u64 tsc = rdtsc();

	return tsc;
}
|
||||
|
||||
static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
|
||||
{
|
||||
struct intel_pt_recording *ptr =
|
||||
container_of(itr, struct intel_pt_recording, itr);
|
||||
struct perf_evsel *evsel;
|
||||
|
||||
evlist__for_each(ptr->evlist, evsel) {
|
||||
if (evsel->attr.type == ptr->intel_pt_pmu->type)
|
||||
return perf_evlist__enable_event_idx(ptr->evlist, evsel,
|
||||
idx);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
struct auxtrace_record *intel_pt_recording_init(int *err)
|
||||
{
|
||||
struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
|
||||
struct intel_pt_recording *ptr;
|
||||
|
||||
if (!intel_pt_pmu)
|
||||
return NULL;
|
||||
|
||||
ptr = zalloc(sizeof(struct intel_pt_recording));
|
||||
if (!ptr) {
|
||||
*err = -ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ptr->intel_pt_pmu = intel_pt_pmu;
|
||||
ptr->itr.recording_options = intel_pt_recording_options;
|
||||
ptr->itr.info_priv_size = intel_pt_info_priv_size;
|
||||
ptr->itr.info_fill = intel_pt_info_fill;
|
||||
ptr->itr.free = intel_pt_recording_free;
|
||||
ptr->itr.snapshot_start = intel_pt_snapshot_start;
|
||||
ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
|
||||
ptr->itr.find_snapshot = intel_pt_find_snapshot;
|
||||
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
|
||||
ptr->itr.reference = intel_pt_reference;
|
||||
ptr->itr.read_finish = intel_pt_read_finish;
|
||||
return &ptr->itr;
|
||||
}
|
15
tools/perf/arch/x86/util/pmu.c
Normal file
15
tools/perf/arch/x86/util/pmu.c
Normal file
@ -0,0 +1,15 @@
|
||||
#include <string.h>
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include "../../util/intel-pt.h"
|
||||
#include "../../util/pmu.h"
|
||||
|
||||
/*
 * Return a newly allocated default perf_event_attr for @pmu, or NULL if the
 * PMU has none. Only the Intel PT PMU has a default config, and only when
 * perf is built with HAVE_AUXTRACE_SUPPORT.
 */
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
	struct perf_event_attr *attr = NULL;

#ifdef HAVE_AUXTRACE_SUPPORT
	if (strcmp(pmu->name, INTEL_PT_PMU_NAME) == 0)
		attr = intel_pt_pmu_default_config(pmu);
#endif

	return attr;
}
|
@ -1561,6 +1561,22 @@ static int have_cmd(int argc, const char **argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void script__setup_sample_type(struct perf_script *script)
|
||||
{
|
||||
struct perf_session *session = script->session;
|
||||
u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
|
||||
|
||||
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
|
||||
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
|
||||
(sample_type & PERF_SAMPLE_STACK_USER))
|
||||
callchain_param.record_mode = CALLCHAIN_DWARF;
|
||||
else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
|
||||
callchain_param.record_mode = CALLCHAIN_LBR;
|
||||
else
|
||||
callchain_param.record_mode = CALLCHAIN_FP;
|
||||
}
|
||||
}
|
||||
|
||||
int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
{
|
||||
bool show_full_info = false;
|
||||
@ -1849,6 +1865,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||
goto out_delete;
|
||||
|
||||
script.session = session;
|
||||
script__setup_sample_type(&script);
|
||||
|
||||
session->itrace_synth_opts = &itrace_synth_opts;
|
||||
|
||||
|
@ -1315,7 +1315,10 @@ struct thread_trace {
|
||||
double runtime_ms;
|
||||
struct {
|
||||
unsigned long ptr;
|
||||
int entry_str_pos;
|
||||
short int entry_str_pos;
|
||||
bool pending_open;
|
||||
unsigned int namelen;
|
||||
char *name;
|
||||
} filename;
|
||||
struct {
|
||||
int max;
|
||||
@ -1391,7 +1394,6 @@ struct trace {
|
||||
size_t nr;
|
||||
int *entries;
|
||||
} ev_qualifier_ids;
|
||||
const char *last_vfs_getname;
|
||||
struct intlist *tid_list;
|
||||
struct intlist *pid_list;
|
||||
struct {
|
||||
@ -1966,8 +1968,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
|
||||
trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
|
||||
fprintf(trace->output, "%-70s\n", ttrace->entry_str);
|
||||
}
|
||||
} else
|
||||
} else {
|
||||
ttrace->entry_pending = true;
|
||||
/* See trace__vfs_getname & trace__sys_exit */
|
||||
ttrace->filename.pending_open = false;
|
||||
}
|
||||
|
||||
if (trace->current != thread) {
|
||||
thread__put(trace->current);
|
||||
@ -2003,9 +2008,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
|
||||
|
||||
ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
|
||||
|
||||
if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
|
||||
trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
|
||||
trace->last_vfs_getname = NULL;
|
||||
if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
|
||||
trace__set_fd_pathname(thread, ret, ttrace->filename.name);
|
||||
ttrace->filename.pending_open = false;
|
||||
++trace->stats.vfs_getname;
|
||||
}
|
||||
|
||||
@ -2065,9 +2070,7 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
|
||||
size_t filename_len, entry_str_len, to_move;
|
||||
ssize_t remaining_space;
|
||||
char *pos;
|
||||
const char *filename;
|
||||
|
||||
trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
|
||||
const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
|
||||
|
||||
if (!thread)
|
||||
goto out;
|
||||
@ -2076,6 +2079,21 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
|
||||
if (!ttrace)
|
||||
goto out;
|
||||
|
||||
filename_len = strlen(filename);
|
||||
|
||||
if (ttrace->filename.namelen < filename_len) {
|
||||
char *f = realloc(ttrace->filename.name, filename_len + 1);
|
||||
|
||||
if (f == NULL)
|
||||
goto out;
|
||||
|
||||
ttrace->filename.namelen = filename_len;
|
||||
ttrace->filename.name = f;
|
||||
}
|
||||
|
||||
strcpy(ttrace->filename.name, filename);
|
||||
ttrace->filename.pending_open = true;
|
||||
|
||||
if (!ttrace->filename.ptr)
|
||||
goto out;
|
||||
|
||||
@ -2084,8 +2102,6 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
|
||||
if (remaining_space <= 0)
|
||||
goto out;
|
||||
|
||||
filename = trace->last_vfs_getname;
|
||||
filename_len = strlen(filename);
|
||||
if (filename_len > (size_t)remaining_space) {
|
||||
filename += filename_len - remaining_space;
|
||||
filename_len = remaining_space;
|
||||
|
@ -297,7 +297,11 @@ ifndef NO_LIBELF
|
||||
else
|
||||
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
|
||||
LDFLAGS += $(LIBDW_LDFLAGS)
|
||||
EXTLIBS += -ldw
|
||||
DWARFLIBS := -ldw
|
||||
ifeq ($(findstring -static,${LDFLAGS}),-static)
|
||||
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
|
||||
endif
|
||||
EXTLIBS += ${DWARFLIBS}
|
||||
$(call detected,CONFIG_DWARF)
|
||||
endif # PERF_HAVE_DWARF_REGS
|
||||
endif # NO_DWARF
|
||||
|
@ -78,6 +78,8 @@ libperf-$(CONFIG_X86) += tsc.o
|
||||
libperf-y += cloexec.o
|
||||
libperf-y += thread-stack.o
|
||||
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
|
||||
libperf-y += parse-branch-options.o
|
||||
|
||||
libperf-$(CONFIG_LIBELF) += symbol-elf.o
|
||||
|
@ -621,7 +621,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
|
||||
struct addr_map_symbol *start,
|
||||
unsigned cycles)
|
||||
{
|
||||
unsigned long saddr = 0;
|
||||
u64 saddr = 0;
|
||||
int err;
|
||||
|
||||
if (!cycles)
|
||||
@ -640,7 +640,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
|
||||
start->addr == ams->sym->start + ams->map->start)))
|
||||
saddr = start->al_addr;
|
||||
if (saddr == 0)
|
||||
pr_debug2("BB with bad start: addr %lx start %lx sym %lx saddr %lx\n",
|
||||
pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
|
||||
ams->addr,
|
||||
start ? start->addr : 0,
|
||||
ams->sym ? ams->sym->start + ams->map->start : 0,
|
||||
|
@ -47,6 +47,8 @@
|
||||
#include "debug.h"
|
||||
#include "parse-options.h"
|
||||
|
||||
#include "intel-pt.h"
|
||||
|
||||
int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
|
||||
struct auxtrace_mmap_params *mp,
|
||||
void *userpg, int fd)
|
||||
@ -876,7 +878,7 @@ static bool auxtrace__dont_decode(struct perf_session *session)
|
||||
|
||||
int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
|
||||
union perf_event *event,
|
||||
struct perf_session *session __maybe_unused)
|
||||
struct perf_session *session)
|
||||
{
|
||||
enum auxtrace_type type = event->auxtrace_info.type;
|
||||
|
||||
@ -884,6 +886,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
|
||||
fprintf(stdout, " type: %u\n", type);
|
||||
|
||||
switch (type) {
|
||||
case PERF_AUXTRACE_INTEL_PT:
|
||||
return intel_pt_process_auxtrace_info(event, session);
|
||||
case PERF_AUXTRACE_UNKNOWN:
|
||||
default:
|
||||
return -EINVAL;
|
||||
|
@ -39,6 +39,7 @@ struct events_stats;
|
||||
|
||||
enum auxtrace_type {
|
||||
PERF_AUXTRACE_UNKNOWN,
|
||||
PERF_AUXTRACE_INTEL_PT,
|
||||
};
|
||||
|
||||
enum itrace_period_type {
|
||||
|
@ -770,7 +770,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
|
||||
continue;
|
||||
}
|
||||
/* Filter lines based on address */
|
||||
if (rt_die != cu_die)
|
||||
if (rt_die != cu_die) {
|
||||
/*
|
||||
* Address filtering
|
||||
* The line is included in given function, and
|
||||
@ -784,6 +784,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
|
||||
decf != dwarf_decl_file(&die_mem))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* Get source line */
|
||||
fname = dwarf_linesrc(line, NULL, NULL);
|
||||
|
||||
|
@ -115,6 +115,7 @@ void perf_evlist__close(struct perf_evlist *evlist);
|
||||
void perf_evlist__set_id_pos(struct perf_evlist *evlist);
|
||||
bool perf_can_sample_identifier(void);
|
||||
bool perf_can_record_switch_events(void);
|
||||
bool perf_can_record_cpu_wide(void);
|
||||
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
|
||||
int record_opts__config(struct record_opts *opts);
|
||||
|
||||
|
11
tools/perf/util/intel-pt-decoder/Build
Normal file
11
tools/perf/util/intel-pt-decoder/Build
Normal file
@ -0,0 +1,11 @@
|
||||
libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
|
||||
|
||||
inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
|
||||
inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
|
||||
|
||||
$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
|
||||
@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
|
||||
|
||||
$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
|
||||
|
||||
CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init
|
386
tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
Normal file
386
tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
Normal file
@ -0,0 +1,386 @@
|
||||
#!/bin/awk -f
|
||||
# gen-insn-attr-x86.awk: Instruction attribute table generator
|
||||
# Written by Masami Hiramatsu <mhiramat@redhat.com>
|
||||
#
|
||||
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
|
||||
|
||||
# Awk implementation sanity check
# Returns "" when sprintf("%x", 0) yields "0", otherwise a message
# describing the problem.
function check_awk_implement() {
	if (sprintf("%x", 0) == "0")
		return ""
	return "Your awk has a printf-format problem."
}
|
||||
|
||||
# Clear working vars
# Called from BEGIN and at the end of every EndTable rule so each table
# starts from an empty state.
function clear_vars() {
	# attribute maps: unprefixed entries plus one map per last-prefix slot
	delete table
	delete lptable1
	delete lptable2
	delete lptable3

	tname = ""
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id
}
|
||||
|
||||
# One-time setup: verify the awk implementation, emit the generated-file
# header, initialise the global id counters and define every regular
# expression and lookup table used while parsing the opcode map.
BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1
	geid = 1
	gaid = 0
	delete etable
	delete gtable
	delete atable

	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	# '&' is an ordinary character in an ERE; escaping it is undefined by
	# POSIX and triggers a warning in recent gawk, so it is left bare.
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v' or with (v1) superscript
	# accept the VEX prefix
	vexok_opcode_expr = "^v.*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript support *only* the VEX prefix
	vexonly_expr = "\\(v\\)"

	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"

	clear_vars()
}
|
||||
|
||||
# Report a problem found in the input at the current record number (NR)
# on stderr, then terminate with exit status 1.
function semantic_error(msg) {
	printf("Semantic error at %s: %s\n", NR, msg) > "/dev/stderr"
	exit 1
}
|
||||
|
||||
# Print a debugging message.
# The message goes to stderr, like semantic_error(): stdout carries the
# generated C tables (see the Usage line at the top of this script), so
# debug text written there would end up inside the generated file.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}
|
||||
|
||||
# Return the number of elements stored in arr, counted by iterating
# over its keys. key and total are local scratch variables.
function array_size(arr,   key,total) {
	total = 0
	for (key in arr) {
		total++
	}
	return total
}
|
||||
|
||||
# "Table:" line: echo it into the output as a C comment and reject it if
# the previous table is still open (tname is only reset by EndTable).
/^Table:/ {
	printf("/* %s */\n", $0)
	if (length(tname) > 0) {
		semantic_error("Hit Table: before EndTable:.");
	}
}
|
||||
|
||||
# "Referrer:" line: when it names an escape (more than one field), look up
# the escape id that the referring opcode entry registered and derive the
# table name from it.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unregistered referrer would otherwise create an empty
		# escape[] entry and silently name the table "..._0"; fail
		# loudly instead, as the GrpTable rule does for groups.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
|
||||
|
||||
# "AVXcode:" line: record the AVX map id when one is given, keep gaid one
# above the largest id seen, and pick the table name when no Referrer
# line has already set one.
/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		if (gaid <= aid) {
			gaid = aid + 1
		}
		if (tname == "") {
			# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", $2)
		}
	}
	if (aid == -1 && eid == -1) {
		# neither an AVX id nor an escape id: primary opcode table
		tname = "inat_primary_table"
	}
}
|
||||
|
||||
# "GrpTable:" line: echo it as a C comment, then resolve the group name in
# $2 to the id assigned when the group was first referenced by an opcode.
/^GrpTable:/ {
	printf("/* %s */\n", $0)
	if (!($2 in group)) {
		semantic_error("No group: " $2 )
	}
	gid = group[$2]
	tname = sprintf("inat_group_table_%s", gid)
}
|
||||
|
||||
# Emit one C attribute table.
#   tbl  - map from formatted index (e.g. "0x0f") to attribute expression
#   name - C declarator, including the array size suffix
#   fmt  - sprintf format used to turn a numeric index into a tbl key
#   n    - number of indices to scan (0 .. n-1)
# Only indices with a non-empty entry are printed, as designated
# initializers. i and id are locals so callers' globals are not clobbered.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print " [" id "] = " tbl[id] ","
	}
	print "};"
}
|
||||
|
||||
# "EndTable" line: print every non-empty attribute map collected for the
# current table, register the printed names in the lookup arrays used by
# the END rule, then reset the working state.
/^EndTable/ {
	if (gid == -1) {
		# print primary/escaped tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
			etable[eid,0] = tname
			if (aid >= 0)
				atable[aid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
			etable[eid,1] = tname "_1"
			if (aid >= 0)
				atable[aid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
			etable[eid,2] = tname "_2"
			if (aid >= 0)
				atable[aid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
			etable[eid,3] = tname "_3"
			if (aid >= 0)
				atable[aid,3] = tname "_3"
		}
	} else {
		# print group tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
			gtable[gid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
			gtable[gid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
			gtable[gid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
			gtable[gid,3] = tname "_3"
		}
	}
	print ""
	clear_vars()
}
|
||||
|
||||
# Combine two flag expressions into one C expression joined with " | ".
# When either side is empty/false the other side is returned unchanged.
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
|
||||
|
||||
# convert operands to flags.
#   count - number of entries in opnd
#   opnd  - operand strings, indexed 1..count
# Returns the immediate flag (if any operand matches imm_expr) combined
# with INAT_MODRM (if any other operand matches modrm_expr). A second
# immediate is only accepted when it is "Ib" and adds INAT_SCNDIMM.
function convert_operands(count,opnd,       op,k,imm,mod)
{
	imm = null
	mod = null
	for (k = 1; k <= count; k++) {
		op = opnd[k]
		if (match(op, imm_expr) == 1) {
			if (!imm_flag[op])
				semantic_error("Unknown imm opnd: " op)
			if (!imm) {
				imm = imm_flag[op]
			} else {
				if (op != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			}
		} else if (match(op, modrm_expr)) {
			mod = "INAT_MODRM"
		}
	}
	return add_flags(imm, mod)
}
|
||||
|
||||
# Opcode line ("<hex>: <opcode> [operands] [(ext)] | ..."): translate each
# '|'-separated alternative into attribute flags and store them in the
# unprefixed table and/or the last-prefix tables for this index.
# ':' is an ordinary ERE character, so it is matched without a backslash
# (escaping it is undefined by POSIX and warned about by recent gawk).
/^[0-9a-f]+:/ {
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is every field after '#', concatenated
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# nothing to record for this alternative
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the unprefixed entry when any last-prefix variant was stored
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
|
||||
|
||||
# Final step: emit the three lookup arrays that map escape ids, group ids
# and AVX map ids (each by last-prefix slot) to the tables printed above.
# Nothing is printed when the awk sanity check in BEGIN failed.
END {
	if (awkchecked != "")
		exit 1

	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++) {
		for (j = 0; j < max_lprefix; j++) {
			if (etable[i,j])
				print " ["i"]["j"] = "etable[i,j]","
		}
	}
	print "};\n"

	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++) {
		for (j = 0; j < max_lprefix; j++) {
			if (gtable[i,j])
				print " ["i"]["j"] = "gtable[i,j]","
		}
	}
	print "};\n"

	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++) {
		for (j = 0; j < max_lprefix; j++) {
			if (atable[i,j])
				print " ["i"]["j"] = "atable[i,j]","
		}
	}
	print "};"
}
|
96
tools/perf/util/intel-pt-decoder/inat.c
Normal file
96
tools/perf/util/intel-pt-decoder/inat.c
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* x86 instruction attribute tables
|
||||
*
|
||||
* Written by Masami Hiramatsu <mhiramat@redhat.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
#include <asm/insn.h>
|
||||
|
||||
/* Attribute tables are generated from opcode map */
|
||||
#include "inat-tables.c"
|
||||
|
||||
/* Attribute search APIs */
|
||||
/* Look up the attribute of a one-byte opcode in the primary table. */
insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
{
	const insn_attr_t attr = inat_primary_table[opcode];

	return attr;
}
|
||||
|
||||
/*
 * Translate a prefix byte into its last-prefix id by looking up its
 * primary-table attribute and passing it to inat_last_prefix_id().
 */
int inat_get_last_prefix_id(insn_byte_t last_pfx)
{
	return inat_last_prefix_id(inat_get_opcode_attribute(last_pfx));
}
|
||||
|
||||
/*
 * Look up the attribute of @opcode in the escape table selected by
 * @esc_attr. The unprefixed table is consulted first; when its entry is
 * flagged as having variants and @lpfx_id is non-zero, the table for
 * that last prefix is used instead. Returns 0 when the needed table
 * does not exist.
 */
insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
				      insn_attr_t esc_attr)
{
	const int esc_id = inat_escape_id(esc_attr);
	const insn_attr_t *attrs = inat_escape_tables[esc_id][0];

	if (!attrs)
		return 0;

	if (lpfx_id && inat_has_variant(attrs[opcode])) {
		attrs = inat_escape_tables[esc_id][lpfx_id];
		if (!attrs)
			return 0;
	}

	return attrs[opcode];
}
|
||||
|
||||
/*
 * Look up the attribute for the ModRM.reg field of @modrm in the group
 * table selected by @grp_attr, OR-ed with the non-group bits of
 * @grp_attr. When no table exists for the group (or for the requested
 * last prefix of a variant entry), only those common bits are returned.
 */
insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
				     insn_attr_t grp_attr)
{
	const insn_attr_t common = inat_group_common_attribute(grp_attr);
	const int grp_id = inat_group_id(grp_attr);
	const insn_attr_t *attrs = inat_group_tables[grp_id][0];

	if (!attrs)
		return common;

	if (lpfx_id && inat_has_variant(attrs[X86_MODRM_REG(modrm)])) {
		attrs = inat_group_tables[grp_id][lpfx_id];
		if (!attrs)
			return common;
	}

	return attrs[X86_MODRM_REG(modrm)] | common;
}
|
||||
|
||||
/*
 * Look up the attribute of @opcode in the AVX table for map @vex_m.
 * Out-of-range @vex_m / @vex_p and missing tables yield 0. The table
 * for @vex_p is used only when @vex_p is non-zero and the master-table
 * entry is not a group.
 */
insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
				   insn_byte_t vex_p)
{
	const insn_attr_t *attrs;

	if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
		return 0;

	/* Start from the master (no last prefix) table */
	attrs = inat_avx_tables[vex_m][0];
	if (!attrs)
		return 0;

	if (vex_p && !inat_is_group(attrs[opcode])) {
		/* Not a group: take the attribute from the prefixed table */
		attrs = inat_avx_tables[vex_m][vex_p];
		if (!attrs)
			return 0;
	}

	return attrs[opcode];
}
|
221
tools/perf/util/intel-pt-decoder/inat.h
Normal file
221
tools/perf/util/intel-pt-decoder/inat.h
Normal file
@ -0,0 +1,221 @@
|
||||
#ifndef _ASM_X86_INAT_H
|
||||
#define _ASM_X86_INAT_H
|
||||
/*
|
||||
* x86 instruction attributes
|
||||
*
|
||||
* Written by Masami Hiramatsu <mhiramat@redhat.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
*/
|
||||
#include <asm/inat_types.h>
|
||||
|
||||
/*
|
||||
* Internal bits. Don't use bitmasks directly, because these bits are
|
||||
* unstable. You should use checking functions.
|
||||
*/
|
||||
|
||||
#define INAT_OPCODE_TABLE_SIZE 256
|
||||
#define INAT_GROUP_TABLE_SIZE 8
|
||||
|
||||
/* Legacy last prefixes */
|
||||
#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
|
||||
#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
|
||||
#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
|
||||
/* Other Legacy prefixes */
|
||||
#define INAT_PFX_LOCK 4 /* 0xF0 */
|
||||
#define INAT_PFX_CS 5 /* 0x2E */
|
||||
#define INAT_PFX_DS 6 /* 0x3E */
|
||||
#define INAT_PFX_ES 7 /* 0x26 */
|
||||
#define INAT_PFX_FS 8 /* 0x64 */
|
||||
#define INAT_PFX_GS 9 /* 0x65 */
|
||||
#define INAT_PFX_SS 10 /* 0x36 */
|
||||
#define INAT_PFX_ADDRSZ 11 /* 0x67 */
|
||||
/* x86-64 REX prefix */
|
||||
#define INAT_PFX_REX 12 /* 0x4X */
|
||||
/* AVX VEX prefixes */
|
||||
#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
|
||||
#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
|
||||
|
||||
#define INAT_LSTPFX_MAX 3
|
||||
#define INAT_LGCPFX_MAX 11
|
||||
|
||||
/* Immediate size */
|
||||
#define INAT_IMM_BYTE 1
|
||||
#define INAT_IMM_WORD 2
|
||||
#define INAT_IMM_DWORD 3
|
||||
#define INAT_IMM_QWORD 4
|
||||
#define INAT_IMM_PTR 5
|
||||
#define INAT_IMM_VWORD32 6
|
||||
#define INAT_IMM_VWORD 7
|
||||
|
||||
/* Legacy prefix */
|
||||
#define INAT_PFX_OFFS 0
|
||||
#define INAT_PFX_BITS 4
|
||||
#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
|
||||
#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
|
||||
/* Escape opcodes */
|
||||
#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
|
||||
#define INAT_ESC_BITS 2
|
||||
#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
|
||||
#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
|
||||
/* Group opcodes (1-16) */
|
||||
#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
|
||||
#define INAT_GRP_BITS 5
|
||||
#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
|
||||
#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
|
||||
/* Immediates */
|
||||
#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
|
||||
#define INAT_IMM_BITS 3
|
||||
#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
|
||||
/* Flags */
|
||||
#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
|
||||
#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
|
||||
#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
|
||||
#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
|
||||
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
|
||||
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
|
||||
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
|
||||
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
|
||||
/*
 * Attribute making macros for attribute tables.
 * Each macro shifts its argument into the corresponding bit field; the
 * argument is parenthesized so that compound expressions (e.g. "a | b")
 * are shifted as a whole.
 */
#define INAT_MAKE_PREFIX(pfx)	((pfx) << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc)	((esc) << INAT_ESC_OFFS)
#define INAT_MAKE_GROUP(grp)	(((grp) << INAT_GRP_OFFS) | INAT_MODRM)
#define INAT_MAKE_IMM(imm)	((imm) << INAT_IMM_OFFS)
|
||||
|
||||
/* Attribute search APIs */
|
||||
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
|
||||
extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
|
||||
extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
|
||||
int lpfx_id,
|
||||
insn_attr_t esc_attr);
|
||||
extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
|
||||
int lpfx_id,
|
||||
insn_attr_t esc_attr);
|
||||
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
|
||||
insn_byte_t vex_m,
|
||||
insn_byte_t vex_pp);
|
||||
|
||||
/* Attribute checking functions */
|
||||
/* True when the prefix field of @attr is in 1..INAT_LGCPFX_MAX. */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	return pfx != 0 && pfx <= INAT_LGCPFX_MAX;
}
|
||||
|
||||
/* True when the prefix field of @attr equals INAT_PFX_ADDRSZ. */
static inline int inat_is_address_size_prefix(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	return pfx == INAT_PFX_ADDRSZ;
}
|
||||
|
||||
/* True when the prefix field of @attr equals INAT_PFX_OPNDSZ. */
static inline int inat_is_operand_size_prefix(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	return pfx == INAT_PFX_OPNDSZ;
}
|
||||
|
||||
/* True when the prefix field of @attr equals INAT_PFX_REX. */
static inline int inat_is_rex_prefix(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	return pfx == INAT_PFX_REX;
}
|
||||
|
||||
/*
 * Return the prefix field of @attr when it does not exceed
 * INAT_LSTPFX_MAX, otherwise 0.
 */
static inline int inat_last_prefix_id(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	if (pfx > INAT_LSTPFX_MAX)
		return 0;
	return pfx;
}
|
||||
|
||||
/* True when the prefix field of @attr is INAT_PFX_VEX2 or INAT_PFX_VEX3. */
static inline int inat_is_vex_prefix(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	return pfx == INAT_PFX_VEX2 || pfx == INAT_PFX_VEX3;
}
|
||||
|
||||
/* True when the prefix field of @attr equals INAT_PFX_VEX3. */
static inline int inat_is_vex3_prefix(insn_attr_t attr)
{
	const insn_attr_t pfx = attr & INAT_PFX_MASK;

	return pfx == INAT_PFX_VEX3;
}
|
||||
|
||||
/* Non-zero when any bit of the escape field of @attr is set. */
static inline int inat_is_escape(insn_attr_t attr)
{
	const insn_attr_t esc_bits = attr & INAT_ESC_MASK;

	return esc_bits;
}
|
||||
|
||||
/* Extract the escape field of @attr, shifted down to bit 0. */
static inline int inat_escape_id(insn_attr_t attr)
{
	const insn_attr_t esc_bits = attr & INAT_ESC_MASK;

	return esc_bits >> INAT_ESC_OFFS;
}
|
||||
|
||||
/* Non-zero when any bit of the group field of @attr is set. */
static inline int inat_is_group(insn_attr_t attr)
{
	const insn_attr_t grp_bits = attr & INAT_GRP_MASK;

	return grp_bits;
}
|
||||
|
||||
/* Extract the group field of @attr, shifted down to bit 0. */
static inline int inat_group_id(insn_attr_t attr)
{
	const insn_attr_t grp_bits = attr & INAT_GRP_MASK;

	return grp_bits >> INAT_GRP_OFFS;
}
|
||||
|
||||
/* Return @attr with the group field cleared. */
static inline int inat_group_common_attribute(insn_attr_t attr)
{
	const insn_attr_t without_group = attr & ~INAT_GRP_MASK;

	return without_group;
}
|
||||
|
||||
/* Non-zero when any bit of the immediate field of @attr is set. */
static inline int inat_has_immediate(insn_attr_t attr)
{
	const insn_attr_t imm_bits = attr & INAT_IMM_MASK;

	return imm_bits;
}
|
||||
|
||||
/* Extract the immediate field of @attr, shifted down to bit 0. */
static inline int inat_immediate_size(insn_attr_t attr)
{
	const insn_attr_t imm_bits = attr & INAT_IMM_MASK;

	return imm_bits >> INAT_IMM_OFFS;
}
|
||||
|
||||
/* Non-zero when the INAT_MODRM flag is set in @attr. */
static inline int inat_has_modrm(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_MODRM;

	return flag;
}
|
||||
|
||||
/* Non-zero when the INAT_FORCE64 flag is set in @attr. */
static inline int inat_is_force64(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_FORCE64;

	return flag;
}
|
||||
|
||||
/* Non-zero when the INAT_SCNDIMM flag is set in @attr. */
static inline int inat_has_second_immediate(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_SCNDIMM;

	return flag;
}
|
||||
|
||||
/* Non-zero when the INAT_MOFFSET flag is set in @attr. */
static inline int inat_has_moffset(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_MOFFSET;

	return flag;
}
|
||||
|
||||
/* Non-zero when the INAT_VARIANT flag is set in @attr. */
static inline int inat_has_variant(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_VARIANT;

	return flag;
}
|
||||
|
||||
/* Non-zero when the INAT_VEXOK flag is set in @attr. */
static inline int inat_accept_vex(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_VEXOK;

	return flag;
}
|
||||
|
||||
/* Non-zero when the INAT_VEXONLY flag is set in @attr. */
static inline int inat_must_vex(insn_attr_t attr)
{
	const insn_attr_t flag = attr & INAT_VEXONLY;

	return flag;
}
|
||||
#endif
|
594
tools/perf/util/intel-pt-decoder/insn.c
Normal file
594
tools/perf/util/intel-pt-decoder/insn.c
Normal file
@ -0,0 +1,594 @@
|
||||
/*
|
||||
* x86 instruction analysis
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2002, 2004, 2009
|
||||
*/
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/string.h>
|
||||
#else
|
||||
#include <string.h>
|
||||
#endif
|
||||
#include <asm/inat.h>
|
||||
#include <asm/insn.h>
|
||||
|
||||
/*
 * Byte-fetch helpers for the decoder.
 *
 * All macro arguments are parenthesized so that expressions such as
 * "&insn" or "a ? b : c" can be passed safely. Values are read with
 * memcpy() rather than by dereferencing a casted pointer, because the
 * instruction stream gives no alignment guarantee for multi-byte reads.
 *
 * get_next() and peek_nbyte_next() jump to a local "err_out" label when
 * the requested bytes would run past (insn)->end_kaddr.
 */

/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) <= (insn)->end_kaddr)

/* Read a value of type t at next_byte and advance past it (unchecked) */
#define __get_next(t, insn)	\
	({ t r; memcpy(&r, (insn)->next_byte, sizeof(t)); (insn)->next_byte += sizeof(t); r; })

/* Read a value of type t at next_byte + n without advancing (unchecked) */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r; memcpy(&r, (insn)->next_byte + (n), sizeof(t)); r; })

#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
|
||||
|
||||
/**
|
||||
* insn_init() - initialize struct insn
|
||||
* @insn: &struct insn to be initialized
|
||||
* @kaddr: address (in kernel memory) of instruction (or copy thereof)
|
||||
* @x86_64: !0 for 64-bit kernel or 64-bit app
|
||||
*/
|
||||
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
|
||||
{
|
||||
/*
|
||||
* Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
|
||||
* even if the input buffer is long enough to hold them.
|
||||
*/
|
||||
if (buf_len > MAX_INSN_SIZE)
|
||||
buf_len = MAX_INSN_SIZE;
|
||||
|
||||
memset(insn, 0, sizeof(*insn));
|
||||
insn->kaddr = kaddr;
|
||||
insn->end_kaddr = kaddr + buf_len;
|
||||
insn->next_byte = kaddr;
|
||||
insn->x86_64 = x86_64 ? 1 : 0;
|
||||
insn->opnd_bytes = 4;
|
||||
if (x86_64)
|
||||
insn->addr_bytes = 8;
|
||||
else
|
||||
insn->addr_bytes = 4;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_get_prefixes - scan x86 instruction prefix bytes
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* Populates the @insn->prefixes bitmap, and updates @insn->next_byte
|
||||
* to point to the (first) opcode. No effect if @insn->prefixes.got
|
||||
* is already set.
|
||||
*/
|
||||
void insn_get_prefixes(struct insn *insn)
|
||||
{
|
||||
struct insn_field *prefixes = &insn->prefixes;
|
||||
insn_attr_t attr;
|
||||
insn_byte_t b, lb;
|
||||
int i, nb;
|
||||
|
||||
if (prefixes->got)
|
||||
return;
|
||||
|
||||
nb = 0;
|
||||
lb = 0;
|
||||
b = peek_next(insn_byte_t, insn);
|
||||
attr = inat_get_opcode_attribute(b);
|
||||
while (inat_is_legacy_prefix(attr)) {
|
||||
/* Skip if same prefix */
|
||||
for (i = 0; i < nb; i++)
|
||||
if (prefixes->bytes[i] == b)
|
||||
goto found;
|
||||
if (nb == 4)
|
||||
/* Invalid instruction */
|
||||
break;
|
||||
prefixes->bytes[nb++] = b;
|
||||
if (inat_is_address_size_prefix(attr)) {
|
||||
/* address size switches 2/4 or 4/8 */
|
||||
if (insn->x86_64)
|
||||
insn->addr_bytes ^= 12;
|
||||
else
|
||||
insn->addr_bytes ^= 6;
|
||||
} else if (inat_is_operand_size_prefix(attr)) {
|
||||
/* oprand size switches 2/4 */
|
||||
insn->opnd_bytes ^= 6;
|
||||
}
|
||||
found:
|
||||
prefixes->nbytes++;
|
||||
insn->next_byte++;
|
||||
lb = b;
|
||||
b = peek_next(insn_byte_t, insn);
|
||||
attr = inat_get_opcode_attribute(b);
|
||||
}
|
||||
/* Set the last prefix */
|
||||
if (lb && lb != insn->prefixes.bytes[3]) {
|
||||
if (unlikely(insn->prefixes.bytes[3])) {
|
||||
/* Swap the last prefix */
|
||||
b = insn->prefixes.bytes[3];
|
||||
for (i = 0; i < nb; i++)
|
||||
if (prefixes->bytes[i] == lb)
|
||||
prefixes->bytes[i] = b;
|
||||
}
|
||||
insn->prefixes.bytes[3] = lb;
|
||||
}
|
||||
|
||||
/* Decode REX prefix */
|
||||
if (insn->x86_64) {
|
||||
b = peek_next(insn_byte_t, insn);
|
||||
attr = inat_get_opcode_attribute(b);
|
||||
if (inat_is_rex_prefix(attr)) {
|
||||
insn->rex_prefix.value = b;
|
||||
insn->rex_prefix.nbytes = 1;
|
||||
insn->next_byte++;
|
||||
if (X86_REX_W(b))
|
||||
/* REX.W overrides opnd_size */
|
||||
insn->opnd_bytes = 8;
|
||||
}
|
||||
}
|
||||
insn->rex_prefix.got = 1;
|
||||
|
||||
/* Decode VEX prefix */
|
||||
b = peek_next(insn_byte_t, insn);
|
||||
attr = inat_get_opcode_attribute(b);
|
||||
if (inat_is_vex_prefix(attr)) {
|
||||
insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
|
||||
if (!insn->x86_64) {
|
||||
/*
|
||||
* In 32-bits mode, if the [7:6] bits (mod bits of
|
||||
* ModRM) on the second byte are not 11b, it is
|
||||
* LDS or LES.
|
||||
*/
|
||||
if (X86_MODRM_MOD(b2) != 3)
|
||||
goto vex_end;
|
||||
}
|
||||
insn->vex_prefix.bytes[0] = b;
|
||||
insn->vex_prefix.bytes[1] = b2;
|
||||
if (inat_is_vex3_prefix(attr)) {
|
||||
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
|
||||
insn->vex_prefix.bytes[2] = b2;
|
||||
insn->vex_prefix.nbytes = 3;
|
||||
insn->next_byte += 3;
|
||||
if (insn->x86_64 && X86_VEX_W(b2))
|
||||
/* VEX.W overrides opnd_size */
|
||||
insn->opnd_bytes = 8;
|
||||
} else {
|
||||
/*
|
||||
* For VEX2, fake VEX3-like byte#2.
|
||||
* Makes it easier to decode vex.W, vex.vvvv,
|
||||
* vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
|
||||
*/
|
||||
insn->vex_prefix.bytes[2] = b2 & 0x7f;
|
||||
insn->vex_prefix.nbytes = 2;
|
||||
insn->next_byte += 2;
|
||||
}
|
||||
}
|
||||
vex_end:
|
||||
insn->vex_prefix.got = 1;
|
||||
|
||||
prefixes->got = 1;
|
||||
|
||||
err_out:
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_get_opcode - collect opcode(s)
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* Populates @insn->opcode, updates @insn->next_byte to point past the
|
||||
* opcode byte(s), and set @insn->attr (except for groups).
|
||||
* If necessary, first collects any preceding (prefix) bytes.
|
||||
* Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
|
||||
* is already 1.
|
||||
*/
|
||||
void insn_get_opcode(struct insn *insn)
|
||||
{
|
||||
struct insn_field *opcode = &insn->opcode;
|
||||
insn_byte_t op;
|
||||
int pfx_id;
|
||||
if (opcode->got)
|
||||
return;
|
||||
if (!insn->prefixes.got)
|
||||
insn_get_prefixes(insn);
|
||||
|
||||
/* Get first opcode */
|
||||
op = get_next(insn_byte_t, insn);
|
||||
opcode->bytes[0] = op;
|
||||
opcode->nbytes = 1;
|
||||
|
||||
/* Check if there is VEX prefix or not */
|
||||
if (insn_is_avx(insn)) {
|
||||
insn_byte_t m, p;
|
||||
m = insn_vex_m_bits(insn);
|
||||
p = insn_vex_p_bits(insn);
|
||||
insn->attr = inat_get_avx_attribute(op, m, p);
|
||||
if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
|
||||
insn->attr = 0; /* This instruction is bad */
|
||||
goto end; /* VEX has only 1 byte for opcode */
|
||||
}
|
||||
|
||||
insn->attr = inat_get_opcode_attribute(op);
|
||||
while (inat_is_escape(insn->attr)) {
|
||||
/* Get escaped opcode */
|
||||
op = get_next(insn_byte_t, insn);
|
||||
opcode->bytes[opcode->nbytes++] = op;
|
||||
pfx_id = insn_last_prefix_id(insn);
|
||||
insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
|
||||
}
|
||||
if (inat_must_vex(insn->attr))
|
||||
insn->attr = 0; /* This instruction is bad */
|
||||
end:
|
||||
opcode->got = 1;
|
||||
|
||||
err_out:
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_get_modrm - collect ModRM byte, if any
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* Populates @insn->modrm and updates @insn->next_byte to point past the
|
||||
* ModRM byte, if any. If necessary, first collects the preceding bytes
|
||||
* (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
|
||||
*/
|
||||
void insn_get_modrm(struct insn *insn)
|
||||
{
|
||||
struct insn_field *modrm = &insn->modrm;
|
||||
insn_byte_t pfx_id, mod;
|
||||
if (modrm->got)
|
||||
return;
|
||||
if (!insn->opcode.got)
|
||||
insn_get_opcode(insn);
|
||||
|
||||
if (inat_has_modrm(insn->attr)) {
|
||||
mod = get_next(insn_byte_t, insn);
|
||||
modrm->value = mod;
|
||||
modrm->nbytes = 1;
|
||||
if (inat_is_group(insn->attr)) {
|
||||
pfx_id = insn_last_prefix_id(insn);
|
||||
insn->attr = inat_get_group_attribute(mod, pfx_id,
|
||||
insn->attr);
|
||||
if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
|
||||
insn->attr = 0; /* This is bad */
|
||||
}
|
||||
}
|
||||
|
||||
if (insn->x86_64 && inat_is_force64(insn->attr))
|
||||
insn->opnd_bytes = 8;
|
||||
modrm->got = 1;
|
||||
|
||||
err_out:
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* insn_rip_relative() - Does instruction use RIP-relative addressing mode?
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* If necessary, first collects the instruction up to and including the
|
||||
* ModRM byte. No effect if @insn->x86_64 is 0.
|
||||
*/
|
||||
int insn_rip_relative(struct insn *insn)
|
||||
{
|
||||
struct insn_field *modrm = &insn->modrm;
|
||||
|
||||
if (!insn->x86_64)
|
||||
return 0;
|
||||
if (!modrm->got)
|
||||
insn_get_modrm(insn);
|
||||
/*
|
||||
* For rip-relative instructions, the mod field (top 2 bits)
|
||||
* is zero and the r/m field (bottom 3 bits) is 0x5.
|
||||
*/
|
||||
return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_get_sib() - Get the SIB byte of instruction
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* If necessary, first collects the instruction up to and including the
|
||||
* ModRM byte.
|
||||
*/
|
||||
void insn_get_sib(struct insn *insn)
|
||||
{
|
||||
insn_byte_t modrm;
|
||||
|
||||
if (insn->sib.got)
|
||||
return;
|
||||
if (!insn->modrm.got)
|
||||
insn_get_modrm(insn);
|
||||
if (insn->modrm.nbytes) {
|
||||
modrm = (insn_byte_t)insn->modrm.value;
|
||||
if (insn->addr_bytes != 2 &&
|
||||
X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
|
||||
insn->sib.value = get_next(insn_byte_t, insn);
|
||||
insn->sib.nbytes = 1;
|
||||
}
|
||||
}
|
||||
insn->sib.got = 1;
|
||||
|
||||
err_out:
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* insn_get_displacement() - Get the displacement of instruction
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* If necessary, first collects the instruction up to and including the
|
||||
* SIB byte.
|
||||
* Displacement value is sign-expanded.
|
||||
*/
|
||||
void insn_get_displacement(struct insn *insn)
|
||||
{
|
||||
insn_byte_t mod, rm, base;
|
||||
|
||||
if (insn->displacement.got)
|
||||
return;
|
||||
if (!insn->sib.got)
|
||||
insn_get_sib(insn);
|
||||
if (insn->modrm.nbytes) {
|
||||
/*
|
||||
* Interpreting the modrm byte:
|
||||
* mod = 00 - no displacement fields (exceptions below)
|
||||
* mod = 01 - 1-byte displacement field
|
||||
* mod = 10 - displacement field is 4 bytes, or 2 bytes if
|
||||
* address size = 2 (0x67 prefix in 32-bit mode)
|
||||
* mod = 11 - no memory operand
|
||||
*
|
||||
* If address size = 2...
|
||||
* mod = 00, r/m = 110 - displacement field is 2 bytes
|
||||
*
|
||||
* If address size != 2...
|
||||
* mod != 11, r/m = 100 - SIB byte exists
|
||||
* mod = 00, SIB base = 101 - displacement field is 4 bytes
|
||||
* mod = 00, r/m = 101 - rip-relative addressing, displacement
|
||||
* field is 4 bytes
|
||||
*/
|
||||
mod = X86_MODRM_MOD(insn->modrm.value);
|
||||
rm = X86_MODRM_RM(insn->modrm.value);
|
||||
base = X86_SIB_BASE(insn->sib.value);
|
||||
if (mod == 3)
|
||||
goto out;
|
||||
if (mod == 1) {
|
||||
insn->displacement.value = get_next(char, insn);
|
||||
insn->displacement.nbytes = 1;
|
||||
} else if (insn->addr_bytes == 2) {
|
||||
if ((mod == 0 && rm == 6) || mod == 2) {
|
||||
insn->displacement.value =
|
||||
get_next(short, insn);
|
||||
insn->displacement.nbytes = 2;
|
||||
}
|
||||
} else {
|
||||
if ((mod == 0 && rm == 5) || mod == 2 ||
|
||||
(mod == 0 && base == 5)) {
|
||||
insn->displacement.value = get_next(int, insn);
|
||||
insn->displacement.nbytes = 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
insn->displacement.got = 1;
|
||||
|
||||
err_out:
|
||||
return;
|
||||
}
|
||||
|
||||
/* Decode moffset16/32/64. Return 0 if failed */
|
||||
static int __get_moffset(struct insn *insn)
|
||||
{
|
||||
switch (insn->addr_bytes) {
|
||||
case 2:
|
||||
insn->moffset1.value = get_next(short, insn);
|
||||
insn->moffset1.nbytes = 2;
|
||||
break;
|
||||
case 4:
|
||||
insn->moffset1.value = get_next(int, insn);
|
||||
insn->moffset1.nbytes = 4;
|
||||
break;
|
||||
case 8:
|
||||
insn->moffset1.value = get_next(int, insn);
|
||||
insn->moffset1.nbytes = 4;
|
||||
insn->moffset2.value = get_next(int, insn);
|
||||
insn->moffset2.nbytes = 4;
|
||||
break;
|
||||
default: /* opnd_bytes must be modified manually */
|
||||
goto err_out;
|
||||
}
|
||||
insn->moffset1.got = insn->moffset2.got = 1;
|
||||
|
||||
return 1;
|
||||
|
||||
err_out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Decode imm v32(Iz). Return 0 if failed */
|
||||
static int __get_immv32(struct insn *insn)
|
||||
{
|
||||
switch (insn->opnd_bytes) {
|
||||
case 2:
|
||||
insn->immediate.value = get_next(short, insn);
|
||||
insn->immediate.nbytes = 2;
|
||||
break;
|
||||
case 4:
|
||||
case 8:
|
||||
insn->immediate.value = get_next(int, insn);
|
||||
insn->immediate.nbytes = 4;
|
||||
break;
|
||||
default: /* opnd_bytes must be modified manually */
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
err_out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Decode imm v64(Iv/Ov), Return 0 if failed */
|
||||
static int __get_immv(struct insn *insn)
|
||||
{
|
||||
switch (insn->opnd_bytes) {
|
||||
case 2:
|
||||
insn->immediate1.value = get_next(short, insn);
|
||||
insn->immediate1.nbytes = 2;
|
||||
break;
|
||||
case 4:
|
||||
insn->immediate1.value = get_next(int, insn);
|
||||
insn->immediate1.nbytes = 4;
|
||||
break;
|
||||
case 8:
|
||||
insn->immediate1.value = get_next(int, insn);
|
||||
insn->immediate1.nbytes = 4;
|
||||
insn->immediate2.value = get_next(int, insn);
|
||||
insn->immediate2.nbytes = 4;
|
||||
break;
|
||||
default: /* opnd_bytes must be modified manually */
|
||||
goto err_out;
|
||||
}
|
||||
insn->immediate1.got = insn->immediate2.got = 1;
|
||||
|
||||
return 1;
|
||||
err_out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Decode ptr16:16/32(Ap) */
|
||||
static int __get_immptr(struct insn *insn)
|
||||
{
|
||||
switch (insn->opnd_bytes) {
|
||||
case 2:
|
||||
insn->immediate1.value = get_next(short, insn);
|
||||
insn->immediate1.nbytes = 2;
|
||||
break;
|
||||
case 4:
|
||||
insn->immediate1.value = get_next(int, insn);
|
||||
insn->immediate1.nbytes = 4;
|
||||
break;
|
||||
case 8:
|
||||
/* ptr16:64 is not exist (no segment) */
|
||||
return 0;
|
||||
default: /* opnd_bytes must be modified manually */
|
||||
goto err_out;
|
||||
}
|
||||
insn->immediate2.value = get_next(unsigned short, insn);
|
||||
insn->immediate2.nbytes = 2;
|
||||
insn->immediate1.got = insn->immediate2.got = 1;
|
||||
|
||||
return 1;
|
||||
err_out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_get_immediate() - Get the immediates of instruction
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* If necessary, first collects the instruction up to and including the
|
||||
* displacement bytes.
|
||||
* Basically, most of immediates are sign-expanded. Unsigned-value can be
|
||||
* get by bit masking with ((1 << (nbytes * 8)) - 1)
|
||||
*/
|
||||
void insn_get_immediate(struct insn *insn)
|
||||
{
|
||||
if (insn->immediate.got)
|
||||
return;
|
||||
if (!insn->displacement.got)
|
||||
insn_get_displacement(insn);
|
||||
|
||||
if (inat_has_moffset(insn->attr)) {
|
||||
if (!__get_moffset(insn))
|
||||
goto err_out;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!inat_has_immediate(insn->attr))
|
||||
/* no immediates */
|
||||
goto done;
|
||||
|
||||
switch (inat_immediate_size(insn->attr)) {
|
||||
case INAT_IMM_BYTE:
|
||||
insn->immediate.value = get_next(char, insn);
|
||||
insn->immediate.nbytes = 1;
|
||||
break;
|
||||
case INAT_IMM_WORD:
|
||||
insn->immediate.value = get_next(short, insn);
|
||||
insn->immediate.nbytes = 2;
|
||||
break;
|
||||
case INAT_IMM_DWORD:
|
||||
insn->immediate.value = get_next(int, insn);
|
||||
insn->immediate.nbytes = 4;
|
||||
break;
|
||||
case INAT_IMM_QWORD:
|
||||
insn->immediate1.value = get_next(int, insn);
|
||||
insn->immediate1.nbytes = 4;
|
||||
insn->immediate2.value = get_next(int, insn);
|
||||
insn->immediate2.nbytes = 4;
|
||||
break;
|
||||
case INAT_IMM_PTR:
|
||||
if (!__get_immptr(insn))
|
||||
goto err_out;
|
||||
break;
|
||||
case INAT_IMM_VWORD32:
|
||||
if (!__get_immv32(insn))
|
||||
goto err_out;
|
||||
break;
|
||||
case INAT_IMM_VWORD:
|
||||
if (!__get_immv(insn))
|
||||
goto err_out;
|
||||
break;
|
||||
default:
|
||||
/* Here, insn must have an immediate, but failed */
|
||||
goto err_out;
|
||||
}
|
||||
if (inat_has_second_immediate(insn->attr)) {
|
||||
insn->immediate2.value = get_next(char, insn);
|
||||
insn->immediate2.nbytes = 1;
|
||||
}
|
||||
done:
|
||||
insn->immediate.got = 1;
|
||||
|
||||
err_out:
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* insn_get_length() - Get the length of instruction
|
||||
* @insn: &struct insn containing instruction
|
||||
*
|
||||
* If necessary, first collects the instruction up to and including the
|
||||
* immediates bytes.
|
||||
*/
|
||||
void insn_get_length(struct insn *insn)
|
||||
{
|
||||
if (insn->length)
|
||||
return;
|
||||
if (!insn->immediate.got)
|
||||
insn_get_immediate(insn);
|
||||
insn->length = (unsigned char)((unsigned long)insn->next_byte
|
||||
- (unsigned long)insn->kaddr);
|
||||
}
|
201
tools/perf/util/intel-pt-decoder/insn.h
Normal file
201
tools/perf/util/intel-pt-decoder/insn.h
Normal file
@ -0,0 +1,201 @@
|
||||
#ifndef _ASM_X86_INSN_H
|
||||
#define _ASM_X86_INSN_H
|
||||
/*
|
||||
* x86 instruction analysis
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2009
|
||||
*/
|
||||
|
||||
/* insn_attr_t is defined in inat.h */
|
||||
#include <asm/inat.h>
|
||||
|
||||
struct insn_field {
|
||||
union {
|
||||
insn_value_t value;
|
||||
insn_byte_t bytes[4];
|
||||
};
|
||||
/* !0 if we've run insn_get_xxx() for this field */
|
||||
unsigned char got;
|
||||
unsigned char nbytes;
|
||||
};
|
||||
|
||||
struct insn {
|
||||
struct insn_field prefixes; /*
|
||||
* Prefixes
|
||||
* prefixes.bytes[3]: last prefix
|
||||
*/
|
||||
struct insn_field rex_prefix; /* REX prefix */
|
||||
struct insn_field vex_prefix; /* VEX prefix */
|
||||
struct insn_field opcode; /*
|
||||
* opcode.bytes[0]: opcode1
|
||||
* opcode.bytes[1]: opcode2
|
||||
* opcode.bytes[2]: opcode3
|
||||
*/
|
||||
struct insn_field modrm;
|
||||
struct insn_field sib;
|
||||
struct insn_field displacement;
|
||||
union {
|
||||
struct insn_field immediate;
|
||||
struct insn_field moffset1; /* for 64bit MOV */
|
||||
struct insn_field immediate1; /* for 64bit imm or off16/32 */
|
||||
};
|
||||
union {
|
||||
struct insn_field moffset2; /* for 64bit MOV */
|
||||
struct insn_field immediate2; /* for 64bit imm or seg16 */
|
||||
};
|
||||
|
||||
insn_attr_t attr;
|
||||
unsigned char opnd_bytes;
|
||||
unsigned char addr_bytes;
|
||||
unsigned char length;
|
||||
unsigned char x86_64;
|
||||
|
||||
const insn_byte_t *kaddr; /* kernel address of insn to analyze */
|
||||
const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
|
||||
const insn_byte_t *next_byte;
|
||||
};
|
||||
|
||||
/* Longest valid x86 instruction, in bytes */
#define MAX_INSN_SIZE	15

/* ModRM byte: mod = bits 7:6, reg = bits 5:3, r/m = bits 2:0 */
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
#define X86_MODRM_RM(modrm) ((modrm) & 0x07)

/* SIB byte: scale = bits 7:6, index = bits 5:3, base = bits 2:0 */
#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
#define X86_SIB_BASE(sib) ((sib) & 0x07)

/* REX bit flags: non-zero when the bit is set (not normalised to 1) */
#define X86_REX_W(rex) ((rex) & 8)
#define X86_REX_R(rex) ((rex) & 4)
#define X86_REX_X(rex) ((rex) & 2)
#define X86_REX_B(rex) ((rex) & 1)

/* VEX bit flags */
#define X86_VEX_W(vex)	((vex) & 0x80)	/* VEX3 Byte2 */
#define X86_VEX_R(vex)	((vex) & 0x80)	/* VEX2/3 Byte1 */
#define X86_VEX_X(vex)	((vex) & 0x40)	/* VEX3 Byte1 */
#define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
#define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
#define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
#define X86_VEX2_M	1			/* VEX2.M always 1 */
#define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex)	((vex) & 0x03)		/* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX	0x1f			/* VEX3.M Maximum value */
|
||||
|
||||
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
|
||||
extern void insn_get_prefixes(struct insn *insn);
|
||||
extern void insn_get_opcode(struct insn *insn);
|
||||
extern void insn_get_modrm(struct insn *insn);
|
||||
extern void insn_get_sib(struct insn *insn);
|
||||
extern void insn_get_displacement(struct insn *insn);
|
||||
extern void insn_get_immediate(struct insn *insn);
|
||||
extern void insn_get_length(struct insn *insn);
|
||||
|
||||
/*
 * The attribute is only final once ModRM has been read (opcode groups are
 * resolved from it), so fetching the attribute means decoding up to ModRM.
 */
static inline void insn_get_attribute(struct insn *insn)
{
	insn_get_modrm(insn);
}
|
||||
|
||||
/* Instruction uses RIP-relative addressing */
|
||||
extern int insn_rip_relative(struct insn *insn);
|
||||
|
||||
/*
 * Init insn for kernel text: same as insn_init(), with the 64-bit flag
 * chosen at build time from CONFIG_X86_64.
 */
static inline void kernel_insn_init(struct insn *insn,
				    const void *kaddr, int buf_len)
{
#ifdef CONFIG_X86_64
	const int is_64bit = 1;
#else /* CONFIG_X86_32 */
	const int is_64bit = 0;
#endif

	insn_init(insn, kaddr, buf_len, is_64bit);
}
|
||||
|
||||
static inline int insn_is_avx(struct insn *insn)
|
||||
{
|
||||
if (!insn->prefixes.got)
|
||||
insn_get_prefixes(insn);
|
||||
return (insn->vex_prefix.value != 0);
|
||||
}
|
||||
|
||||
/* Ensure this instruction is decoded completely */
|
||||
static inline int insn_complete(struct insn *insn)
|
||||
{
|
||||
return insn->opcode.got && insn->modrm.got && insn->sib.got &&
|
||||
insn->displacement.got && insn->immediate.got;
|
||||
}
|
||||
|
||||
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
|
||||
{
|
||||
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
|
||||
return X86_VEX2_M;
|
||||
else
|
||||
return X86_VEX3_M(insn->vex_prefix.bytes[1]);
|
||||
}
|
||||
|
||||
static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
|
||||
{
|
||||
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
|
||||
return X86_VEX_P(insn->vex_prefix.bytes[1]);
|
||||
else
|
||||
return X86_VEX_P(insn->vex_prefix.bytes[2]);
|
||||
}
|
||||
|
||||
/* Get the last prefix id from last prefix or VEX prefix */
|
||||
static inline int insn_last_prefix_id(struct insn *insn)
|
||||
{
|
||||
if (insn_is_avx(insn))
|
||||
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
|
||||
|
||||
if (insn->prefixes.bytes[3])
|
||||
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Offset of each field from kaddr */
|
||||
static inline int insn_offset_rex_prefix(struct insn *insn)
|
||||
{
|
||||
return insn->prefixes.nbytes;
|
||||
}
|
||||
static inline int insn_offset_vex_prefix(struct insn *insn)
|
||||
{
|
||||
return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
|
||||
}
|
||||
static inline int insn_offset_opcode(struct insn *insn)
|
||||
{
|
||||
return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
|
||||
}
|
||||
static inline int insn_offset_modrm(struct insn *insn)
|
||||
{
|
||||
return insn_offset_opcode(insn) + insn->opcode.nbytes;
|
||||
}
|
||||
static inline int insn_offset_sib(struct insn *insn)
|
||||
{
|
||||
return insn_offset_modrm(insn) + insn->modrm.nbytes;
|
||||
}
|
||||
static inline int insn_offset_displacement(struct insn *insn)
|
||||
{
|
||||
return insn_offset_sib(insn) + insn->sib.nbytes;
|
||||
}
|
||||
static inline int insn_offset_immediate(struct insn *insn)
|
||||
{
|
||||
return insn_offset_displacement(insn) + insn->displacement.nbytes;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_INSN_H */
|
1816
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
Normal file
1816
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
Normal file
File diff suppressed because it is too large
Load Diff
104
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
Normal file
104
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* intel_pt_decoder.h: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE__INTEL_PT_DECODER_H__
|
||||
#define INCLUDE__INTEL_PT_DECODER_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "intel-pt-insn-decoder.h"
|
||||
|
||||
/* Bit flags; presumably stored in intel_pt_state.flags (confirm in decoder) */
#define INTEL_PT_IN_TX		(1 << 0)
#define INTEL_PT_ABORT_TX	(1 << 1)
#define INTEL_PT_ASYNC		(1 << 2)
|
||||
|
||||
/* Kind of sample a decoder state describes; values are distinct bits */
enum intel_pt_sample_type {
	INTEL_PT_BRANCH		= 1 << 0,
	INTEL_PT_INSTRUCTION	= 1 << 1,
	INTEL_PT_TRANSACTION	= 1 << 2,
};
|
||||
|
||||
/* Unit in which intel_pt_params.period is expressed */
enum intel_pt_period_type {
	INTEL_PT_PERIOD_NONE,
	INTEL_PT_PERIOD_INSTRUCTIONS,
	INTEL_PT_PERIOD_TICKS,
};
|
||||
|
||||
/*
 * Decoder error codes. Numbering starts at 1; INTEL_PT_ERR_MAX is one past
 * the last real code.
 */
enum {
	INTEL_PT_ERR_NOMEM = 1,
	INTEL_PT_ERR_INTERN,
	INTEL_PT_ERR_BADPKT,
	INTEL_PT_ERR_NODATA,
	INTEL_PT_ERR_NOINSN,
	INTEL_PT_ERR_MISMAT,
	INTEL_PT_ERR_OVR,
	INTEL_PT_ERR_LOST,
	INTEL_PT_ERR_UNK,
	INTEL_PT_ERR_NELOOP,
	INTEL_PT_ERR_MAX,
};
|
||||
|
||||
struct intel_pt_state {
|
||||
enum intel_pt_sample_type type;
|
||||
int err;
|
||||
uint64_t from_ip;
|
||||
uint64_t to_ip;
|
||||
uint64_t cr3;
|
||||
uint64_t timestamp;
|
||||
uint64_t est_timestamp;
|
||||
uint64_t trace_nr;
|
||||
uint32_t flags;
|
||||
enum intel_pt_insn_op insn_op;
|
||||
int insn_len;
|
||||
};
|
||||
|
||||
struct intel_pt_insn;
|
||||
|
||||
/* A chunk of trace data, as passed to the get_trace() callback */
struct intel_pt_buffer {
	const unsigned char *buf;	/* trace bytes */
	size_t len;			/* number of bytes at @buf */
	bool consecutive;
	uint64_t ref_timestamp;
	uint64_t trace_nr;
};
|
||||
|
||||
struct intel_pt_params {
|
||||
int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
|
||||
int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
|
||||
uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
|
||||
uint64_t max_insn_cnt, void *data);
|
||||
void *data;
|
||||
bool return_compression;
|
||||
uint64_t period;
|
||||
enum intel_pt_period_type period_type;
|
||||
unsigned max_non_turbo_ratio;
|
||||
};
|
||||
|
||||
struct intel_pt_decoder;
|
||||
|
||||
struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
|
||||
void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
|
||||
|
||||
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
|
||||
|
||||
unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
|
||||
unsigned char *buf_b, size_t len_b,
|
||||
bool have_tsc);
|
||||
|
||||
int intel_pt__strerror(int code, char *buf, size_t buflen);
|
||||
|
||||
#endif
|
246
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
Normal file
246
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
Normal file
@ -0,0 +1,246 @@
|
||||
/*
|
||||
* intel_pt_insn_decoder.c: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <endian.h>
|
||||
#include <byteswap.h>
|
||||
|
||||
#include "event.h"
|
||||
|
||||
#include <asm/insn.h>
|
||||
|
||||
#include "inat.c"
|
||||
#include "insn.c"
|
||||
|
||||
#include "intel-pt-insn-decoder.h"
|
||||
|
||||
/* Based on branch_type() from perf_event_intel_lbr.c */
|
||||
static void intel_pt_insn_decoder(struct insn *insn,
|
||||
struct intel_pt_insn *intel_pt_insn)
|
||||
{
|
||||
enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
|
||||
enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
|
||||
int ext;
|
||||
|
||||
if (insn_is_avx(insn)) {
|
||||
intel_pt_insn->op = INTEL_PT_OP_OTHER;
|
||||
intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
|
||||
intel_pt_insn->length = insn->length;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (insn->opcode.bytes[0]) {
|
||||
case 0xf:
|
||||
switch (insn->opcode.bytes[1]) {
|
||||
case 0x05: /* syscall */
|
||||
case 0x34: /* sysenter */
|
||||
op = INTEL_PT_OP_SYSCALL;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0x07: /* sysret */
|
||||
case 0x35: /* sysexit */
|
||||
op = INTEL_PT_OP_SYSRET;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0x80 ... 0x8f: /* jcc */
|
||||
op = INTEL_PT_OP_JCC;
|
||||
branch = INTEL_PT_BR_CONDITIONAL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x70 ... 0x7f: /* jcc */
|
||||
op = INTEL_PT_OP_JCC;
|
||||
branch = INTEL_PT_BR_CONDITIONAL;
|
||||
break;
|
||||
case 0xc2: /* near ret */
|
||||
case 0xc3: /* near ret */
|
||||
case 0xca: /* far ret */
|
||||
case 0xcb: /* far ret */
|
||||
op = INTEL_PT_OP_RET;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0xcf: /* iret */
|
||||
op = INTEL_PT_OP_IRET;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0xcc ... 0xce: /* int */
|
||||
op = INTEL_PT_OP_INT;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0xe8: /* call near rel */
|
||||
op = INTEL_PT_OP_CALL;
|
||||
branch = INTEL_PT_BR_UNCONDITIONAL;
|
||||
break;
|
||||
case 0x9a: /* call far absolute */
|
||||
op = INTEL_PT_OP_CALL;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0xe0 ... 0xe2: /* loop */
|
||||
op = INTEL_PT_OP_LOOP;
|
||||
branch = INTEL_PT_BR_CONDITIONAL;
|
||||
break;
|
||||
case 0xe3: /* jcc */
|
||||
op = INTEL_PT_OP_JCC;
|
||||
branch = INTEL_PT_BR_CONDITIONAL;
|
||||
break;
|
||||
case 0xe9: /* jmp */
|
||||
case 0xeb: /* jmp */
|
||||
op = INTEL_PT_OP_JMP;
|
||||
branch = INTEL_PT_BR_UNCONDITIONAL;
|
||||
break;
|
||||
case 0xea: /* far jmp */
|
||||
op = INTEL_PT_OP_JMP;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 0xff: /* call near absolute, call far absolute ind */
|
||||
ext = (insn->modrm.bytes[0] >> 3) & 0x7;
|
||||
switch (ext) {
|
||||
case 2: /* near ind call */
|
||||
case 3: /* far ind call */
|
||||
op = INTEL_PT_OP_CALL;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
case 4:
|
||||
case 5:
|
||||
op = INTEL_PT_OP_JMP;
|
||||
branch = INTEL_PT_BR_INDIRECT;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
intel_pt_insn->op = op;
|
||||
intel_pt_insn->branch = branch;
|
||||
intel_pt_insn->length = insn->length;
|
||||
|
||||
if (branch == INTEL_PT_BR_CONDITIONAL ||
|
||||
branch == INTEL_PT_BR_UNCONDITIONAL) {
|
||||
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
switch (insn->immediate.nbytes) {
|
||||
case 1:
|
||||
intel_pt_insn->rel = insn->immediate.value;
|
||||
break;
|
||||
case 2:
|
||||
intel_pt_insn->rel =
|
||||
bswap_16((short)insn->immediate.value);
|
||||
break;
|
||||
case 4:
|
||||
intel_pt_insn->rel = bswap_32(insn->immediate.value);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
intel_pt_insn->rel = insn->immediate.value;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
|
||||
struct intel_pt_insn *intel_pt_insn)
|
||||
{
|
||||
struct insn insn;
|
||||
|
||||
insn_init(&insn, buf, len, x86_64);
|
||||
insn_get_length(&insn);
|
||||
if (!insn_complete(&insn) || insn.length > len)
|
||||
return -1;
|
||||
intel_pt_insn_decoder(&insn, intel_pt_insn);
|
||||
if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ)
|
||||
memcpy(intel_pt_insn->buf, buf, insn.length);
|
||||
else
|
||||
memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *branch_name[] = {
|
||||
[INTEL_PT_OP_OTHER] = "Other",
|
||||
[INTEL_PT_OP_CALL] = "Call",
|
||||
[INTEL_PT_OP_RET] = "Ret",
|
||||
[INTEL_PT_OP_JCC] = "Jcc",
|
||||
[INTEL_PT_OP_JMP] = "Jmp",
|
||||
[INTEL_PT_OP_LOOP] = "Loop",
|
||||
[INTEL_PT_OP_IRET] = "IRet",
|
||||
[INTEL_PT_OP_INT] = "Int",
|
||||
[INTEL_PT_OP_SYSCALL] = "Syscall",
|
||||
[INTEL_PT_OP_SYSRET] = "Sysret",
|
||||
};
|
||||
|
||||
const char *intel_pt_insn_name(enum intel_pt_insn_op op)
|
||||
{
|
||||
return branch_name[op];
|
||||
}
|
||||
|
||||
int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
|
||||
size_t buf_len)
|
||||
{
|
||||
switch (intel_pt_insn->branch) {
|
||||
case INTEL_PT_BR_CONDITIONAL:
|
||||
case INTEL_PT_BR_UNCONDITIONAL:
|
||||
return snprintf(buf, buf_len, "%s %s%d",
|
||||
intel_pt_insn_name(intel_pt_insn->op),
|
||||
intel_pt_insn->rel > 0 ? "+" : "",
|
||||
intel_pt_insn->rel);
|
||||
case INTEL_PT_BR_NO_BRANCH:
|
||||
case INTEL_PT_BR_INDIRECT:
|
||||
return snprintf(buf, buf_len, "%s",
|
||||
intel_pt_insn_name(intel_pt_insn->op));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t intel_pt_insn_max_size(void)
|
||||
{
|
||||
return MAX_INSN_SIZE;
|
||||
}
|
||||
|
||||
int intel_pt_insn_type(enum intel_pt_insn_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case INTEL_PT_OP_OTHER:
|
||||
return 0;
|
||||
case INTEL_PT_OP_CALL:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
|
||||
case INTEL_PT_OP_RET:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
|
||||
case INTEL_PT_OP_JCC:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
|
||||
case INTEL_PT_OP_JMP:
|
||||
return PERF_IP_FLAG_BRANCH;
|
||||
case INTEL_PT_OP_LOOP:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
|
||||
case INTEL_PT_OP_IRET:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
|
||||
PERF_IP_FLAG_INTERRUPT;
|
||||
case INTEL_PT_OP_INT:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
|
||||
PERF_IP_FLAG_INTERRUPT;
|
||||
case INTEL_PT_OP_SYSCALL:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
|
||||
PERF_IP_FLAG_SYSCALLRET;
|
||||
case INTEL_PT_OP_SYSRET:
|
||||
return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
|
||||
PERF_IP_FLAG_SYSCALLRET;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
65
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
Normal file
65
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* intel_pt_insn_decoder.h: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
|
||||
#define INCLUDE__INTEL_PT_INSN_DECODER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define INTEL_PT_INSN_DESC_MAX 32
|
||||
#define INTEL_PT_INSN_DBG_BUF_SZ 16
|
||||
|
||||
/*
 * Instruction classes distinguished by the instruction decoder.
 * intel_pt_insn_name() gives the printable name of a class and
 * intel_pt_insn_type() gives its PERF_IP_FLAG_* bits.
 */
enum intel_pt_insn_op {
|
||||
INTEL_PT_OP_OTHER,
|
||||
INTEL_PT_OP_CALL,
|
||||
INTEL_PT_OP_RET,
|
||||
INTEL_PT_OP_JCC,
|
||||
INTEL_PT_OP_JMP,
|
||||
INTEL_PT_OP_LOOP,
|
||||
INTEL_PT_OP_IRET,
|
||||
INTEL_PT_OP_INT,
|
||||
INTEL_PT_OP_SYSCALL,
|
||||
INTEL_PT_OP_SYSRET,
|
||||
};
|
||||
|
||||
/*
 * Branch kind of a decoded instruction.  intel_pt_insn_desc() prints a
 * relative offset only for the CONDITIONAL and UNCONDITIONAL kinds.
 */
enum intel_pt_insn_branch {
|
||||
INTEL_PT_BR_NO_BRANCH,
|
||||
INTEL_PT_BR_INDIRECT,
|
||||
INTEL_PT_BR_CONDITIONAL,
|
||||
INTEL_PT_BR_UNCONDITIONAL,
|
||||
};
|
||||
|
||||
/*
 * Summary of one decoded instruction, as filled in by intel_pt_get_insn().
 *
 * op:     instruction class
 * branch: branch kind
 * length: instruction length in bytes
 * rel:    relative offset printed by intel_pt_insn_desc() for conditional
 *         and unconditional branches
 * buf:    copy of up to INTEL_PT_INSN_DBG_BUF_SZ leading instruction bytes
 */
struct intel_pt_insn {
|
||||
enum intel_pt_insn_op op;
|
||||
enum intel_pt_insn_branch branch;
|
||||
int length;
|
||||
int32_t rel;
|
||||
unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ];
|
||||
};
|
||||
|
||||
int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
|
||||
struct intel_pt_insn *intel_pt_insn);
|
||||
|
||||
const char *intel_pt_insn_name(enum intel_pt_insn_op op);
|
||||
|
||||
int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
|
||||
size_t buf_len);
|
||||
|
||||
size_t intel_pt_insn_max_size(void);
|
||||
|
||||
int intel_pt_insn_type(enum intel_pt_insn_op op);
|
||||
|
||||
#endif
|
155
tools/perf/util/intel-pt-decoder/intel-pt-log.c
Normal file
155
tools/perf/util/intel-pt-decoder/intel-pt-log.c
Normal file
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* intel_pt_log.c: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "intel-pt-log.h"
|
||||
#include "intel-pt-insn-decoder.h"
|
||||
|
||||
#include "intel-pt-pkt-decoder.h"
|
||||
|
||||
#define MAX_LOG_NAME 256
|
||||
|
||||
static FILE *f;
|
||||
static char log_name[MAX_LOG_NAME];
|
||||
static bool enable_logging;
|
||||
|
||||
void intel_pt_log_enable(void)
|
||||
{
|
||||
enable_logging = true;
|
||||
}
|
||||
|
||||
void intel_pt_log_disable(void)
|
||||
{
|
||||
if (f)
|
||||
fflush(f);
|
||||
enable_logging = false;
|
||||
}
|
||||
|
||||
void intel_pt_log_set_name(const char *name)
|
||||
{
|
||||
strncpy(log_name, name, MAX_LOG_NAME - 5);
|
||||
strcat(log_name, ".log");
|
||||
}
|
||||
|
||||
static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
|
||||
int indent)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < indent; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
fprintf(f, " %08" PRIx64 ": ", pos);
|
||||
for (i = 0; i < len; i++)
|
||||
fprintf(f, " %02x", buf[i]);
|
||||
for (; i < 16; i++)
|
||||
fprintf(f, " ");
|
||||
fprintf(f, " ");
|
||||
}
|
||||
|
||||
static void intel_pt_print_no_data(uint64_t pos, int indent)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < indent; i++)
|
||||
fprintf(f, " ");
|
||||
|
||||
fprintf(f, " %08" PRIx64 ": ", pos);
|
||||
for (i = 0; i < 16; i++)
|
||||
fprintf(f, " ");
|
||||
fprintf(f, " ");
|
||||
}
|
||||
|
||||
static int intel_pt_log_open(void)
|
||||
{
|
||||
if (!enable_logging)
|
||||
return -1;
|
||||
|
||||
if (f)
|
||||
return 0;
|
||||
|
||||
if (!log_name[0])
|
||||
return -1;
|
||||
|
||||
f = fopen(log_name, "w+");
|
||||
if (!f) {
|
||||
enable_logging = false;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
|
||||
uint64_t pos, const unsigned char *buf)
|
||||
{
|
||||
char desc[INTEL_PT_PKT_DESC_MAX];
|
||||
|
||||
if (intel_pt_log_open())
|
||||
return;
|
||||
|
||||
intel_pt_print_data(buf, pkt_len, pos, 0);
|
||||
intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
|
||||
fprintf(f, "%s\n", desc);
|
||||
}
|
||||
|
||||
void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
|
||||
{
|
||||
char desc[INTEL_PT_INSN_DESC_MAX];
|
||||
size_t len = intel_pt_insn->length;
|
||||
|
||||
if (intel_pt_log_open())
|
||||
return;
|
||||
|
||||
if (len > INTEL_PT_INSN_DBG_BUF_SZ)
|
||||
len = INTEL_PT_INSN_DBG_BUF_SZ;
|
||||
intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
|
||||
if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
|
||||
fprintf(f, "%s\n", desc);
|
||||
else
|
||||
fprintf(f, "Bad instruction!\n");
|
||||
}
|
||||
|
||||
void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
|
||||
{
|
||||
char desc[INTEL_PT_INSN_DESC_MAX];
|
||||
|
||||
if (intel_pt_log_open())
|
||||
return;
|
||||
|
||||
intel_pt_print_no_data(ip, 8);
|
||||
if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
|
||||
fprintf(f, "%s\n", desc);
|
||||
else
|
||||
fprintf(f, "Bad instruction!\n");
|
||||
}
|
||||
|
||||
void intel_pt_log(const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
|
||||
if (intel_pt_log_open())
|
||||
return;
|
||||
|
||||
va_start(args, fmt);
|
||||
vfprintf(f, fmt, args);
|
||||
va_end(args);
|
||||
}
|
52
tools/perf/util/intel-pt-decoder/intel-pt-log.h
Normal file
52
tools/perf/util/intel-pt-decoder/intel-pt-log.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* intel_pt_log.h: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE__INTEL_PT_LOG_H__
|
||||
#define INCLUDE__INTEL_PT_LOG_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
struct intel_pt_pkt;
|
||||
|
||||
void intel_pt_log_enable(void);
|
||||
void intel_pt_log_disable(void);
|
||||
void intel_pt_log_set_name(const char *name);
|
||||
|
||||
void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
|
||||
uint64_t pos, const unsigned char *buf);
|
||||
|
||||
struct intel_pt_insn;
|
||||
|
||||
void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
|
||||
void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
|
||||
uint64_t ip);
|
||||
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
void intel_pt_log(const char *fmt, ...);
|
||||
|
||||
#define x64_fmt "0x%" PRIx64
|
||||
|
||||
static inline void intel_pt_log_at(const char *msg, uint64_t u)
|
||||
{
|
||||
intel_pt_log("%s at " x64_fmt "\n", msg, u);
|
||||
}
|
||||
|
||||
static inline void intel_pt_log_to(const char *msg, uint64_t u)
|
||||
{
|
||||
intel_pt_log("%s to " x64_fmt "\n", msg, u);
|
||||
}
|
||||
|
||||
#endif
|
400
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
Normal file
400
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
Normal file
@ -0,0 +1,400 @@
|
||||
/*
|
||||
* intel_pt_pkt_decoder.c: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <endian.h>
|
||||
#include <byteswap.h>
|
||||
|
||||
#include "intel-pt-pkt-decoder.h"
|
||||
|
||||
#define BIT(n) (1 << (n))
|
||||
|
||||
#define BIT63 ((uint64_t)1 << 63)
|
||||
|
||||
/*
 * Little-endian to host conversion helpers.
 *
 * On a big-endian host the leNN_to_cpu() macros byte-swap their argument
 * and memcpy_le64() copies n bytes to d and then byte-swaps the 64-bit
 * object at d.  Otherwise the leNN_to_cpu() macros expand to nothing and
 * memcpy_le64() is plain memcpy().
 */
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
#define le16_to_cpu bswap_16
|
||||
#define le32_to_cpu bswap_32
|
||||
#define le64_to_cpu bswap_64
|
||||
#define memcpy_le64(d, s, n) do { \
|
||||
memcpy((d), (s), (n)); \
|
||||
*(d) = le64_to_cpu(*(d)); \
|
||||
} while (0)
|
||||
#else
|
||||
#define le16_to_cpu
|
||||
#define le32_to_cpu
|
||||
#define le64_to_cpu
|
||||
#define memcpy_le64 memcpy
|
||||
#endif
|
||||
|
||||
static const char * const packet_name[] = {
|
||||
[INTEL_PT_BAD] = "Bad Packet!",
|
||||
[INTEL_PT_PAD] = "PAD",
|
||||
[INTEL_PT_TNT] = "TNT",
|
||||
[INTEL_PT_TIP_PGD] = "TIP.PGD",
|
||||
[INTEL_PT_TIP_PGE] = "TIP.PGE",
|
||||
[INTEL_PT_TSC] = "TSC",
|
||||
[INTEL_PT_MODE_EXEC] = "MODE.Exec",
|
||||
[INTEL_PT_MODE_TSX] = "MODE.TSX",
|
||||
[INTEL_PT_TIP] = "TIP",
|
||||
[INTEL_PT_FUP] = "FUP",
|
||||
[INTEL_PT_PSB] = "PSB",
|
||||
[INTEL_PT_PSBEND] = "PSBEND",
|
||||
[INTEL_PT_CBR] = "CBR",
|
||||
[INTEL_PT_PIP] = "PIP",
|
||||
[INTEL_PT_OVF] = "OVF",
|
||||
};
|
||||
|
||||
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
|
||||
{
|
||||
return packet_name[type];
|
||||
}
|
||||
|
||||
static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
uint64_t payload;
|
||||
int count;
|
||||
|
||||
if (len < 8)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
payload = le64_to_cpu(*(uint64_t *)buf);
|
||||
|
||||
for (count = 47; count; count--) {
|
||||
if (payload & BIT63)
|
||||
break;
|
||||
payload <<= 1;
|
||||
}
|
||||
|
||||
packet->type = INTEL_PT_TNT;
|
||||
packet->count = count;
|
||||
packet->payload = payload << 1;
|
||||
return 8;
|
||||
}
|
||||
|
||||
static int intel_pt_get_pip(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
uint64_t payload = 0;
|
||||
|
||||
if (len < 8)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
packet->type = INTEL_PT_PIP;
|
||||
memcpy_le64(&payload, buf + 2, 6);
|
||||
packet->payload = payload >> 1;
|
||||
|
||||
return 8;
|
||||
}
|
||||
|
||||
static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 4)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->type = INTEL_PT_CBR;
|
||||
packet->payload = buf[2];
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
|
||||
{
|
||||
packet->type = INTEL_PT_OVF;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int intel_pt_get_psb(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (len < 16)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
for (i = 2; i < 16; i += 2) {
|
||||
if (buf[i] != 2 || buf[i + 1] != 0x82)
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
|
||||
packet->type = INTEL_PT_PSB;
|
||||
return 16;
|
||||
}
|
||||
|
||||
static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
|
||||
{
|
||||
packet->type = INTEL_PT_PSBEND;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int intel_pt_get_pad(struct intel_pt_pkt *packet)
|
||||
{
|
||||
packet->type = INTEL_PT_PAD;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 2)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
switch (buf[1]) {
|
||||
case 0xa3: /* Long TNT */
|
||||
return intel_pt_get_long_tnt(buf, len, packet);
|
||||
case 0x43: /* PIP */
|
||||
return intel_pt_get_pip(buf, len, packet);
|
||||
case 0x03: /* CBR */
|
||||
return intel_pt_get_cbr(buf, len, packet);
|
||||
case 0xf3: /* OVF */
|
||||
return intel_pt_get_ovf(packet);
|
||||
case 0x82: /* PSB */
|
||||
return intel_pt_get_psb(buf, len, packet);
|
||||
case 0x23: /* PSBEND */
|
||||
return intel_pt_get_psbend(packet);
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pt_get_short_tnt(unsigned int byte,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
int count;
|
||||
|
||||
for (count = 6; count; count--) {
|
||||
if (byte & BIT(7))
|
||||
break;
|
||||
byte <<= 1;
|
||||
}
|
||||
|
||||
packet->type = INTEL_PT_TNT;
|
||||
packet->count = count;
|
||||
packet->payload = (uint64_t)byte << 57;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
|
||||
const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
switch (byte >> 5) {
|
||||
case 0:
|
||||
packet->count = 0;
|
||||
break;
|
||||
case 1:
|
||||
if (len < 3)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->count = 2;
|
||||
packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
|
||||
break;
|
||||
case 2:
|
||||
if (len < 5)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->count = 4;
|
||||
packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
|
||||
break;
|
||||
case 3:
|
||||
case 6:
|
||||
if (len < 7)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->count = 6;
|
||||
memcpy_le64(&packet->payload, buf + 1, 6);
|
||||
break;
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
|
||||
packet->type = type;
|
||||
|
||||
return packet->count + 1;
|
||||
}
|
||||
|
||||
static int intel_pt_get_mode(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 2)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
switch (buf[1] >> 5) {
|
||||
case 0:
|
||||
packet->type = INTEL_PT_MODE_EXEC;
|
||||
switch (buf[1] & 3) {
|
||||
case 0:
|
||||
packet->payload = 16;
|
||||
break;
|
||||
case 1:
|
||||
packet->payload = 64;
|
||||
break;
|
||||
case 2:
|
||||
packet->payload = 32;
|
||||
break;
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
packet->type = INTEL_PT_MODE_TSX;
|
||||
if ((buf[1] & 3) == 3)
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
packet->payload = buf[1] & 3;
|
||||
break;
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 8)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->type = INTEL_PT_TSC;
|
||||
memcpy_le64(&packet->payload, buf + 1, 7);
|
||||
return 8;
|
||||
}
|
||||
|
||||
static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
unsigned int byte;
|
||||
|
||||
memset(packet, 0, sizeof(struct intel_pt_pkt));
|
||||
|
||||
if (!len)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
byte = buf[0];
|
||||
if (!(byte & BIT(0))) {
|
||||
if (byte == 0)
|
||||
return intel_pt_get_pad(packet);
|
||||
if (byte == 2)
|
||||
return intel_pt_get_ext(buf, len, packet);
|
||||
return intel_pt_get_short_tnt(byte, packet);
|
||||
}
|
||||
|
||||
switch (byte & 0x1f) {
|
||||
case 0x0D:
|
||||
return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
|
||||
case 0x11:
|
||||
return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
|
||||
packet);
|
||||
case 0x01:
|
||||
return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
|
||||
packet);
|
||||
case 0x1D:
|
||||
return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
|
||||
case 0x19:
|
||||
switch (byte) {
|
||||
case 0x99:
|
||||
return intel_pt_get_mode(buf, len, packet);
|
||||
case 0x19:
|
||||
return intel_pt_get_tsc(buf, len, packet);
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Decode the packet at the start of @buf into @packet.
 *
 * On success the returned length is extended over any zero bytes that
 * immediately follow the packet, up to a total of 8 bytes and never
 * beyond @len.
 *
 * Return: the number of bytes consumed, or the negative
 * INTEL_PT_NEED_MORE_BYTES / INTEL_PT_BAD_PACKET code from decoding.
 */
int intel_pt_get_packet(const unsigned char *buf, size_t len,
			struct intel_pt_pkt *packet)
{
	int used = intel_pt_do_get_packet(buf, len, packet);

	if (used <= 0)
		return used;

	for (; used < 8; used++) {
		if ((size_t)used >= len || buf[used] != 0)
			break;
	}

	return used;
}
|
||||
|
||||
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
|
||||
size_t buf_len)
|
||||
{
|
||||
int ret, i;
|
||||
unsigned long long payload = packet->payload;
|
||||
const char *name = intel_pt_pkt_name(packet->type);
|
||||
|
||||
switch (packet->type) {
|
||||
case INTEL_PT_BAD:
|
||||
case INTEL_PT_PAD:
|
||||
case INTEL_PT_PSB:
|
||||
case INTEL_PT_PSBEND:
|
||||
case INTEL_PT_OVF:
|
||||
return snprintf(buf, buf_len, "%s", name);
|
||||
case INTEL_PT_TNT: {
|
||||
size_t blen = buf_len;
|
||||
|
||||
ret = snprintf(buf, blen, "%s ", name);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
buf += ret;
|
||||
blen -= ret;
|
||||
for (i = 0; i < packet->count; i++) {
|
||||
if (payload & BIT63)
|
||||
ret = snprintf(buf, blen, "T");
|
||||
else
|
||||
ret = snprintf(buf, blen, "N");
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
buf += ret;
|
||||
blen -= ret;
|
||||
payload <<= 1;
|
||||
}
|
||||
ret = snprintf(buf, blen, " (%d)", packet->count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
blen -= ret;
|
||||
return buf_len - blen;
|
||||
}
|
||||
case INTEL_PT_TIP_PGD:
|
||||
case INTEL_PT_TIP_PGE:
|
||||
case INTEL_PT_TIP:
|
||||
case INTEL_PT_FUP:
|
||||
if (!(packet->count))
|
||||
return snprintf(buf, buf_len, "%s no ip", name);
|
||||
case INTEL_PT_CBR:
|
||||
return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
|
||||
case INTEL_PT_TSC:
|
||||
if (packet->count)
|
||||
return snprintf(buf, buf_len,
|
||||
"%s 0x%llx CTC 0x%x FC 0x%x",
|
||||
name, payload, packet->count & 0xffff,
|
||||
(packet->count >> 16) & 0x1ff);
|
||||
else
|
||||
return snprintf(buf, buf_len, "%s 0x%llx",
|
||||
name, payload);
|
||||
case INTEL_PT_MODE_EXEC:
|
||||
return snprintf(buf, buf_len, "%s %lld", name, payload);
|
||||
case INTEL_PT_MODE_TSX:
|
||||
return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
|
||||
name, (unsigned)(payload >> 1) & 1,
|
||||
(unsigned)payload & 1);
|
||||
case INTEL_PT_PIP:
|
||||
ret = snprintf(buf, buf_len, "%s 0x%llx",
|
||||
name, payload);
|
||||
return ret;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return snprintf(buf, buf_len, "%s 0x%llx (%d)",
|
||||
name, payload, packet->count);
|
||||
}
|
64
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
Normal file
64
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* intel_pt_pkt_decoder.h: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2014, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
|
||||
#define INCLUDE__INTEL_PT_PKT_DECODER_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define INTEL_PT_PKT_DESC_MAX 256
|
||||
|
||||
#define INTEL_PT_NEED_MORE_BYTES -1
|
||||
#define INTEL_PT_BAD_PACKET -2
|
||||
|
||||
#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
|
||||
"\002\202\002\202\002\202\002\202"
|
||||
#define INTEL_PT_PSB_LEN 16
|
||||
|
||||
#define INTEL_PT_PKT_MAX_SZ 16
|
||||
|
||||
/*
 * Packet types produced by intel_pt_get_packet().  intel_pt_pkt_name()
 * gives the printable name of each type.
 */
enum intel_pt_pkt_type {
|
||||
INTEL_PT_BAD,
|
||||
INTEL_PT_PAD,
|
||||
INTEL_PT_TNT,
|
||||
INTEL_PT_TIP_PGD,
|
||||
INTEL_PT_TIP_PGE,
|
||||
INTEL_PT_TSC,
|
||||
INTEL_PT_MODE_EXEC,
|
||||
INTEL_PT_MODE_TSX,
|
||||
INTEL_PT_TIP,
|
||||
INTEL_PT_FUP,
|
||||
INTEL_PT_PSB,
|
||||
INTEL_PT_PSBEND,
|
||||
INTEL_PT_CBR,
|
||||
INTEL_PT_PIP,
|
||||
INTEL_PT_OVF,
|
||||
};
|
||||
|
||||
/*
 * One decoded packet.
 *
 * type:    packet type
 * count:   meaning depends on the type: for TNT packets the number of TNT
 *          bits in payload, for IP packets (TIP, TIP.PGE, TIP.PGD, FUP)
 *          the number of IP bytes the packet carried
 * payload: packet-specific value; for TNT packets the bits start at bit 63
 */
struct intel_pt_pkt {
|
||||
enum intel_pt_pkt_type type;
|
||||
int count;
|
||||
uint64_t payload;
|
||||
};
|
||||
|
||||
const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
|
||||
|
||||
int intel_pt_get_packet(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet);
|
||||
|
||||
int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
|
||||
|
||||
#endif
|
970
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
Normal file
970
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
Normal file
@ -0,0 +1,970 @@
|
||||
# x86 Opcode Maps
|
||||
#
|
||||
# This is (mostly) based on the following documentation.
|
||||
# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
|
||||
# (#326018-047US, June 2013)
|
||||
#
|
||||
#<Opcode maps>
|
||||
# Table: table-name
|
||||
# Referrer: escaped-name
|
||||
# AVXcode: avx-code
|
||||
# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
|
||||
# (or)
|
||||
# opcode: escape # escaped-name
|
||||
# EndTable
|
||||
#
|
||||
#<group maps>
|
||||
# GrpTable: GrpXXX
|
||||
# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
|
||||
# EndTable
|
||||
#
|
||||
# AVX Superscripts
|
||||
# (v): this opcode requires VEX prefix.
|
||||
# (v1): this opcode only supports 128bit VEX.
|
||||
#
|
||||
# Last Prefix Superscripts
|
||||
# - (66): the last prefix is 0x66
|
||||
# - (F3): the last prefix is 0xF3
|
||||
# - (F2): the last prefix is 0xF2
|
||||
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
|
||||
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
|
||||
|
||||
Table: one byte opcode
|
||||
Referrer:
|
||||
AVXcode:
|
||||
# 0x00 - 0x0f
|
||||
00: ADD Eb,Gb
|
||||
01: ADD Ev,Gv
|
||||
02: ADD Gb,Eb
|
||||
03: ADD Gv,Ev
|
||||
04: ADD AL,Ib
|
||||
05: ADD rAX,Iz
|
||||
06: PUSH ES (i64)
|
||||
07: POP ES (i64)
|
||||
08: OR Eb,Gb
|
||||
09: OR Ev,Gv
|
||||
0a: OR Gb,Eb
|
||||
0b: OR Gv,Ev
|
||||
0c: OR AL,Ib
|
||||
0d: OR rAX,Iz
|
||||
0e: PUSH CS (i64)
|
||||
0f: escape # 2-byte escape
|
||||
# 0x10 - 0x1f
|
||||
10: ADC Eb,Gb
|
||||
11: ADC Ev,Gv
|
||||
12: ADC Gb,Eb
|
||||
13: ADC Gv,Ev
|
||||
14: ADC AL,Ib
|
||||
15: ADC rAX,Iz
|
||||
16: PUSH SS (i64)
|
||||
17: POP SS (i64)
|
||||
18: SBB Eb,Gb
|
||||
19: SBB Ev,Gv
|
||||
1a: SBB Gb,Eb
|
||||
1b: SBB Gv,Ev
|
||||
1c: SBB AL,Ib
|
||||
1d: SBB rAX,Iz
|
||||
1e: PUSH DS (i64)
|
||||
1f: POP DS (i64)
|
||||
# 0x20 - 0x2f
|
||||
20: AND Eb,Gb
|
||||
21: AND Ev,Gv
|
||||
22: AND Gb,Eb
|
||||
23: AND Gv,Ev
|
||||
24: AND AL,Ib
|
||||
25: AND rAx,Iz
|
||||
26: SEG=ES (Prefix)
|
||||
27: DAA (i64)
|
||||
28: SUB Eb,Gb
|
||||
29: SUB Ev,Gv
|
||||
2a: SUB Gb,Eb
|
||||
2b: SUB Gv,Ev
|
||||
2c: SUB AL,Ib
|
||||
2d: SUB rAX,Iz
|
||||
2e: SEG=CS (Prefix)
|
||||
2f: DAS (i64)
|
||||
# 0x30 - 0x3f
|
||||
30: XOR Eb,Gb
|
||||
31: XOR Ev,Gv
|
||||
32: XOR Gb,Eb
|
||||
33: XOR Gv,Ev
|
||||
34: XOR AL,Ib
|
||||
35: XOR rAX,Iz
|
||||
36: SEG=SS (Prefix)
|
||||
37: AAA (i64)
|
||||
38: CMP Eb,Gb
|
||||
39: CMP Ev,Gv
|
||||
3a: CMP Gb,Eb
|
||||
3b: CMP Gv,Ev
|
||||
3c: CMP AL,Ib
|
||||
3d: CMP rAX,Iz
|
||||
3e: SEG=DS (Prefix)
|
||||
3f: AAS (i64)
|
||||
# 0x40 - 0x4f
|
||||
40: INC eAX (i64) | REX (o64)
|
||||
41: INC eCX (i64) | REX.B (o64)
|
||||
42: INC eDX (i64) | REX.X (o64)
|
||||
43: INC eBX (i64) | REX.XB (o64)
|
||||
44: INC eSP (i64) | REX.R (o64)
|
||||
45: INC eBP (i64) | REX.RB (o64)
|
||||
46: INC eSI (i64) | REX.RX (o64)
|
||||
47: INC eDI (i64) | REX.RXB (o64)
|
||||
48: DEC eAX (i64) | REX.W (o64)
|
||||
49: DEC eCX (i64) | REX.WB (o64)
|
||||
4a: DEC eDX (i64) | REX.WX (o64)
|
||||
4b: DEC eBX (i64) | REX.WXB (o64)
|
||||
4c: DEC eSP (i64) | REX.WR (o64)
|
||||
4d: DEC eBP (i64) | REX.WRB (o64)
|
||||
4e: DEC eSI (i64) | REX.WRX (o64)
|
||||
4f: DEC eDI (i64) | REX.WRXB (o64)
|
||||
# 0x50 - 0x5f
|
||||
50: PUSH rAX/r8 (d64)
|
||||
51: PUSH rCX/r9 (d64)
|
||||
52: PUSH rDX/r10 (d64)
|
||||
53: PUSH rBX/r11 (d64)
|
||||
54: PUSH rSP/r12 (d64)
|
||||
55: PUSH rBP/r13 (d64)
|
||||
56: PUSH rSI/r14 (d64)
|
||||
57: PUSH rDI/r15 (d64)
|
||||
58: POP rAX/r8 (d64)
|
||||
59: POP rCX/r9 (d64)
|
||||
5a: POP rDX/r10 (d64)
|
||||
5b: POP rBX/r11 (d64)
|
||||
5c: POP rSP/r12 (d64)
|
||||
5d: POP rBP/r13 (d64)
|
||||
5e: POP rSI/r14 (d64)
|
||||
5f: POP rDI/r15 (d64)
|
||||
# 0x60 - 0x6f
|
||||
60: PUSHA/PUSHAD (i64)
|
||||
61: POPA/POPAD (i64)
|
||||
62: BOUND Gv,Ma (i64)
|
||||
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
|
||||
64: SEG=FS (Prefix)
|
||||
65: SEG=GS (Prefix)
|
||||
66: Operand-Size (Prefix)
|
||||
67: Address-Size (Prefix)
|
||||
68: PUSH Iz (d64)
|
||||
69: IMUL Gv,Ev,Iz
|
||||
6a: PUSH Ib (d64)
|
||||
6b: IMUL Gv,Ev,Ib
|
||||
6c: INS/INSB Yb,DX
|
||||
6d: INS/INSW/INSD Yz,DX
|
||||
6e: OUTS/OUTSB DX,Xb
|
||||
6f: OUTS/OUTSW/OUTSD DX,Xz
|
||||
# 0x70 - 0x7f
|
||||
70: JO Jb
|
||||
71: JNO Jb
|
||||
72: JB/JNAE/JC Jb
|
||||
73: JNB/JAE/JNC Jb
|
||||
74: JZ/JE Jb
|
||||
75: JNZ/JNE Jb
|
||||
76: JBE/JNA Jb
|
||||
77: JNBE/JA Jb
|
||||
78: JS Jb
|
||||
79: JNS Jb
|
||||
7a: JP/JPE Jb
|
||||
7b: JNP/JPO Jb
|
||||
7c: JL/JNGE Jb
|
||||
7d: JNL/JGE Jb
|
||||
7e: JLE/JNG Jb
|
||||
7f: JNLE/JG Jb
|
||||
# 0x80 - 0x8f
|
||||
80: Grp1 Eb,Ib (1A)
|
||||
81: Grp1 Ev,Iz (1A)
|
||||
82: Grp1 Eb,Ib (1A),(i64)
|
||||
83: Grp1 Ev,Ib (1A)
|
||||
84: TEST Eb,Gb
|
||||
85: TEST Ev,Gv
|
||||
86: XCHG Eb,Gb
|
||||
87: XCHG Ev,Gv
|
||||
88: MOV Eb,Gb
|
||||
89: MOV Ev,Gv
|
||||
8a: MOV Gb,Eb
|
||||
8b: MOV Gv,Ev
|
||||
8c: MOV Ev,Sw
|
||||
8d: LEA Gv,M
|
||||
8e: MOV Sw,Ew
|
||||
8f: Grp1A (1A) | POP Ev (d64)
|
||||
# 0x90 - 0x9f
|
||||
90: NOP | PAUSE (F3) | XCHG r8,rAX
|
||||
91: XCHG rCX/r9,rAX
|
||||
92: XCHG rDX/r10,rAX
|
||||
93: XCHG rBX/r11,rAX
|
||||
94: XCHG rSP/r12,rAX
|
||||
95: XCHG rBP/r13,rAX
|
||||
96: XCHG rSI/r14,rAX
|
||||
97: XCHG rDI/r15,rAX
|
||||
98: CBW/CWDE/CDQE
|
||||
99: CWD/CDQ/CQO
|
||||
9a: CALLF Ap (i64)
|
||||
9b: FWAIT/WAIT
|
||||
9c: PUSHF/D/Q Fv (d64)
|
||||
9d: POPF/D/Q Fv (d64)
|
||||
9e: SAHF
|
||||
9f: LAHF
|
||||
# 0xa0 - 0xaf
|
||||
a0: MOV AL,Ob
|
||||
a1: MOV rAX,Ov
|
||||
a2: MOV Ob,AL
|
||||
a3: MOV Ov,rAX
|
||||
a4: MOVS/B Yb,Xb
|
||||
a5: MOVS/W/D/Q Yv,Xv
|
||||
a6: CMPS/B Xb,Yb
|
||||
a7: CMPS/W/D Xv,Yv
|
||||
a8: TEST AL,Ib
|
||||
a9: TEST rAX,Iz
|
||||
aa: STOS/B Yb,AL
|
||||
ab: STOS/W/D/Q Yv,rAX
|
||||
ac: LODS/B AL,Xb
|
||||
ad: LODS/W/D/Q rAX,Xv
|
||||
ae: SCAS/B AL,Yb
|
||||
# Note: The May 2011 Intel manual shows Xv for the second parameter of the
|
||||
# next instruction but Yv is correct
|
||||
af: SCAS/W/D/Q rAX,Yv
|
||||
# 0xb0 - 0xbf
|
||||
b0: MOV AL/R8L,Ib
|
||||
b1: MOV CL/R9L,Ib
|
||||
b2: MOV DL/R10L,Ib
|
||||
b3: MOV BL/R11L,Ib
|
||||
b4: MOV AH/R12L,Ib
|
||||
b5: MOV CH/R13L,Ib
|
||||
b6: MOV DH/R14L,Ib
|
||||
b7: MOV BH/R15L,Ib
|
||||
b8: MOV rAX/r8,Iv
|
||||
b9: MOV rCX/r9,Iv
|
||||
ba: MOV rDX/r10,Iv
|
||||
bb: MOV rBX/r11,Iv
|
||||
bc: MOV rSP/r12,Iv
|
||||
bd: MOV rBP/r13,Iv
|
||||
be: MOV rSI/r14,Iv
|
||||
bf: MOV rDI/r15,Iv
|
||||
# 0xc0 - 0xcf
|
||||
c0: Grp2 Eb,Ib (1A)
|
||||
c1: Grp2 Ev,Ib (1A)
|
||||
c2: RETN Iw (f64)
|
||||
c3: RETN
|
||||
c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
|
||||
c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
|
||||
c6: Grp11A Eb,Ib (1A)
|
||||
c7: Grp11B Ev,Iz (1A)
|
||||
c8: ENTER Iw,Ib
|
||||
c9: LEAVE (d64)
|
||||
ca: RETF Iw
|
||||
cb: RETF
|
||||
cc: INT3
|
||||
cd: INT Ib
|
||||
ce: INTO (i64)
|
||||
cf: IRET/D/Q
|
||||
# 0xd0 - 0xdf
|
||||
d0: Grp2 Eb,1 (1A)
|
||||
d1: Grp2 Ev,1 (1A)
|
||||
d2: Grp2 Eb,CL (1A)
|
||||
d3: Grp2 Ev,CL (1A)
|
||||
d4: AAM Ib (i64)
|
||||
d5: AAD Ib (i64)
|
||||
d6:
|
||||
d7: XLAT/XLATB
|
||||
d8: ESC
|
||||
d9: ESC
|
||||
da: ESC
|
||||
db: ESC
|
||||
dc: ESC
|
||||
dd: ESC
|
||||
de: ESC
|
||||
df: ESC
|
||||
# 0xe0 - 0xef
|
||||
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
|
||||
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
|
||||
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
|
||||
e0: LOOPNE/LOOPNZ Jb (f64)
|
||||
e1: LOOPE/LOOPZ Jb (f64)
|
||||
e2: LOOP Jb (f64)
|
||||
e3: JrCXZ Jb (f64)
|
||||
e4: IN AL,Ib
|
||||
e5: IN eAX,Ib
|
||||
e6: OUT Ib,AL
|
||||
e7: OUT Ib,eAX
|
||||
# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
|
||||
# in "near" jumps and calls is 16-bit. For CALL,
|
||||
# push of return address is 16-bit wide, RSP is decremented by 2
|
||||
# but is not truncated to 16 bits, unlike RIP.
|
||||
e8: CALL Jz (f64)
|
||||
e9: JMP-near Jz (f64)
|
||||
ea: JMP-far Ap (i64)
|
||||
eb: JMP-short Jb (f64)
|
||||
ec: IN AL,DX
|
||||
ed: IN eAX,DX
|
||||
ee: OUT DX,AL
|
||||
ef: OUT DX,eAX
|
||||
# 0xf0 - 0xff
|
||||
f0: LOCK (Prefix)
|
||||
f1:
|
||||
f2: REPNE (Prefix) | XACQUIRE (Prefix)
|
||||
f3: REP/REPE (Prefix) | XRELEASE (Prefix)
|
||||
f4: HLT
|
||||
f5: CMC
|
||||
f6: Grp3_1 Eb (1A)
|
||||
f7: Grp3_2 Ev (1A)
|
||||
f8: CLC
|
||||
f9: STC
|
||||
fa: CLI
|
||||
fb: STI
|
||||
fc: CLD
|
||||
fd: STD
|
||||
fe: Grp4 (1A)
|
||||
ff: Grp5 (1A)
|
||||
EndTable
|
||||
|
||||
Table: 2-byte opcode (0x0f)
|
||||
Referrer: 2-byte escape
|
||||
AVXcode: 1
|
||||
# 0x0f 0x00-0x0f
|
||||
00: Grp6 (1A)
|
||||
01: Grp7 (1A)
|
||||
02: LAR Gv,Ew
|
||||
03: LSL Gv,Ew
|
||||
04:
|
||||
05: SYSCALL (o64)
|
||||
06: CLTS
|
||||
07: SYSRET (o64)
|
||||
08: INVD
|
||||
09: WBINVD
|
||||
0a:
|
||||
0b: UD2 (1B)
|
||||
0c:
|
||||
# AMD's prefetch group. Intel supports prefetchw(/1) only.
|
||||
0d: GrpP
|
||||
0e: FEMMS
|
||||
# 3DNow! uses the last imm byte as opcode extension.
|
||||
0f: 3DNow! Pq,Qq,Ib
|
||||
# 0x0f 0x10-0x1f
|
||||
# NOTE: According to Intel SDM opcode map, vmovups and vmovupd have no operands
|
||||
# but they actually have operands. And also, vmovss and vmovsd only accept 128bit.
|
||||
# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
|
||||
# Many AVX instructions lack v1 superscript, according to Intel AVX-Programming
|
||||
# Reference A.1
|
||||
10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
|
||||
11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
|
||||
12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
|
||||
13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
|
||||
14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
|
||||
15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
|
||||
16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
|
||||
17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
|
||||
18: Grp16 (1A)
|
||||
19:
|
||||
1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
|
||||
1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
|
||||
1c:
|
||||
1d:
|
||||
1e:
|
||||
1f: NOP Ev
|
||||
# 0x0f 0x20-0x2f
|
||||
20: MOV Rd,Cd
|
||||
21: MOV Rd,Dd
|
||||
22: MOV Cd,Rd
|
||||
23: MOV Dd,Rd
|
||||
24:
|
||||
25:
|
||||
26:
|
||||
27:
|
||||
28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
|
||||
29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
|
||||
2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
|
||||
2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
|
||||
2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
|
||||
2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
|
||||
2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
|
||||
2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
|
||||
# 0x0f 0x30-0x3f
|
||||
30: WRMSR
|
||||
31: RDTSC
|
||||
32: RDMSR
|
||||
33: RDPMC
|
||||
34: SYSENTER
|
||||
35: SYSEXIT
|
||||
36:
|
||||
37: GETSEC
|
||||
38: escape # 3-byte escape 1
|
||||
39:
|
||||
3a: escape # 3-byte escape 2
|
||||
3b:
|
||||
3c:
|
||||
3d:
|
||||
3e:
|
||||
3f:
|
||||
# 0x0f 0x40-0x4f
|
||||
40: CMOVO Gv,Ev
|
||||
41: CMOVNO Gv,Ev
|
||||
42: CMOVB/C/NAE Gv,Ev
|
||||
43: CMOVAE/NB/NC Gv,Ev
|
||||
44: CMOVE/Z Gv,Ev
|
||||
45: CMOVNE/NZ Gv,Ev
|
||||
46: CMOVBE/NA Gv,Ev
|
||||
47: CMOVA/NBE Gv,Ev
|
||||
48: CMOVS Gv,Ev
|
||||
49: CMOVNS Gv,Ev
|
||||
4a: CMOVP/PE Gv,Ev
|
||||
4b: CMOVNP/PO Gv,Ev
|
||||
4c: CMOVL/NGE Gv,Ev
|
||||
4d: CMOVNL/GE Gv,Ev
|
||||
4e: CMOVLE/NG Gv,Ev
|
||||
4f: CMOVNLE/G Gv,Ev
|
||||
# 0x0f 0x50-0x5f
|
||||
50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
|
||||
51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
|
||||
53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
|
||||
54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
|
||||
55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
|
||||
56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
|
||||
57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
|
||||
58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
|
||||
5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
|
||||
5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
|
||||
# 0x0f 0x60-0x6f
|
||||
60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
|
||||
61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
|
||||
62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
|
||||
63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
|
||||
64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
|
||||
65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
|
||||
66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
|
||||
67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
|
||||
68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
|
||||
69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
|
||||
6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
|
||||
6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
|
||||
6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
|
||||
6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
|
||||
6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
|
||||
6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
|
||||
# 0x0f 0x70-0x7f
|
||||
70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
|
||||
71: Grp12 (1A)
|
||||
72: Grp13 (1A)
|
||||
73: Grp14 (1A)
|
||||
74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
|
||||
75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
|
||||
76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
|
||||
# Note: Remove (v), because vzeroall and vzeroupper become emms without VEX.
|
||||
77: emms | vzeroupper | vzeroall
|
||||
78: VMREAD Ey,Gy
|
||||
79: VMWRITE Gy,Ey
|
||||
7a:
|
||||
7b:
|
||||
7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
|
||||
7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
|
||||
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
|
||||
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
|
||||
# 0x0f 0x80-0x8f
|
||||
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
|
||||
80: JO Jz (f64)
|
||||
81: JNO Jz (f64)
|
||||
82: JB/JC/JNAE Jz (f64)
|
||||
83: JAE/JNB/JNC Jz (f64)
|
||||
84: JE/JZ Jz (f64)
|
||||
85: JNE/JNZ Jz (f64)
|
||||
86: JBE/JNA Jz (f64)
|
||||
87: JA/JNBE Jz (f64)
|
||||
88: JS Jz (f64)
|
||||
89: JNS Jz (f64)
|
||||
8a: JP/JPE Jz (f64)
|
||||
8b: JNP/JPO Jz (f64)
|
||||
8c: JL/JNGE Jz (f64)
|
||||
8d: JNL/JGE Jz (f64)
|
||||
8e: JLE/JNG Jz (f64)
|
||||
8f: JNLE/JG Jz (f64)
|
||||
# 0x0f 0x90-0x9f
|
||||
90: SETO Eb
|
||||
91: SETNO Eb
|
||||
92: SETB/C/NAE Eb
|
||||
93: SETAE/NB/NC Eb
|
||||
94: SETE/Z Eb
|
||||
95: SETNE/NZ Eb
|
||||
96: SETBE/NA Eb
|
||||
97: SETA/NBE Eb
|
||||
98: SETS Eb
|
||||
99: SETNS Eb
|
||||
9a: SETP/PE Eb
|
||||
9b: SETNP/PO Eb
|
||||
9c: SETL/NGE Eb
|
||||
9d: SETNL/GE Eb
|
||||
9e: SETLE/NG Eb
|
||||
9f: SETNLE/G Eb
|
||||
# 0x0f 0xa0-0xaf
|
||||
a0: PUSH FS (d64)
|
||||
a1: POP FS (d64)
|
||||
a2: CPUID
|
||||
a3: BT Ev,Gv
|
||||
a4: SHLD Ev,Gv,Ib
|
||||
a5: SHLD Ev,Gv,CL
|
||||
a6: GrpPDLK
|
||||
a7: GrpRNG
|
||||
a8: PUSH GS (d64)
|
||||
a9: POP GS (d64)
|
||||
aa: RSM
|
||||
ab: BTS Ev,Gv
|
||||
ac: SHRD Ev,Gv,Ib
|
||||
ad: SHRD Ev,Gv,CL
|
||||
ae: Grp15 (1A),(1C)
|
||||
af: IMUL Gv,Ev
|
||||
# 0x0f 0xb0-0xbf
|
||||
b0: CMPXCHG Eb,Gb
|
||||
b1: CMPXCHG Ev,Gv
|
||||
b2: LSS Gv,Mp
|
||||
b3: BTR Ev,Gv
|
||||
b4: LFS Gv,Mp
|
||||
b5: LGS Gv,Mp
|
||||
b6: MOVZX Gv,Eb
|
||||
b7: MOVZX Gv,Ew
|
||||
b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
|
||||
b9: Grp10 (1A)
|
||||
ba: Grp8 Ev,Ib (1A)
|
||||
bb: BTC Ev,Gv
|
||||
bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
|
||||
bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
|
||||
be: MOVSX Gv,Eb
|
||||
bf: MOVSX Gv,Ew
|
||||
# 0x0f 0xc0-0xcf
|
||||
c0: XADD Eb,Gb
|
||||
c1: XADD Ev,Gv
|
||||
c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
|
||||
c3: movnti My,Gy
|
||||
c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
|
||||
c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
|
||||
c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
|
||||
c7: Grp9 (1A)
|
||||
c8: BSWAP RAX/EAX/R8/R8D
|
||||
c9: BSWAP RCX/ECX/R9/R9D
|
||||
ca: BSWAP RDX/EDX/R10/R10D
|
||||
cb: BSWAP RBX/EBX/R11/R11D
|
||||
cc: BSWAP RSP/ESP/R12/R12D
|
||||
cd: BSWAP RBP/EBP/R13/R13D
|
||||
ce: BSWAP RSI/ESI/R14/R14D
|
||||
cf: BSWAP RDI/EDI/R15/R15D
|
||||
# 0x0f 0xd0-0xdf
|
||||
d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
|
||||
d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
|
||||
d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
|
||||
d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
|
||||
d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
|
||||
d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
|
||||
d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
|
||||
d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
|
||||
d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
|
||||
d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
|
||||
da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
|
||||
db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
|
||||
dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
|
||||
dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
|
||||
de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
|
||||
df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
|
||||
# 0x0f 0xe0-0xef
|
||||
e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
|
||||
e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
|
||||
e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
|
||||
e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
|
||||
e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
|
||||
e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
|
||||
e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
|
||||
e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
|
||||
e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
|
||||
e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
|
||||
ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
|
||||
eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
|
||||
ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
|
||||
ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
|
||||
ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
|
||||
ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
|
||||
# 0x0f 0xf0-0xff
|
||||
f0: vlddqu Vx,Mx (F2)
|
||||
f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
|
||||
f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
|
||||
f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
|
||||
f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
|
||||
f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
|
||||
f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
|
||||
f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
|
||||
f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
|
||||
f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
|
||||
fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
|
||||
fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
|
||||
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
|
||||
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
|
||||
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
|
||||
ff:
|
||||
EndTable
|
||||
|
||||
Table: 3-byte opcode 1 (0x0f 0x38)
|
||||
Referrer: 3-byte escape 1
|
||||
AVXcode: 2
|
||||
# 0x0f 0x38 0x00-0x0f
|
||||
00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
|
||||
01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
|
||||
02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
|
||||
03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
|
||||
04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
|
||||
05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
|
||||
06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
|
||||
07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
|
||||
08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
|
||||
09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
|
||||
0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
|
||||
0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
|
||||
0c: vpermilps Vx,Hx,Wx (66),(v)
|
||||
0d: vpermilpd Vx,Hx,Wx (66),(v)
|
||||
0e: vtestps Vx,Wx (66),(v)
|
||||
0f: vtestpd Vx,Wx (66),(v)
|
||||
# 0x0f 0x38 0x10-0x1f
|
||||
10: pblendvb Vdq,Wdq (66)
|
||||
11:
|
||||
12:
|
||||
13: vcvtph2ps Vx,Wx,Ib (66),(v)
|
||||
14: blendvps Vdq,Wdq (66)
|
||||
15: blendvpd Vdq,Wdq (66)
|
||||
16: vpermps Vqq,Hqq,Wqq (66),(v)
|
||||
17: vptest Vx,Wx (66)
|
||||
18: vbroadcastss Vx,Wd (66),(v)
|
||||
19: vbroadcastsd Vqq,Wq (66),(v)
|
||||
1a: vbroadcastf128 Vqq,Mdq (66),(v)
|
||||
1b:
|
||||
1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
|
||||
1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
|
||||
1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
|
||||
1f:
|
||||
# 0x0f 0x38 0x20-0x2f
|
||||
20: vpmovsxbw Vx,Ux/Mq (66),(v1)
|
||||
21: vpmovsxbd Vx,Ux/Md (66),(v1)
|
||||
22: vpmovsxbq Vx,Ux/Mw (66),(v1)
|
||||
23: vpmovsxwd Vx,Ux/Mq (66),(v1)
|
||||
24: vpmovsxwq Vx,Ux/Md (66),(v1)
|
||||
25: vpmovsxdq Vx,Ux/Mq (66),(v1)
|
||||
26:
|
||||
27:
|
||||
28: vpmuldq Vx,Hx,Wx (66),(v1)
|
||||
29: vpcmpeqq Vx,Hx,Wx (66),(v1)
|
||||
2a: vmovntdqa Vx,Mx (66),(v1)
|
||||
2b: vpackusdw Vx,Hx,Wx (66),(v1)
|
||||
2c: vmaskmovps Vx,Hx,Mx (66),(v)
|
||||
2d: vmaskmovpd Vx,Hx,Mx (66),(v)
|
||||
2e: vmaskmovps Mx,Hx,Vx (66),(v)
|
||||
2f: vmaskmovpd Mx,Hx,Vx (66),(v)
|
||||
# 0x0f 0x38 0x30-0x3f
|
||||
30: vpmovzxbw Vx,Ux/Mq (66),(v1)
|
||||
31: vpmovzxbd Vx,Ux/Md (66),(v1)
|
||||
32: vpmovzxbq Vx,Ux/Mw (66),(v1)
|
||||
33: vpmovzxwd Vx,Ux/Mq (66),(v1)
|
||||
34: vpmovzxwq Vx,Ux/Md (66),(v1)
|
||||
35: vpmovzxdq Vx,Ux/Mq (66),(v1)
|
||||
36: vpermd Vqq,Hqq,Wqq (66),(v)
|
||||
37: vpcmpgtq Vx,Hx,Wx (66),(v1)
|
||||
38: vpminsb Vx,Hx,Wx (66),(v1)
|
||||
39: vpminsd Vx,Hx,Wx (66),(v1)
|
||||
3a: vpminuw Vx,Hx,Wx (66),(v1)
|
||||
3b: vpminud Vx,Hx,Wx (66),(v1)
|
||||
3c: vpmaxsb Vx,Hx,Wx (66),(v1)
|
||||
3d: vpmaxsd Vx,Hx,Wx (66),(v1)
|
||||
3e: vpmaxuw Vx,Hx,Wx (66),(v1)
|
||||
3f: vpmaxud Vx,Hx,Wx (66),(v1)
|
||||
# 0x0f 0x38 0x40-0x8f
|
||||
40: vpmulld Vx,Hx,Wx (66),(v1)
|
||||
41: vphminposuw Vdq,Wdq (66),(v1)
|
||||
42:
|
||||
43:
|
||||
44:
|
||||
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
|
||||
46: vpsravd Vx,Hx,Wx (66),(v)
|
||||
47: vpsllvd/q Vx,Hx,Wx (66),(v)
|
||||
# Skip 0x48-0x57
|
||||
58: vpbroadcastd Vx,Wx (66),(v)
|
||||
59: vpbroadcastq Vx,Wx (66),(v)
|
||||
5a: vbroadcasti128 Vqq,Mdq (66),(v)
|
||||
# Skip 0x5b-0x77
|
||||
78: vpbroadcastb Vx,Wx (66),(v)
|
||||
79: vpbroadcastw Vx,Wx (66),(v)
|
||||
# Skip 0x7a-0x7f
|
||||
80: INVEPT Gy,Mdq (66)
|
||||
81: INVPID Gy,Mdq (66)
|
||||
82: INVPCID Gy,Mdq (66)
|
||||
8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
|
||||
8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
|
||||
# 0x0f 0x38 0x90-0xbf (FMA)
|
||||
90: vgatherdd/q Vx,Hx,Wx (66),(v)
|
||||
91: vgatherqd/q Vx,Hx,Wx (66),(v)
|
||||
92: vgatherdps/d Vx,Hx,Wx (66),(v)
|
||||
93: vgatherqps/d Vx,Hx,Wx (66),(v)
|
||||
94:
|
||||
95:
|
||||
96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
|
||||
97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
|
||||
98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
|
||||
99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
|
||||
9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
|
||||
9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
|
||||
9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
|
||||
a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
|
||||
a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
|
||||
a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
|
||||
ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
|
||||
ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
|
||||
af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
|
||||
b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
|
||||
b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
|
||||
b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
|
||||
bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
|
||||
bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
|
||||
bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
|
||||
# 0x0f 0x38 0xc0-0xff
|
||||
db: VAESIMC Vdq,Wdq (66),(v1)
|
||||
dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
|
||||
dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
|
||||
de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
|
||||
df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
|
||||
f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
|
||||
f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
|
||||
f2: ANDN Gy,By,Ey (v)
|
||||
f3: Grp17 (1A)
|
||||
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
|
||||
f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
|
||||
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
|
||||
EndTable
|
||||
|
||||
Table: 3-byte opcode 2 (0x0f 0x3a)
|
||||
Referrer: 3-byte escape 2
|
||||
AVXcode: 3
|
||||
# 0x0f 0x3a 0x00-0xff
|
||||
00: vpermq Vqq,Wqq,Ib (66),(v)
|
||||
01: vpermpd Vqq,Wqq,Ib (66),(v)
|
||||
02: vpblendd Vx,Hx,Wx,Ib (66),(v)
|
||||
03:
|
||||
04: vpermilps Vx,Wx,Ib (66),(v)
|
||||
05: vpermilpd Vx,Wx,Ib (66),(v)
|
||||
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
|
||||
07:
|
||||
08: vroundps Vx,Wx,Ib (66)
|
||||
09: vroundpd Vx,Wx,Ib (66)
|
||||
0a: vroundss Vss,Wss,Ib (66),(v1)
|
||||
0b: vroundsd Vsd,Wsd,Ib (66),(v1)
|
||||
0c: vblendps Vx,Hx,Wx,Ib (66)
|
||||
0d: vblendpd Vx,Hx,Wx,Ib (66)
|
||||
0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
|
||||
0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
|
||||
14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
|
||||
15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
|
||||
16: vpextrd/q Ey,Vdq,Ib (66),(v1)
|
||||
17: vextractps Ed,Vdq,Ib (66),(v1)
|
||||
18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
|
||||
19: vextractf128 Wdq,Vqq,Ib (66),(v)
|
||||
1d: vcvtps2ph Wx,Vx,Ib (66),(v)
|
||||
20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
|
||||
21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
|
||||
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
|
||||
38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
|
||||
39: vextracti128 Wdq,Vqq,Ib (66),(v)
|
||||
40: vdpps Vx,Hx,Wx,Ib (66)
|
||||
41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
|
||||
42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
|
||||
44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
|
||||
46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
|
||||
4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
|
||||
4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
|
||||
4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
|
||||
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
|
||||
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
|
||||
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
|
||||
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
|
||||
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
|
||||
f0: RORX Gy,Ey,Ib (F2),(v)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp1
|
||||
0: ADD
|
||||
1: OR
|
||||
2: ADC
|
||||
3: SBB
|
||||
4: AND
|
||||
5: SUB
|
||||
6: XOR
|
||||
7: CMP
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp1A
|
||||
0: POP
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp2
|
||||
0: ROL
|
||||
1: ROR
|
||||
2: RCL
|
||||
3: RCR
|
||||
4: SHL/SAL
|
||||
5: SHR
|
||||
6:
|
||||
7: SAR
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp3_1
|
||||
0: TEST Eb,Ib
|
||||
1:
|
||||
2: NOT Eb
|
||||
3: NEG Eb
|
||||
4: MUL AL,Eb
|
||||
5: IMUL AL,Eb
|
||||
6: DIV AL,Eb
|
||||
7: IDIV AL,Eb
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp3_2
|
||||
0: TEST Ev,Iz
|
||||
1:
|
||||
2: NOT Ev
|
||||
3: NEG Ev
|
||||
4: MUL rAX,Ev
|
||||
5: IMUL rAX,Ev
|
||||
6: DIV rAX,Ev
|
||||
7: IDIV rAX,Ev
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp4
|
||||
0: INC Eb
|
||||
1: DEC Eb
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp5
|
||||
0: INC Ev
|
||||
1: DEC Ev
|
||||
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
|
||||
2: CALLN Ev (f64)
|
||||
3: CALLF Ep
|
||||
4: JMPN Ev (f64)
|
||||
5: JMPF Mp
|
||||
6: PUSH Ev (d64)
|
||||
7:
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp6
|
||||
0: SLDT Rv/Mw
|
||||
1: STR Rv/Mw
|
||||
2: LLDT Ew
|
||||
3: LTR Ew
|
||||
4: VERR Ew
|
||||
5: VERW Ew
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp7
|
||||
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
|
||||
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
|
||||
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
|
||||
3: LIDT Ms
|
||||
4: SMSW Mw/Rv
|
||||
5:
|
||||
6: LMSW Ew
|
||||
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp8
|
||||
4: BT
|
||||
5: BTS
|
||||
6: BTR
|
||||
7: BTC
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp9
|
||||
1: CMPXCHG8B/16B Mq/Mdq
|
||||
6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
|
||||
7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp10
|
||||
EndTable
|
||||
|
||||
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
|
||||
GrpTable: Grp11A
|
||||
0: MOV Eb,Ib
|
||||
7: XABORT Ib (000),(11B)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp11B
|
||||
0: MOV Eb,Iz
|
||||
7: XBEGIN Jz (000),(11B)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp12
|
||||
2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
|
||||
4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
|
||||
6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp13
|
||||
2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
|
||||
4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
|
||||
6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp14
|
||||
2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
|
||||
3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
|
||||
6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
|
||||
7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp15
|
||||
0: fxsave | RDFSBASE Ry (F3),(11B)
|
||||
1: fxrstor | RDGSBASE Ry (F3),(11B)
|
||||
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
|
||||
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
|
||||
4: XSAVE
|
||||
5: XRSTOR | lfence (11B)
|
||||
6: XSAVEOPT | mfence (11B)
|
||||
7: clflush | sfence (11B)
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp16
|
||||
0: prefetch NTA
|
||||
1: prefetch T0
|
||||
2: prefetch T1
|
||||
3: prefetch T2
|
||||
EndTable
|
||||
|
||||
GrpTable: Grp17
|
||||
1: BLSR By,Ey (v)
|
||||
2: BLSMSK By,Ey (v)
|
||||
3: BLSI By,Ey (v)
|
||||
EndTable
|
||||
|
||||
# AMD's Prefetch Group
|
||||
GrpTable: GrpP
|
||||
0: PREFETCH
|
||||
1: PREFETCHW
|
||||
EndTable
|
||||
|
||||
GrpTable: GrpPDLK
|
||||
0: MONTMUL
|
||||
1: XSHA1
|
||||
2: XSHA2
|
||||
EndTable
|
||||
|
||||
GrpTable: GrpRNG
|
||||
0: xstore-rng
|
||||
1: xcrypt-ecb
|
||||
2: xcrypt-cbc
|
||||
4: xcrypt-cfb
|
||||
5: xcrypt-ofb
|
||||
EndTable
|
1911
tools/perf/util/intel-pt.c
Normal file
1911
tools/perf/util/intel-pt.c
Normal file
File diff suppressed because it is too large
Load Diff
51
tools/perf/util/intel-pt.h
Normal file
51
tools/perf/util/intel-pt.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* intel-pt.h: Intel Processor Trace support
|
||||
* Copyright (c) 2013-2015, Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE__PERF_INTEL_PT_H__
|
||||
#define INCLUDE__PERF_INTEL_PT_H__
|
||||
|
||||
#define INTEL_PT_PMU_NAME "intel_pt"
|
||||
|
||||
/*
 * Consecutive indices starting at 0, one per Intel PT value.
 * NOTE(review): presumably each constant selects one u64 slot in the
 * auxtrace info private data -- confirm against intel-pt.c.
 * INTEL_PT_AUXTRACE_PRIV_SIZE in this header is computed as
 * INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64), so new entries must be added
 * before INTEL_PT_AUXTRACE_PRIV_MAX to keep that size correct.
 */
enum {
|
||||
INTEL_PT_PMU_TYPE,
|
||||
INTEL_PT_TIME_SHIFT,
|
||||
INTEL_PT_TIME_MULT,
|
||||
INTEL_PT_TIME_ZERO,
|
||||
INTEL_PT_CAP_USER_TIME_ZERO,
|
||||
INTEL_PT_TSC_BIT,
|
||||
INTEL_PT_NORETCOMP_BIT,
|
||||
INTEL_PT_HAVE_SCHED_SWITCH,
|
||||
INTEL_PT_SNAPSHOT_MODE,
|
||||
INTEL_PT_PER_CPU_MMAPS,
|
||||
/* Not an index itself: equals the number of entries listed above. */
INTEL_PT_AUXTRACE_PRIV_MAX,
|
||||
};
|
||||
|
||||
#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
|
||||
|
||||
struct auxtrace_record;
|
||||
struct perf_tool;
|
||||
union perf_event;
|
||||
struct perf_session;
|
||||
struct perf_event_attr;
|
||||
struct perf_pmu;
|
||||
|
||||
struct auxtrace_record *intel_pt_recording_init(int *err);
|
||||
|
||||
int intel_pt_process_auxtrace_info(union perf_event *event,
|
||||
struct perf_session *session);
|
||||
|
||||
struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
|
||||
|
||||
#endif
|
@ -462,8 +462,8 @@ static struct perf_pmu *pmu_lookup(const char *name)
|
||||
LIST_HEAD(aliases);
|
||||
__u32 type;
|
||||
|
||||
/* No support for intel_bts or intel_pt so disallow them */
|
||||
if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt"))
|
||||
/* No support for intel_bts so disallow it */
|
||||
if (!strcmp(name, "intel_bts"))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
|
@ -105,6 +105,30 @@ bool perf_can_record_switch_events(void)
|
||||
return perf_probe_api(perf_probe_context_switch);
|
||||
}
|
||||
|
||||
/*
 * Probe whether a CPU-wide event can be opened.
 *
 * Tries to open a software cpu-clock event (kernel excluded) on the first
 * CPU of the map returned by cpu_map__new(NULL), passing -1 in the pid and
 * group fd positions of sys_perf_event_open().
 *
 * Returns true if the open succeeds (the fd is closed again immediately),
 * false if the cpu map cannot be created or the open fails.
 */
bool perf_can_record_cpu_wide(void)
|
||||
{
|
||||
struct perf_event_attr attr = {
|
||||
.type = PERF_TYPE_SOFTWARE,
|
||||
.config = PERF_COUNT_SW_CPU_CLOCK,
|
||||
.exclude_kernel = 1,
|
||||
};
|
||||
struct cpu_map *cpus;
|
||||
int cpu, fd;
|
||||
|
||||
cpus = cpu_map__new(NULL);
|
||||
if (!cpus)
|
||||
return false;
|
||||
/* Only the first map entry is needed; the map reference is dropped right after. */
cpu = cpus->map[0];
|
||||
cpu_map__put(cpus);
|
||||
|
||||
fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
|
||||
/* A negative fd means the probe event could not be opened. */
if (fd < 0)
|
||||
return false;
|
||||
/* The event was only a probe; release it before reporting success. */
close(fd);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
|
||||
{
|
||||
struct perf_evsel *evsel;
|
||||
|
@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle any relocation of vdso necessary because older kernels
|
||||
* attempted to prelink vdso to its virtual address.
|
||||
*/
|
||||
if (dso__is_vdso(dso)) {
|
||||
GElf_Shdr tshdr;
|
||||
|
||||
if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
|
||||
map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
|
||||
}
|
||||
|
||||
dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
|
||||
/*
|
||||
* Initial kernel and module mappings do not map to the dso. For
|
||||
|
Loading…
Reference in New Issue
Block a user