perf tools: Speed up thread map generation
When trying to capture perf data on a system running spejbb2013, perf hung for about 15 minutes. This is because it took that long to gather about 10,000 thread maps and process them. I don't think a user wants to wait that long. Instead, recognize that thread maps are roughly equivalent to pid maps and just quickly copy those instead. To do this, I synthesize 'fork' events, this eventually calls thread__fork() and copies the maps over. The overhead goes from 15 minutes down to about a few seconds. -- V2: based on Jiri's comments, moved malloc up a level and made sure the memory was freed Signed-off-by: Don Zickus <dzickus@redhat.com> Acked-by: Jiri Olsa <jolsa@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Joe Mario <jmario@redhat.com> Link: http://lkml.kernel.org/r/1394808224-113774-1-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
09a71b97cc
commit
363b785f38
@ -129,6 +129,28 @@ out:
|
|||||||
return tgid;
|
return tgid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int perf_event__synthesize_fork(struct perf_tool *tool,
|
||||||
|
union perf_event *event, pid_t pid,
|
||||||
|
pid_t tgid, perf_event__handler_t process,
|
||||||
|
struct machine *machine)
|
||||||
|
{
|
||||||
|
memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
|
||||||
|
|
||||||
|
/* this is really a clone event but we use fork to synthesize it */
|
||||||
|
event->fork.ppid = tgid;
|
||||||
|
event->fork.ptid = tgid;
|
||||||
|
event->fork.pid = tgid;
|
||||||
|
event->fork.tid = pid;
|
||||||
|
event->fork.header.type = PERF_RECORD_FORK;
|
||||||
|
|
||||||
|
event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
|
||||||
|
|
||||||
|
if (process(tool, event, &synth_sample, machine) != 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
|
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
|
||||||
union perf_event *event,
|
union perf_event *event,
|
||||||
pid_t pid, pid_t tgid,
|
pid_t pid, pid_t tgid,
|
||||||
@ -278,6 +300,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
|
|||||||
|
|
||||||
static int __event__synthesize_thread(union perf_event *comm_event,
|
static int __event__synthesize_thread(union perf_event *comm_event,
|
||||||
union perf_event *mmap_event,
|
union perf_event *mmap_event,
|
||||||
|
union perf_event *fork_event,
|
||||||
pid_t pid, int full,
|
pid_t pid, int full,
|
||||||
perf_event__handler_t process,
|
perf_event__handler_t process,
|
||||||
struct perf_tool *tool,
|
struct perf_tool *tool,
|
||||||
@ -326,9 +349,15 @@ static int __event__synthesize_thread(union perf_event *comm_event,
|
|||||||
if (tgid == -1)
|
if (tgid == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
/* process the thread's maps too */
|
if (_pid == pid) {
|
||||||
rc = perf_event__synthesize_mmap_events(tool, mmap_event, _pid, tgid,
|
/* process the parent's maps too */
|
||||||
process, machine, mmap_data);
|
rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
|
||||||
|
process, machine, mmap_data);
|
||||||
|
} else {
|
||||||
|
/* only fork the tid's map, to save time */
|
||||||
|
rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
|
||||||
|
process, machine);
|
||||||
|
}
|
||||||
|
|
||||||
if (rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
@ -344,7 +373,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
|
|||||||
struct machine *machine,
|
struct machine *machine,
|
||||||
bool mmap_data)
|
bool mmap_data)
|
||||||
{
|
{
|
||||||
union perf_event *comm_event, *mmap_event;
|
union perf_event *comm_event, *mmap_event, *fork_event;
|
||||||
int err = -1, thread, j;
|
int err = -1, thread, j;
|
||||||
|
|
||||||
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
|
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
|
||||||
@ -355,9 +384,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
|
|||||||
if (mmap_event == NULL)
|
if (mmap_event == NULL)
|
||||||
goto out_free_comm;
|
goto out_free_comm;
|
||||||
|
|
||||||
|
fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
|
||||||
|
if (fork_event == NULL)
|
||||||
|
goto out_free_mmap;
|
||||||
|
|
||||||
err = 0;
|
err = 0;
|
||||||
for (thread = 0; thread < threads->nr; ++thread) {
|
for (thread = 0; thread < threads->nr; ++thread) {
|
||||||
if (__event__synthesize_thread(comm_event, mmap_event,
|
if (__event__synthesize_thread(comm_event, mmap_event,
|
||||||
|
fork_event,
|
||||||
threads->map[thread], 0,
|
threads->map[thread], 0,
|
||||||
process, tool, machine,
|
process, tool, machine,
|
||||||
mmap_data)) {
|
mmap_data)) {
|
||||||
@ -383,6 +417,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
|
|||||||
/* if not, generate events for it */
|
/* if not, generate events for it */
|
||||||
if (need_leader &&
|
if (need_leader &&
|
||||||
__event__synthesize_thread(comm_event, mmap_event,
|
__event__synthesize_thread(comm_event, mmap_event,
|
||||||
|
fork_event,
|
||||||
comm_event->comm.pid, 0,
|
comm_event->comm.pid, 0,
|
||||||
process, tool, machine,
|
process, tool, machine,
|
||||||
mmap_data)) {
|
mmap_data)) {
|
||||||
@ -391,6 +426,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
free(fork_event);
|
||||||
|
out_free_mmap:
|
||||||
free(mmap_event);
|
free(mmap_event);
|
||||||
out_free_comm:
|
out_free_comm:
|
||||||
free(comm_event);
|
free(comm_event);
|
||||||
@ -405,7 +442,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
|
|||||||
DIR *proc;
|
DIR *proc;
|
||||||
char proc_path[PATH_MAX];
|
char proc_path[PATH_MAX];
|
||||||
struct dirent dirent, *next;
|
struct dirent dirent, *next;
|
||||||
union perf_event *comm_event, *mmap_event;
|
union perf_event *comm_event, *mmap_event, *fork_event;
|
||||||
int err = -1;
|
int err = -1;
|
||||||
|
|
||||||
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
|
comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
|
||||||
@ -416,6 +453,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
|
|||||||
if (mmap_event == NULL)
|
if (mmap_event == NULL)
|
||||||
goto out_free_comm;
|
goto out_free_comm;
|
||||||
|
|
||||||
|
fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
|
||||||
|
if (fork_event == NULL)
|
||||||
|
goto out_free_mmap;
|
||||||
|
|
||||||
if (machine__is_default_guest(machine))
|
if (machine__is_default_guest(machine))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@ -423,7 +464,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
|
|||||||
proc = opendir(proc_path);
|
proc = opendir(proc_path);
|
||||||
|
|
||||||
if (proc == NULL)
|
if (proc == NULL)
|
||||||
goto out_free_mmap;
|
goto out_free_fork;
|
||||||
|
|
||||||
while (!readdir_r(proc, &dirent, &next) && next) {
|
while (!readdir_r(proc, &dirent, &next) && next) {
|
||||||
char *end;
|
char *end;
|
||||||
@ -435,12 +476,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
|
|||||||
* We may race with exiting thread, so don't stop just because
|
* We may race with exiting thread, so don't stop just because
|
||||||
* one thread couldn't be synthesized.
|
* one thread couldn't be synthesized.
|
||||||
*/
|
*/
|
||||||
__event__synthesize_thread(comm_event, mmap_event, pid, 1,
|
__event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
|
||||||
process, tool, machine, mmap_data);
|
1, process, tool, machine, mmap_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
err = 0;
|
err = 0;
|
||||||
closedir(proc);
|
closedir(proc);
|
||||||
|
out_free_fork:
|
||||||
|
free(fork_event);
|
||||||
out_free_mmap:
|
out_free_mmap:
|
||||||
free(mmap_event);
|
free(mmap_event);
|
||||||
out_free_comm:
|
out_free_comm:
|
||||||
|
Loading…
Reference in New Issue
Block a user