From 6650b181cc0b0c4218ebff9b0c368a2e56287907 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Oct 2013 18:25:12 -0300 Subject: perf scripting perl: Fix build error on Fedora 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cast __u64 to u64 to silence this warning on older distros, such as Fedora 12: CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o cc1: warnings being treated as errors util/scripting-engines/trace-event-perl.c: In function ‘perl_process_tracepoint’: util/scripting-engines/trace-event-perl.c:285: error: format ‘%lu’ expects type ‘long unsigned int’, but argument 2 has type ‘__u64’ make[1]: *** [/tmp/build/perf/util/scripting-engines/trace-event-perl.o] Error 1 make: *** [install] Error 2 make: Leaving directory `/home/acme/git/linux/tools/perf' [acme@fedora12 linux]$ Reported-by: Waiman Long Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Waiman Long Link: http://lkml.kernel.org/n/tip-nlxofdqcdjfm0w9o6bgq4kqv@git.kernel.org Link: http://lkml.kernel.org/r/1381265120-58532-1-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index a85e4ae..c0c9795 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -282,7 +282,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %" PRIu64, evsel->attr.config); + die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); pid = raw_field_value(event, "common_pid", data); -- cgit v0.10.2 From 97119f37bbebbab852899bd37ed52b80396728f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2013 17:34:10 -0300 Subject: perf trace: Split fd -> pathname array handling So that the part that grows the array as needed is untied from the code that reads the /proc/pid/fd symlink and can be used for the vfs_getname hook that will set the fd -> path translation too, when available. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ydo5rumyv9hdc1vsfmqamugs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d0f91fe..763bef4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -975,30 +975,9 @@ struct trace { double runtime_ms; }; -static int thread__read_fd_path(struct thread *thread, int fd) +static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) { struct thread_trace *ttrace = thread->priv; - char linkname[PATH_MAX], pathname[PATH_MAX]; - struct stat st; - int ret; - - if (thread->pid_ == thread->tid) { - scnprintf(linkname, sizeof(linkname), - "/proc/%d/fd/%d", thread->pid_, fd); - } else { - scnprintf(linkname, sizeof(linkname), - "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); - } - - if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) - return -1; - - ret = readlink(linkname, pathname, sizeof(pathname)); - - if (ret < 0 || ret > st.st_size) - return -1; - - pathname[ret] = '\0'; if (fd > ttrace->paths.max) { char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *)); @@ -1022,6 +1001,32 @@ static int thread__read_fd_path(struct thread *thread, int fd) return ttrace->paths.table[fd] != NULL ? 0 : -1; } +static int thread__read_fd_path(struct thread *thread, int fd) +{ + char linkname[PATH_MAX], pathname[PATH_MAX]; + struct stat st; + int ret; + + if (thread->pid_ == thread->tid) { + scnprintf(linkname, sizeof(linkname), + "/proc/%d/fd/%d", thread->pid_, fd); + } else { + scnprintf(linkname, sizeof(linkname), + "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd); + } + + if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) + return -1; + + ret = readlink(linkname, pathname, sizeof(pathname)); + + if (ret < 0 || ret > st.st_size) + return -1; + + pathname[ret] = '\0'; + return trace__set_fd_pathname(thread, fd, pathname); +} + static const char *thread__fd_path(struct thread *thread, int fd, bool live) { struct thread_trace *ttrace = thread->priv; -- cgit v0.10.2 From c522739d72a341a3e74a369ce6298b9412813d3f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Sep 2013 18:06:19 -0300 Subject: perf trace: Use vfs_getname hook if available Initially it tries to find a probe:vfs_getname that should be setup with: perf probe 'vfs_getname=getname_flags:65 pathname=result->name:string' or with slight changes to cope with code flux in the getname_flags code. In the future, if a "vfs:getname" tracepoint becomes available, then it will be preferred. This is not strictly required and more expensive method of reading the /proc/pid/fd/ symlink will be used when the fd->path array entry is not populated by a previous vfs_getname + open syscall ret sequence. As with any other 'perf probe' probe the setup must be done just once and the probe will be left inactive, waiting for users, be it 'perf trace' of any other tool. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ujg8se8glq5izmu8cdkq15po@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 54139c6..7b0497f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -97,6 +97,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show a summary of syscalls by thread with min, max, and average times (in msec) and relative stddev. +--tool_stats:: + Show tool stats such as number of times fd->pathname was discovered thru + hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 763bef4..86d2b1f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -951,7 +951,10 @@ fail: struct trace { struct perf_tool tool; - int audit_machine; + struct { + int machine; + int open_id; + } audit; struct { int max; struct syscall *table; @@ -965,14 +968,19 @@ struct trace { struct strlist *ev_qualifier; bool not_ev_qualifier; bool live; + const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; bool sched; bool multiple_threads; bool summary; bool show_comm; + bool show_tool_stats; double duration_filter; double runtime_ms; + struct { + u64 vfs_getname, proc_getname; + } stats; }; static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) @@ -1027,7 +1035,8 @@ static int thread__read_fd_path(struct thread *thread, int fd) return trace__set_fd_pathname(thread, fd, pathname); } -static const char *thread__fd_path(struct thread *thread, int fd, bool live) +static const char *thread__fd_path(struct thread *thread, int fd, + struct trace *trace) { struct thread_trace *ttrace = thread->priv; @@ -1037,9 +1046,13 @@ static const char *thread__fd_path(struct thread *thread, int fd, bool live) if (fd < 0) return NULL; - if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) && - (!live || thread__read_fd_path(thread, fd))) - return NULL; + if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) + if (!trace->live) + return NULL; + ++trace->stats.proc_getname; + if (thread__read_fd_path(thread, fd)) { + return NULL; + } return ttrace->paths.table[fd]; } @@ -1049,7 +1062,7 @@ static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, { int fd = arg->val; size_t printed = scnprintf(bf, size, "%d", fd); - const char *path = thread__fd_path(arg->thread, fd, arg->trace->live); + const char *path = thread__fd_path(arg->thread, fd, arg->trace); if (path) printed += scnprintf(bf + printed, size - printed, "<%s>", path); @@ -1186,7 +1199,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) { char tp_name[128]; struct syscall *sc; - const char *name = audit_syscall_to_name(id, trace->audit_machine); + const char *name = audit_syscall_to_name(id, trace->audit.machine); if (name == NULL) return -1; @@ -1450,6 +1463,12 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__intval(evsel, sample, "ret"); + if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { + trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); + trace->last_vfs_getname = NULL; + ++trace->stats.vfs_getname; + } + ttrace = thread->priv; ttrace->exit_time = sample->time; @@ -1494,6 +1513,13 @@ out: return 0; } +static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample) +{ + trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); + return 0; +} + static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample) { @@ -1616,6 +1642,22 @@ static int trace__record(int argc, const char **argv) static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); +static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname", + evlist->nr_entries); + if (evsel == NULL) + return; + + if (perf_evsel__field(evsel, "pathname") == NULL) { + perf_evsel__delete(evsel); + return; + } + + evsel->handler.func = trace__vfs_getname; + perf_evlist__add(evlist, evsel); +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = perf_evlist__new(); @@ -1635,6 +1677,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) goto out_error_tp; + perf_evlist__add_vfs_getname(evlist); + if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) @@ -1741,12 +1785,22 @@ again: if (done) perf_evlist__disable(evlist); - - goto again; + else + goto again; out_unmap_evlist: - if (!err && trace->summary) - trace__fprintf_thread_summary(trace, trace->output); + if (!err) { + if (trace->summary) + trace__fprintf_thread_summary(trace, trace->output); + + if (trace->show_tool_stats) { + fprintf(trace->output, "Stats:\n " + " vfs_getname : %" PRIu64 "\n" + " proc_getname: %" PRIu64 "\n", + trace->stats.vfs_getname, + trace->stats.proc_getname); + } + } perf_evlist__munmap(evlist); out_close_evlist: @@ -1788,6 +1842,7 @@ static int trace__replay(struct trace *trace) const struct perf_evsel_str_handler handlers[] = { { "raw_syscalls:sys_enter", trace__sys_enter, }, { "raw_syscalls:sys_exit", trace__sys_exit, }, + { "probe:vfs_getname", trace__vfs_getname, }, }; struct perf_session *session; @@ -1997,7 +2052,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct trace trace = { - .audit_machine = audit_detect_machine(), + .audit = { + .machine = audit_detect_machine(), + .open_id = audit_name_to_syscall("open", trace.audit.machine), + }, .syscalls = { . max = -1, }, @@ -2019,6 +2077,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const struct option trace_options[] = { OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), + OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of events to trace"), OPT_STRING('o', "output", &output_name, "file", "output file name"), -- cgit v0.10.2 From ba209f856380891a6ea6cdb07b2b068faccbff73 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Oct 2013 11:57:33 -0300 Subject: perf trace: Improve event processing exit We need to differentiate SIGCHLD from SIGINT, the later should cause as immediate as possible exit, while the former should wait to process the events that may be perceived in the ring buffer after the SIGCHLD is handled. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vf6n57ewm3mjy2sz6r491hus@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 86d2b1f..ec82895 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1098,10 +1098,12 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) } static bool done = false; +static bool interrupted = false; -static void sig_handler(int sig __maybe_unused) +static void sig_handler(int sig) { done = true; + interrupted = sig == SIGINT; } static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, @@ -1771,24 +1773,23 @@ again: handler = evsel->handler.func; handler(trace, evsel, &sample); - if (done) - goto out_unmap_evlist; + if (interrupted) + goto out_disable; } } if (trace->nr_events == before) { - if (done) - goto out_unmap_evlist; + int timeout = done ? 100 : -1; - poll(evlist->pollfd, evlist->nr_fds, -1); + if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0) + goto again; + } else { + goto again; } - if (done) - perf_evlist__disable(evlist); - else - goto again; +out_disable: + perf_evlist__disable(evlist); -out_unmap_evlist: if (!err) { if (trace->summary) trace__fprintf_thread_summary(trace, trace->output); -- cgit v0.10.2 From 6ef068cb8e77784431b6c80adc49d0b0a6a5df66 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Oct 2013 12:07:58 -0300 Subject: perf evlist: Introduce perf_evlist__strerror_tp method Out of 'perf trace', should be used by other tools that uses tracepoints. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Ramkumar Ramachandra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-lyvtxhchz4ga8fwht15x8wou@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ec82895..78b0d6a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1814,27 +1814,11 @@ out: trace->live = false; return err; out_error_tp: - switch(errno) { - case ENOENT: - fputs("Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel was compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'\n", - trace->output); - break; - case EACCES: - fprintf(trace->output, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); - break; - default: { - char bf[256]; - fprintf(trace->output, "Can't trace: %s\n", - strerror_r(errno, bf, sizeof(bf))); - } - break; - } +{ + char errbuf[BUFSIZ]; + perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); + fprintf(trace->output, "%s\n", errbuf); +} goto out_delete_evlist; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index cb9523f..6737420 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1126,3 +1126,30 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n");; } + +int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) +{ + char sbuf[128]; + + switch (err) { + case ENOENT: + scnprintf(buf, size, "%s", + "Error:\tUnable to find debugfs\n" + "Hint:\tWas your kernel was compiled with debugfs support?\n" + "Hint:\tIs the debugfs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: + scnprintf(buf, size, + "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + debugfs_mountpoint, debugfs_mountpoint); + break; + default: + scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 722618f..386de10 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -168,6 +168,8 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); +int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); + static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; -- cgit v0.10.2 From 97a07f10c38064dea492794c99445f6260afcdc1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Oct 2013 16:33:43 -0300 Subject: perf tools: Introduce filename__read_int helper Just opens a file and calls atoi() in at most its first 64 bytes. To read things like /proc/sys/kernel/perf_event_paranoid. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-669q04c5tou5pnt8jtiz6y2r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 8dc8cf3..c25e57b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -394,3 +394,20 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } + +int filename__read_int(const char *filename, int *value) +{ + char line[64]; + int fd = open(filename, O_RDONLY), err = -1; + + if (fd < 0) + return -1; + + if (read(fd, line, sizeof(line)) > 0) { + *value = atoi(line); + err = 0; + } + + close(fd); + return err; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 42dfba7..c8f362d 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -305,4 +305,6 @@ struct dso; char *get_srcline(struct dso *dso, unsigned long addr); void free_srcline(char *srcline); + +int filename__read_int(const char *filename, int *value); #endif /* GIT_COMPAT_UTIL_H */ -- cgit v0.10.2 From a8f23d8f8af43d49cd3331681913b76e4951e1a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Oct 2013 17:38:29 -0300 Subject: perf trace: Improve messages related to /proc/sys/kernel/perf_event_paranoid kernel/events/core.c has: /* * perf event paranoia level: * -1 - not paranoid at all * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv */ int sysctl_perf_event_paranoid __read_mostly = 1; So, with the default being 1, a non-root user can trace his stuff: [acme@zoo ~]$ cat /proc/sys/kernel/perf_event_paranoid 1 [acme@zoo ~]$ yes > /dev/null & [1] 15338 [acme@zoo ~]$ trace -p 15338 | head -5 0.005 ( 0.005 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.045 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.085 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.125 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 0.165 ( 0.001 ms): write(fd: 1, buf: 0x7fe6db765000, count: 4096 ) = 4096 [acme@zoo ~]$ [acme@zoo ~]$ trace --duration 1 sleep 1 1002.148 (1001.218 ms): nanosleep(rqtp: 0x7fff46c79250 ) = 0 [acme@zoo ~]$ [acme@zoo ~]$ trace -- usleep 1 | tail -5 0.905 ( 0.002 ms): brk( ) = 0x1c82000 0.910 ( 0.003 ms): brk(brk: 0x1ca3000 ) = 0x1ca3000 0.913 ( 0.001 ms): brk( ) = 0x1ca3000 0.990 ( 0.059 ms): nanosleep(rqtp: 0x7fffe31a3280 ) = 0 0.995 ( 0.000 ms): exit_group( [acme@zoo ~]$ But can't do system wide tracing: [acme@zoo ~]$ trace Error: Operation not permitted. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 1. [acme@zoo ~]$ [acme@zoo ~]$ trace --cpu 0 Error: Operation not permitted. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 1. [acme@zoo ~]$ If the paranoid level is >= 2, i.e. turn this perf stuff off for !root users: [acme@zoo ~]$ sudo sh -c 'echo 2 > /proc/sys/kernel/perf_event_paranoid' [acme@zoo ~]$ cat /proc/sys/kernel/perf_event_paranoid 2 [acme@zoo ~]$ [acme@zoo ~]$ trace usleep 1 Error: Permission denied. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For your workloads it needs to be <= 1 Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 2. [acme@zoo ~]$ [acme@zoo ~]$ trace Error: Permission denied. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For your workloads it needs to be <= 1 Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 2. [acme@zoo ~]$ [acme@zoo ~]$ trace --cpu 1 Error: Permission denied. Hint: Check /proc/sys/kernel/perf_event_paranoid setting. Hint: For your workloads it needs to be <= 1 Hint: For system wide tracing it needs to be set to -1. Hint: The current value is 2. [acme@zoo ~]$ If the user manages to get what he/she wants, convincing root not to be paranoid at all... [root@zoo ~]# echo -1 > /proc/sys/kernel/perf_event_paranoid [root@zoo ~]# cat /proc/sys/kernel/perf_event_paranoid -1 [root@zoo ~]# [acme@zoo ~]$ ps -eo user,pid,comm | grep Xorg root 729 Xorg [acme@zoo ~]$ [acme@zoo ~]$ trace -a --duration 0.001 -e \!select,ioctl,writev | grep Xorg | head -5 23.143 ( 0.003 ms): Xorg/729 setitimer(which: REAL, value: 0x7fffaadf16e0 ) = 0 23.152 ( 0.004 ms): Xorg/729 read(fd: 31, buf: 0x2544af0, count: 4096 ) = 8 23.161 ( 0.002 ms): Xorg/729 read(fd: 31, buf: 0x2544af0, count: 4096 ) = -1 EAGAIN Resource temporarily unavailable 23.175 ( 0.002 ms): Xorg/729 setitimer(which: REAL, value: 0x7fffaadf16e0 ) = 0 23.235 ( 0.002 ms): Xorg/729 setitimer(which: REAL, value: 0x7fffaadf16e0 ) = 0 [acme@zoo ~]$ Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-di28olfwd28rvkox7v3hqhu1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 78b0d6a..db959ac 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1713,10 +1713,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } err = perf_evlist__open(evlist); - if (err < 0) { - fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno)); - goto out_delete_maps; - } + if (err < 0) + goto out_error_open; err = perf_evlist__mmap(evlist, UINT_MAX, false); if (err < 0) { @@ -1813,14 +1811,21 @@ out_delete_evlist: out: trace->live = false; return err; -out_error_tp: { char errbuf[BUFSIZ]; + +out_error_tp: perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); + goto out_error; + +out_error_open: + perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + +out_error: fprintf(trace->output, "%s\n", errbuf); -} goto out_delete_evlist; } +} static int trace__replay(struct trace *trace) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6737420..0b5425b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1153,3 +1153,39 @@ int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, return 0; } + +int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) +{ + int printed, value; + char sbuf[128], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); + + switch (err) { + case EACCES: + case EPERM: + printed = scnprintf(buf, size, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); + + if (filename__read_int("/proc/sys/kernel/perf_event_paranoid", &value)) + break; + + printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); + + if (value >= 2) { + printed += scnprintf(buf + printed, size - printed, + "For your workloads it needs to be <= 1\nHint:\t"); + } + printed += scnprintf(buf + printed, size - printed, + "For system wide tracing it needs to be set to -1"); + + printed += scnprintf(buf + printed, size - printed, + ".\nHint:\tThe current value is %d.", value); + break; + default: + scnprintf(buf, size, "%s", emsg); + break; + } + + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 386de10..7f8f1ae 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -169,6 +169,7 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); +int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) { -- cgit v0.10.2 From 40d54ec2f77edc52340dcae236aaabe8c3cc3a07 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:28:58 +0300 Subject: perf evsel: Add missing 'mmap2' from debug print The struct perf_event_attr now has a 'mmap2' member. Add it to perf_event_attr__fprintf(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bfebc1e..291b18a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -986,6 +986,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) ret += PRINT_ATTR2(exclude_host, exclude_guest); ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel, "excl.callchain_user", exclude_callchain_user); + ret += PRINT_ATTR_U32(mmap2); ret += PRINT_ATTR_U32(wakeup_events); ret += PRINT_ATTR_U32(wakeup_watermark); -- cgit v0.10.2 From dd44bc6be05e4a948124053c8105cfa581177554 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:01 +0300 Subject: perf evsel: Add missing decrement in id sample parsing The final array decrement in id sample parsing is missing, which may trip up the next person adding a sample format, so add it in. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 291b18a..ec0cc1e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1218,6 +1218,7 @@ static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel, sample->pid = u.val32[0]; sample->tid = u.val32[1]; + array--; } return 0; -- cgit v0.10.2 From 4f624685f92719565981eb6f8d9195bb578522a3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:00 +0300 Subject: perf record: Improve write_output error message Improve the error message from write_output() to say what failed to write and give the error number. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 92ca541..d269dfa 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -88,7 +88,7 @@ static int write_output(struct perf_record *rec, void *buf, size_t size) int ret = write(rec->output, buf, size); if (ret < 0) { - pr_err("failed to write\n"); + pr_err("failed to write perf data, error: %m\n"); return -1; } -- cgit v0.10.2 From 8c16b649606ff9f6d742ad6f71c76fc0ee996c8e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:02 +0300 Subject: perf session: Add missing sample flush for piped events Piped events can be sorted so a final flush is needed. Add that and remove a redundant 'err = 0'. Signed-off-by: Adrian Hunter Reviewed-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d1e4495..d51e62d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1263,7 +1263,9 @@ more: if (!session_done()) goto more; done: - err = 0; + /* do the final flush for ordered samples */ + self->ordered_samples.next_flush = ULLONG_MAX; + err = flush_sample_queue(self, tool); out_err: free(buf); perf_session__warn_about_errors(self, tool); @@ -1392,13 +1394,13 @@ more: "Processing events..."); } - err = 0; if (session_done()) - goto out_err; + goto out; if (file_pos < file_size) goto more; +out: /* do the final flush for ordered samples */ session->ordered_samples.next_flush = ULLONG_MAX; err = flush_sample_queue(session, tool); -- cgit v0.10.2 From 7db5952846dfa015d74e64b5e8656acac979490b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:03 +0300 Subject: perf session: Add missing members to perf_event__attr_swap() The perf_event__attr_swap() method needs to swap all members of struct perf_event_attr. Add missing ones. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d51e62d..d4559ca 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -453,6 +453,9 @@ void perf_event__attr_swap(struct perf_event_attr *attr) attr->bp_type = bswap_32(attr->bp_type); attr->bp_addr = bswap_64(attr->bp_addr); attr->bp_len = bswap_64(attr->bp_len); + attr->branch_sample_type = bswap_64(attr->branch_sample_type); + attr->sample_regs_user = bswap_64(attr->sample_regs_user); + attr->sample_stack_user = bswap_32(attr->sample_stack_user); swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -- cgit v0.10.2 From 2af68ef50c6afc1632edc984e9c834545d90f597 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:07 +0300 Subject: perf evlist: Fix 32-bit build error util/evlist.c: In function 'perf_evlist__mmap': util/evlist.c:772:2: error: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t' [-Werror=format] cc1: all warnings being treated as errors Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-11-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0b5425b..07ba0a4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -769,7 +769,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); - pr_debug("mmap size %luB\n", evlist->mmap_len); + pr_debug("mmap size %zuB\n", evlist->mmap_len); mask = evlist->mmap_len - page_size - 1; list_for_each_entry(evsel, &evlist->entries, node) { -- cgit v0.10.2 From 9402802a416c96b48b2bd9331c070ba2d7550b36 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:08 +0300 Subject: perf tools: Fix test_on_exit for 32-bit build builtin-record.c:42:12: error: static declaration of 'on_exit' follows non-static declaration In file included from util/util.h:51:0, from builtin.h:4, from builtin-record.c:8: /usr/include/stdlib.h:536:12: note: previous declaration of 'on_exit' was here Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-12-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/test-on-exit.c b/tools/perf/config/feature-checks/test-on-exit.c index 8f64ed3..8e88b16 100644 --- a/tools/perf/config/feature-checks/test-on-exit.c +++ b/tools/perf/config/feature-checks/test-on-exit.c @@ -1,4 +1,5 @@ #include +#include static void exit_fn(int status, void *__data) { -- cgit v0.10.2 From 2100f778d44b9c25bd13d38c24a999e2caf1ae3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:09 +0300 Subject: perf tools: Fix bench/numa.c for 32-bit build bench/numa.c: In function 'worker_thread': bench/numa.c:1123:20: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare] bench/numa.c:1171:6: error: format '%lx' expects argument of type 'long unsigned int', but argument 5 has type 'u64' [-Werror=format] cc1: all warnings being treated as errors Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 64fa01c..d4c83c6 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1120,7 +1120,7 @@ static void *worker_thread(void *__tdata) /* Check whether our max runtime timed out: */ if (g->p.nr_secs) { timersub(&stop, &start0, &diff); - if (diff.tv_sec >= g->p.nr_secs) { + if ((u32)diff.tv_sec >= g->p.nr_secs) { g->stop_work = true; break; } @@ -1167,7 +1167,7 @@ static void *worker_thread(void *__tdata) runtime_ns_max += diff.tv_usec * 1000; if (details >= 0) { - printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n", + printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", process_nr, thread_nr, runtime_ns_max / bytes_done, val); } fflush(stdout); -- cgit v0.10.2 From c83fa7f2549cb60bc4d429de31096546f659ce6d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:12 +0300 Subject: perf evlist: Fix perf_evlist__mmap comments Put the comments into the correct kernel-doc format and correct reference to perf_evlist__read_on_cpu() which should be perf_evlist__mmap_read(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 07ba0a4..acef94a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -738,20 +738,17 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, return 0; } -/** perf_evlist__mmap - Create per cpu maps to receive events - * - * @evlist - list of events - * @pages - map length in pages - * @overwrite - overwrite older events? - * - * If overwrite is false the user needs to signal event consuption using: - * - * struct perf_mmap *m = &evlist->mmap[cpu]; - * unsigned int head = perf_mmap__read_head(m); +/** + * perf_evlist__mmap - Create mmaps to receive events. + * @evlist: list of events + * @pages: map length in pages + * @overwrite: overwrite older events? * - * perf_mmap__write_tail(m, head) + * If @overwrite is %false the user needs to signal event consumption using + * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this + * automatically. * - * Using perf_evlist__read_on_cpu does this automatically. + * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite) -- cgit v0.10.2 From 04e213148c434544e3aa88d227b5375fd375738b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:13 +0300 Subject: perf evlist: Factor out duplicated mmap code The same code is used in perf_evlist__mmap_per_cpu() and perf_evlist__mmap_per_thread(). Factor it out into a separate function perf_evlist__mmap_per_evsel(). Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-17-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index acef94a..85c4c80 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -608,9 +608,36 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, return 0; } -static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask) +static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, + int prot, int mask, int cpu, int thread, + int *output) { struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) { + int fd = FD(evsel, cpu, thread); + + if (*output == -1) { + *output = fd; + if (__perf_evlist__mmap(evlist, idx, prot, mask, + *output) < 0) + return -1; + } else { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) + return -1; + } + + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) + return -1; + } + + return 0; +} + +static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, + int mask) +{ int cpu, thread; int nr_cpus = cpu_map__nr(evlist->cpus); int nr_threads = thread_map__nr(evlist->threads); @@ -620,23 +647,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m int output = -1; for (thread = 0; thread < nr_threads; thread++) { - list_for_each_entry(evsel, &evlist->entries, node) { - int fd = FD(evsel, cpu, thread); - - if (output == -1) { - output = fd; - if (__perf_evlist__mmap(evlist, cpu, - prot, mask, output) < 0) - goto out_unmap; - } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0) - goto out_unmap; - } - - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) - goto out_unmap; - } + if (perf_evlist__mmap_per_evsel(evlist, cpu, prot, mask, + cpu, thread, &output)) + goto out_unmap; } } @@ -648,9 +661,9 @@ out_unmap: return -1; } -static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask) +static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, + int mask) { - struct perf_evsel *evsel; int thread; int nr_threads = thread_map__nr(evlist->threads); @@ -658,23 +671,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in for (thread = 0; thread < nr_threads; thread++) { int output = -1; - list_for_each_entry(evsel, &evlist->entries, node) { - int fd = FD(evsel, 0, thread); - - if (output == -1) { - output = fd; - if (__perf_evlist__mmap(evlist, thread, - prot, mask, output) < 0) - goto out_unmap; - } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0) - goto out_unmap; - } - - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0) - goto out_unmap; - } + if (perf_evlist__mmap_per_evsel(evlist, thread, prot, mask, 0, + thread, &output)) + goto out_unmap; } return 0; -- cgit v0.10.2 From 243be3dd7c14454899e51334a6d66d29a41ae6ab Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 18 Oct 2013 15:29:14 +0300 Subject: perf script: Print addr by default for BTS The addr field is not displayed by default for hardware events, however for branch events it is the target of the branch so for BTS display it by default if it was recorded. Signed-off-by: Adrian Hunter Acked-by: David Ahern Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382099356-4918-18-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9c333ff..ebb2b5f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -409,7 +409,9 @@ static void print_sample_bts(union perf_event *event, printf(" => "); /* print branch_to information */ - if (PRINT_FIELD(ADDR)) + if (PRINT_FIELD(ADDR) || + ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && + !output[attr->type].user_set)) print_sample_addr(event, sample, machine, thread, attr); printf("\n"); -- cgit v0.10.2 From f11cfc6f294dbd83b0d58037404df2bd16066238 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 15 Oct 2013 23:07:27 +0300 Subject: perf list: Show error if tracepoints not available Tracepoints are not visible in "perf list" on Fedora 19 because regular users have no permission to /sys/kernel/debug by default. Show an error message so that the user knows about it instead of assuming that tracepoints are not supported on the system. Signed-off-by: Pekka Enberg Acked-by: Ingo Molnar Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1381867647-8594-1-git-send-email-penberg@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9812531..c90e55c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -998,8 +998,10 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) + if (debugfs_valid_mountpoint(tracing_events_path)) { + printf(" [ Tracepoints not available: %s ]\n", strerror(errno)); return; + } sys_dir = opendir(tracing_events_path); if (!sys_dir) -- cgit v0.10.2 From e369517ce5f796945c6af047b4e8b1d650e03458 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 11 Oct 2013 14:15:36 +0900 Subject: perf callchain: Convert children list to rbtree Current collapse stage has a scalability problem which can be reproduced easily with a parallel kernel build. This is because it needs to traverse every children of callchains linearly during the collapse/merge stage. Converting it to a rbtree reduced the overhead significantly. On my 400MB perf.data file which recorded with make -j32 kernel build: $ time perf --no-pager report --stdio > /dev/null before: real 6m22.073s user 6m18.683s sys 0m0.706s after: real 0m20.780s user 0m19.962s sys 0m0.689s During the perf report the overhead on append_chain_children went down from 96.69% to 18.16%: - 18.16% perf perf [.] append_chain_children - append_chain_children - 77.48% append_chain_children + 69.79% merge_chain_branch - 22.96% append_chain_children + 67.44% merge_chain_branch + 30.15% append_chain_children + 2.41% callchain_append + 7.25% callchain_append + 12.26% callchain_append + 10.22% merge_chain_branch + 11.58% perf perf [.] dso__find_symbol + 8.02% perf perf [.] sort__comm_cmp + 5.48% perf libc-2.17.so [.] malloc_consolidate Reported-by: Linus Torvalds Signed-off-by: Namhyung Kim Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 482f680..e3970e3 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -21,12 +21,6 @@ __thread struct callchain_cursor callchain_cursor; -#define chain_for_each_child(child, parent) \ - list_for_each_entry(child, &parent->children, siblings) - -#define chain_for_each_child_safe(child, next, parent) \ - list_for_each_entry_safe(child, next, &parent->children, siblings) - static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, enum chain_mode mode) @@ -71,10 +65,16 @@ static void __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, u64 min_hit) { + struct rb_node *n; struct callchain_node *child; - chain_for_each_child(child, node) + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + __sort_chain_flat(rb_root, child, min_hit); + } if (node->hit && node->hit >= min_hit) rb_insert_callchain(rb_root, node, CHAIN_FLAT); @@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, static void __sort_chain_graph_abs(struct callchain_node *node, u64 min_hit) { + struct rb_node *n; struct callchain_node *child; node->rb_root = RB_ROOT; + n = rb_first(&node->rb_root_in); + + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); - chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); if (callchain_cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, @@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, static void __sort_chain_graph_rel(struct callchain_node *node, double min_percent) { + struct rb_node *n; struct callchain_node *child; u64 min_hit; node->rb_root = RB_ROOT; min_hit = ceil(node->children_hit * min_percent); - chain_for_each_child(child, node) { + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + __sort_chain_graph_rel(child, min_percent); if (callchain_cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, @@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children) return NULL; } new->parent = parent; - INIT_LIST_HEAD(&new->children); INIT_LIST_HEAD(&new->val); if (inherit_children) { - struct callchain_node *next; + struct rb_node *n; + struct callchain_node *child; + + new->rb_root_in = parent->rb_root_in; + parent->rb_root_in = RB_ROOT; - list_splice(&parent->children, &new->children); - INIT_LIST_HEAD(&parent->children); + n = rb_first(&new->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + child->parent = new; + n = rb_next(n); + } - chain_for_each_child(next, new) - next->parent = new; + /* make it the first child */ + rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node); + rb_insert_color(&new->rb_node_in, &parent->rb_root_in); } - list_add_tail(&new->siblings, &parent->children); return new; } @@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } } -static void +static struct callchain_node * add_child(struct callchain_node *parent, struct callchain_cursor *cursor, u64 period) @@ -235,6 +252,19 @@ add_child(struct callchain_node *parent, new->children_hit = 0; new->hit = period; + return new; +} + +static s64 match_chain(struct callchain_cursor_node *node, + struct callchain_list *cnode) +{ + struct symbol *sym = node->sym; + + if (cnode->ms.sym && sym && + callchain_param.key == CCKEY_FUNCTION) + return cnode->ms.sym->start - sym->start; + else + return cnode->ip - node->ip; } /* @@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent, /* create a new child for the new branch if any */ if (idx_total < cursor->nr) { + struct callchain_node *first; + struct callchain_list *cnode; + struct callchain_cursor_node *node; + struct rb_node *p, **pp; + parent->hit = 0; - add_child(parent, cursor, period); parent->children_hit += period; + + node = callchain_cursor_current(cursor); + new = add_child(parent, cursor, period); + + /* + * This is second child since we moved parent's children + * to new (first) child above. + */ + p = parent->rb_root_in.rb_node; + first = rb_entry(p, struct callchain_node, rb_node_in); + cnode = list_first_entry(&first->val, struct callchain_list, + list); + + if (match_chain(node, cnode) < 0) + pp = &p->rb_left; + else + pp = &p->rb_right; + + rb_link_node(&new->rb_node_in, p, pp); + rb_insert_color(&new->rb_node_in, &parent->rb_root_in); } else { parent->hit = period; } @@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root, u64 period) { struct callchain_node *rnode; + struct callchain_cursor_node *node; + struct rb_node **p = &root->rb_root_in.rb_node; + struct rb_node *parent = NULL; + + node = callchain_cursor_current(cursor); + if (!node) + return; /* lookup in childrens */ - chain_for_each_child(rnode, root) { - unsigned int ret = append_chain(rnode, cursor, period); + while (*p) { + s64 ret; + struct callchain_list *cnode; - if (!ret) + parent = *p; + rnode = rb_entry(parent, struct callchain_node, rb_node_in); + cnode = list_first_entry(&rnode->val, struct callchain_list, + list); + + /* just check first entry */ + ret = match_chain(node, cnode); + if (ret == 0) { + append_chain(rnode, cursor, period); goto inc_children_hit; + } + + if (ret < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; } /* nothing in children, add to the current node */ - add_child(root, cursor, period); + rnode = add_child(root, cursor, period); + rb_link_node(&rnode->rb_node_in, parent, p); + rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); inc_children_hit: root->children_hit += period; @@ -325,28 +403,20 @@ append_chain(struct callchain_node *root, */ list_for_each_entry(cnode, &root->val, list) { struct callchain_cursor_node *node; - struct symbol *sym; node = callchain_cursor_current(cursor); if (!node) break; - sym = node->sym; - - if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) { - if (cnode->ms.sym->start != sym->start) - break; - } else if (cnode->ip != node->ip) + if (match_chain(node, cnode) != 0) break; - if (!found) - found = true; + found = true; callchain_cursor_advance(cursor); } - /* matches not, relay on the parent */ + /* matches not, relay no the parent */ if (!found) { cursor->curr = curr_snap; cursor->pos = start; @@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor, struct callchain_node *dst, struct callchain_node *src) { struct callchain_cursor_node **old_last = cursor->last; - struct callchain_node *child, *next_child; + struct callchain_node *child; struct callchain_list *list, *next_list; + struct rb_node *n; int old_pos = cursor->nr; int err = 0; @@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor, append_chain_children(dst, cursor, src->hit); } - chain_for_each_child_safe(child, next_child, src) { + n = rb_first(&src->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + n = rb_next(n); + rb_erase(&child->rb_node_in, &src->rb_root_in); + err = merge_chain_branch(cursor, dst, child); if (err) break; - list_del(&child->siblings); free(child); } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 2b585bc..7bb3602 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -21,11 +21,11 @@ enum chain_order { struct callchain_node { struct callchain_node *parent; - struct list_head siblings; - struct list_head children; struct list_head val; - struct rb_node rb_node; /* to sort nodes in an rbtree */ - struct rb_root rb_root; /* sorted tree of children */ + struct rb_node rb_node_in; /* to insert nodes in an rbtree */ + struct rb_node rb_node; /* to sort nodes in an output tree */ + struct rb_root rb_root_in; /* input tree of children */ + struct rb_root rb_root; /* sorted output tree of children */ unsigned int val_nr; u64 hit; u64 children_hit; @@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor; static inline void callchain_init(struct callchain_root *root) { - INIT_LIST_HEAD(&root->node.siblings); - INIT_LIST_HEAD(&root->node.children); INIT_LIST_HEAD(&root->node.val); root->node.parent = NULL; root->node.hit = 0; root->node.children_hit = 0; + root->node.rb_root_in = RB_ROOT; root->max_depth = 0; } -- cgit v0.10.2 From 09600e0f9ebb06235b852a646a3644b7d4a71aca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 15 Oct 2013 11:01:56 +0900 Subject: perf tools: Compare dso's also when comparing symbols Linus reported that sometimes 'perf report -s symbol' exits without any message on TUI. David and Jiri found that it's because it failed to add a hist entry due to an invalid symbol length. It turns out that sorting by symbol (address) was broken since it only compares symbol addresses. The symbol address is a relative address within a dso thus just checking its address can result in merging unrelated symbols together. Fix it by checking dso before comparing symbol address. Reported-by: Linus Torvalds Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381802517-18812-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 32c5637..1f9821d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -182,9 +182,19 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { + int64_t ret; + if (!left->ms.sym && !right->ms.sym) return right->level - left->level; + /* + * comparing symbol address alone is not enough since it's a + * relative address within a dso. + */ + ret = sort__dso_cmp(left, right); + if (ret != 0) + return ret; + return _sort__sym_cmp(left->ms.sym, right->ms.sym); } -- cgit v0.10.2 From f5fc14124c5cefdd052a2b2a6a3f0ed531540113 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Oct 2013 16:27:32 +0200 Subject: perf tools: Add data object to handle perf data file This patch is adding 'struct perf_data_file' object as a placeholder for all attributes regarding perf.data file handling. Changing perf_session__new to take it as an argument. The rest of the functionality will be added later to keep this change simple enough, because all the places using perf_session are changed now. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381847254-28809-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 94f9a8e..95df683 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -28,6 +28,7 @@ #include "util/hist.h" #include "util/session.h" #include "util/tool.h" +#include "util/data.h" #include "arch/common.h" #include @@ -199,9 +200,13 @@ static int __cmd_annotate(struct perf_annotate *ann) struct perf_session *session; struct perf_evsel *pos; u64 total_nr_samples; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = ann->force, + }; - session = perf_session__new(input_name, O_RDONLY, - ann->force, false, &ann->tool); + session = perf_session__new(&file, false, &ann->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 8140b7b..cfede86 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -221,8 +221,12 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp) { - struct perf_session *session = perf_session__new(filename, O_RDONLY, - force, false, NULL); + struct perf_data_file file = { + .path = filename, + .mode = PERF_DATA_MODE_READ, + .force = force, + }; + struct perf_session *session = perf_session__new(&file, false, NULL); if (session == NULL) return -1; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index e74366a..0164c1c 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -15,6 +15,7 @@ #include "util/parse-options.h" #include "util/session.h" #include "util/symbol.h" +#include "util/data.h" static int sysfs__fprintf_build_id(FILE *fp) { @@ -52,6 +53,11 @@ static bool dso__skip_buildid(struct dso *dso, int with_hits) static int perf_session__list_build_ids(bool force, bool with_hits) { struct perf_session *session; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + .force = force, + }; symbol__elf_init(); /* @@ -60,8 +66,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (filename__fprintf_build_id(input_name, stdout)) goto out; - session = perf_session__new(input_name, O_RDONLY, force, false, - &build_id__mark_dso_hit_ops); + session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); if (session == NULL) return -1; /* diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 2a78dc8..419d27d 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -16,6 +16,7 @@ #include "util/sort.h" #include "util/symbol.h" #include "util/util.h" +#include "util/data.h" #include #include @@ -42,7 +43,7 @@ struct diff_hpp_fmt { struct data__file { struct perf_session *session; - const char *file; + struct perf_data_file file; int idx; struct hists *hists; struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; @@ -601,7 +602,7 @@ static void data__fprintf(void) data__for_each_file(i, d) fprintf(stdout, "# [%d] %s %s\n", - d->idx, d->file, + d->idx, d->file.path, !d->idx ? "(Baseline)" : ""); fprintf(stdout, "#\n"); @@ -663,17 +664,16 @@ static int __cmd_diff(void) int ret = -EINVAL, i; data__for_each_file(i, d) { - d->session = perf_session__new(d->file, O_RDONLY, force, - false, &tool); + d->session = perf_session__new(&d->file, false, &tool); if (!d->session) { - pr_err("Failed to open %s\n", d->file); + pr_err("Failed to open %s\n", d->file.path); ret = -ENOMEM; goto out_delete; } ret = perf_session__process_events(d->session, &tool); if (ret) { - pr_err("Failed to process %s\n", d->file); + pr_err("Failed to process %s\n", d->file.path); goto out_delete; } @@ -1016,7 +1016,12 @@ static int data_init(int argc, const char **argv) return -ENOMEM; data__for_each_file(i, d) { - d->file = use_default ? defaults[i] : argv[i]; + struct perf_data_file *file = &d->file; + + file->path = use_default ? defaults[i] : argv[i]; + file->mode = PERF_DATA_MODE_READ, + file->force = force, + d->idx = i; } diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 05bd9df..20b0f12 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -14,13 +14,18 @@ #include "util/parse-events.h" #include "util/parse-options.h" #include "util/session.h" +#include "util/data.h" static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { struct perf_session *session; struct perf_evsel *pos; + struct perf_data_file file = { + .path = file_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(file_name, O_RDONLY, 0, false, NULL); + session = perf_session__new(&file, 0, NULL); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f51a963..4aa6d78 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -15,6 +15,7 @@ #include "util/tool.h" #include "util/debug.h" #include "util/build-id.h" +#include "util/data.h" #include "util/parse-options.h" @@ -345,6 +346,10 @@ static int __cmd_inject(struct perf_inject *inject) { struct perf_session *session; int ret = -EINVAL; + struct perf_data_file file = { + .path = inject->input_name, + .mode = PERF_DATA_MODE_READ, + }; signal(SIGINT, sig_handler); @@ -355,7 +360,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.tracing_data = perf_event__repipe_tracing_data; } - session = perf_session__new(inject->input_name, O_RDONLY, false, true, &inject->tool); + session = perf_session__new(&file, true, &inject->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9b5f077..1126382 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -13,6 +13,7 @@ #include "util/parse-options.h" #include "util/trace-event.h" +#include "util/data.h" #include "util/debug.h" @@ -486,8 +487,12 @@ static int __cmd_kmem(void) { "kmem:kfree", perf_evsel__process_free_event, }, { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem); + session = perf_session__new(&file, false, &perf_kmem); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cfa0b79..188bb29 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -17,6 +17,7 @@ #include "util/tool.h" #include "util/stat.h" #include "util/top.h" +#include "util/data.h" #include #include @@ -1215,10 +1216,13 @@ static int read_events(struct perf_kvm_stat *kvm) .comm = perf_event__process_comm, .ordered_samples = true, }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; kvm->tool = eops; - kvm->session = perf_session__new(kvm->file_name, O_RDONLY, 0, false, - &kvm->tool); + kvm->session = perf_session__new(&file, false, &kvm->tool); if (!kvm->session) { pr_err("Initializing perf session failed\n"); return -EINVAL; @@ -1450,6 +1454,9 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, "perf kvm stat live []", NULL }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_WRITE, + }; /* event handling */ @@ -1514,7 +1521,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * perf session */ - kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool); + kvm->session = perf_session__new(&file, false, &kvm->tool); if (kvm->session == NULL) { err = -ENOMEM; goto out; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6a9076f..33c7253 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -15,6 +15,7 @@ #include "util/debug.h" #include "util/session.h" #include "util/tool.h" +#include "util/data.h" #include #include @@ -853,8 +854,12 @@ static int __cmd_report(bool display_info) .comm = perf_event__process_comm, .ordered_samples = true, }; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(input_name, O_RDONLY, 0, false, &eops); + session = perf_session__new(&file, false, &eops); if (!session) { pr_err("Initializing perf session failed\n"); return -ENOMEM; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 253133a..31c00f1 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -5,6 +5,7 @@ #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" +#include "util/data.h" #define MEM_OPERATION_LOAD "load" #define MEM_OPERATION_STORE "store" @@ -119,10 +120,14 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; int err = -EINVAL; int ret; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &mem->tool); + struct perf_session *session = perf_session__new(&file, false, + &mem->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d269dfa..4ea46ff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -24,6 +24,7 @@ #include "util/symbol.h" #include "util/cpumap.h" #include "util/thread_map.h" +#include "util/data.h" #include #include @@ -65,11 +66,10 @@ struct perf_record { struct perf_tool tool; struct perf_record_opts opts; u64 bytes_written; - const char *output_name; + struct perf_data_file file; struct perf_evlist *evlist; struct perf_session *session; const char *progname; - int output; int realtime_prio; bool no_buildid; bool no_buildid_cache; @@ -84,8 +84,10 @@ static void advance_output(struct perf_record *rec, size_t size) static int write_output(struct perf_record *rec, void *buf, size_t size) { + struct perf_data_file *file = &rec->file; + while (size) { - int ret = write(rec->output, buf, size); + int ret = write(file->fd, buf, size); if (ret < 0) { pr_err("failed to write perf data, error: %m\n"); @@ -248,13 +250,15 @@ out: static int process_buildids(struct perf_record *rec) { - u64 size = lseek(rec->output, 0, SEEK_CUR); + struct perf_data_file *file = &rec->file; + struct perf_session *session = rec->session; + u64 size = lseek(file->fd, 0, SEEK_CUR); if (size == 0) return 0; - rec->session->fd = rec->output; - return __perf_session__process_events(rec->session, rec->post_processing_offset, + session->fd = file->fd; + return __perf_session__process_events(session, rec->post_processing_offset, size - rec->post_processing_offset, size, &build_id__mark_dso_hit_ops); } @@ -262,17 +266,18 @@ static int process_buildids(struct perf_record *rec) static void perf_record__exit(int status, void *arg) { struct perf_record *rec = arg; + struct perf_data_file *file = &rec->file; if (status != 0) return; - if (!rec->opts.pipe_output) { + if (!file->is_pipe) { rec->session->header.data_size += rec->bytes_written; if (!rec->no_buildid) process_buildids(rec); perf_session__write_header(rec->session, rec->evlist, - rec->output, true); + file->fd, true); perf_session__delete(rec->session); perf_evlist__delete(rec->evlist); symbol__exit(); @@ -342,14 +347,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) { struct stat st; int flags; - int err, output, feat; + int err, feat; unsigned long waking = 0; const bool forks = argc > 0; struct machine *machine; struct perf_tool *tool = &rec->tool; struct perf_record_opts *opts = &rec->opts; struct perf_evlist *evsel_list = rec->evlist; - const char *output_name = rec->output_name; + struct perf_data_file *file = &rec->file; + const char *output_name = file->path; struct perf_session *session; bool disabled = false; @@ -363,13 +369,13 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!output_name) { if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) - opts->pipe_output = true; + file->is_pipe = true; else - rec->output_name = output_name = "perf.data"; + file->path = output_name = "perf.data"; } if (output_name) { if (!strcmp(output_name, "-")) - opts->pipe_output = true; + file->is_pipe = true; else if (!stat(output_name, &st) && st.st_size) { char oldname[PATH_MAX]; snprintf(oldname, sizeof(oldname), "%s.old", @@ -381,19 +387,16 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) flags = O_CREAT|O_RDWR|O_TRUNC; - if (opts->pipe_output) - output = STDOUT_FILENO; + if (file->is_pipe) + file->fd = STDOUT_FILENO; else - output = open(output_name, flags, S_IRUSR | S_IWUSR); - if (output < 0) { + file->fd = open(output_name, flags, S_IRUSR | S_IWUSR); + if (file->fd < 0) { perror("failed to create output file"); return -1; } - rec->output = output; - - session = perf_session__new(output_name, O_WRONLY, - true, false, NULL); + session = perf_session__new(file, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; @@ -415,7 +418,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (forks) { err = perf_evlist__prepare_workload(evsel_list, &opts->target, - argv, opts->pipe_output, + argv, file->is_pipe, true); if (err < 0) { pr_err("Couldn't run the workload!\n"); @@ -436,13 +439,13 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) */ on_exit(perf_record__exit, rec); - if (opts->pipe_output) { - err = perf_header__write_pipe(output); + if (file->is_pipe) { + err = perf_header__write_pipe(file->fd); if (err < 0) goto out_delete_session; } else { err = perf_session__write_header(session, evsel_list, - output, false); + file->fd, false); if (err < 0) goto out_delete_session; } @@ -455,11 +458,11 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) goto out_delete_session; } - rec->post_processing_offset = lseek(output, 0, SEEK_CUR); + rec->post_processing_offset = lseek(file->fd, 0, SEEK_CUR); machine = &session->machines.host; - if (opts->pipe_output) { + if (file->is_pipe) { err = perf_event__synthesize_attrs(tool, session, process_synthesized_event); if (err < 0) { @@ -476,7 +479,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = perf_event__synthesize_tracing_data(tool, output, evsel_list, + err = perf_event__synthesize_tracing_data(tool, file->fd, evsel_list, process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); @@ -845,7 +848,7 @@ const struct option record_options[] = { OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", "list of cpus to monitor"), OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), - OPT_STRING('o', "output", &record.output_name, "file", + OPT_STRING('o', "output", &record.file.path, "file", "output file name"), OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit, "child tasks do not inherit counters"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 21b5c2f..60d7f8e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,6 +33,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/data.h" #include "arch/common.h" #include @@ -857,6 +858,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Don't show entries under that percent", parse_percent_limit), OPT_END() }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; perf_config(perf_report_config, &report); @@ -886,9 +890,11 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) perf_hpp__init(); } + file.path = input_name; + file.force = report.force; + repeat: - session = perf_session__new(input_name, O_RDONLY, - report.force, false, &report.tool); + session = perf_session__new(&file, false, &report.tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d8c51b2..5a46b10 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1446,8 +1446,12 @@ static int perf_sched__read_events(struct perf_sched *sched, { "sched:sched_migrate_task", process_sched_migrate_task_event, }, }; struct perf_session *session; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; - session = perf_session__new(input_name, O_RDONLY, 0, false, &sched->tool); + session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { pr_debug("No Memory for session\n"); return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ebb2b5f..f0c77a1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -15,6 +15,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/sort.h" +#include "util/data.h" #include static char const *script_name; @@ -1115,10 +1116,14 @@ int find_scripts(char **scripts_array, char **scripts_path_array) char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; struct perf_session *session; + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; char *temp; int i = 0; - session = perf_session__new(input_name, O_RDONLY, 0, false, NULL); + session = perf_session__new(&file, false, NULL); if (!session) return -1; @@ -1319,12 +1324,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "perf script [] [script-args]", NULL }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; setup_scripting(); argc = parse_options(argc, argv, options, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); + file.path = input_name; + if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) @@ -1488,8 +1498,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) if (!script_name) setup_pager(); - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_script); + session = perf_session__new(&file, false, &perf_script); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index c2e0231..e11c61d 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -36,6 +36,7 @@ #include "util/session.h" #include "util/svghelper.h" #include "util/tool.h" +#include "util/data.h" #define SUPPORT_OLD_POWER_EVENTS 1 #define PWR_EVENT_EXIT -1 @@ -990,8 +991,13 @@ static int __cmd_timechart(const char *output_name) { "power:power_frequency", process_sample_power_frequency }, #endif }; - struct perf_session *session = perf_session__new(input_name, O_RDONLY, - 0, false, &perf_timechart); + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; + + struct perf_session *session = perf_session__new(&file, false, + &perf_timechart); int ret = -EINVAL; if (session == NULL) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 65c49b2..752bebe 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -929,11 +929,15 @@ static int __cmd_top(struct perf_top *top) struct perf_record_opts *opts = &top->record_opts; pthread_t thread; int ret; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_WRITE, + }; + /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. */ - top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL); + top->session = perf_session__new(&file, false, NULL); if (top->session == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index db959ac..fa620bc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1834,7 +1834,10 @@ static int trace__replay(struct trace *trace) { "raw_syscalls:sys_exit", trace__sys_exit, }, { "probe:vfs_getname", trace__vfs_getname, }, }; - + struct perf_data_file file = { + .path = input_name, + .mode = PERF_DATA_MODE_READ, + }; struct perf_session *session; int err = -1; @@ -1857,8 +1860,7 @@ static int trace__replay(struct trace *trace) if (symbol__init() < 0) return -1; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &trace->tool); + session = perf_session__new(&file, false, &trace->tool); if (session == NULL) return -ENOMEM; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 84502e8..f61c230 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -220,7 +220,6 @@ struct perf_record_opts { bool no_delay; bool no_inherit; bool no_samples; - bool pipe_output; bool raw_samples; bool sample_address; bool sample_weight; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h new file mode 100644 index 0000000..ffa0186 --- /dev/null +++ b/tools/perf/util/data.h @@ -0,0 +1,29 @@ +#ifndef __PERF_DATA_H +#define __PERF_DATA_H + +#include + +enum perf_data_mode { + PERF_DATA_MODE_WRITE, + PERF_DATA_MODE_READ, +}; + +struct perf_data_file { + const char *path; + int fd; + bool is_pipe; + bool force; + enum perf_data_mode mode; +}; + +static inline bool perf_data_file__is_read(struct perf_data_file *file) +{ + return file->mode == PERF_DATA_MODE_READ; +} + +static inline bool perf_data_file__is_write(struct perf_data_file *file) +{ + return file->mode == PERF_DATA_MODE_WRITE; +} + +#endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d4559ca..e3f63df 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -106,11 +106,11 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self) machines__destroy_kernel_maps(&self->machines); } -struct perf_session *perf_session__new(const char *filename, int mode, - bool force, bool repipe, - struct perf_tool *tool) +struct perf_session *perf_session__new(struct perf_data_file *file, + bool repipe, struct perf_tool *tool) { struct perf_session *self; + const char *filename = file->path; struct stat st; size_t len; @@ -134,11 +134,11 @@ struct perf_session *perf_session__new(const char *filename, int mode, INIT_LIST_HEAD(&self->ordered_samples.to_free); machines__init(&self->machines); - if (mode == O_RDONLY) { - if (perf_session__open(self, force) < 0) + if (perf_data_file__is_read(file)) { + if (perf_session__open(self, file->force) < 0) goto out_delete; perf_session__set_id_hdr_size(self); - } else if (mode == O_WRONLY) { + } else if (perf_data_file__is_write(file)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 04bf737..f2f6251 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -7,6 +7,7 @@ #include "machine.h" #include "symbol.h" #include "thread.h" +#include "data.h" #include #include @@ -49,9 +50,8 @@ struct perf_session { struct perf_tool; -struct perf_session *perf_session__new(const char *filename, int mode, - bool force, bool repipe, - struct perf_tool *tool); +struct perf_session *perf_session__new(struct perf_data_file *file, + bool repipe, struct perf_tool *tool); void perf_session__delete(struct perf_session *session); void perf_event_header__bswap(struct perf_event_header *self); -- cgit v0.10.2 From 6a4d98d787b38a130a67e78b64182b419899623a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Oct 2013 16:27:33 +0200 Subject: perf tools: Add perf_data_file__open interface to data object Adding perf_data_file__open interface to data object to open the perf.data file for both read and write. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381847254-28809-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c873e03..326a26e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -365,6 +365,7 @@ LIB_OBJS += $(OUTPUT)util/vdso.o LIB_OBJS += $(OUTPUT)util/stat.o LIB_OBJS += $(OUTPUT)util/record.o LIB_OBJS += $(OUTPUT)util/srcline.o +LIB_OBJS += $(OUTPUT)util/data.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4ea46ff..428e28f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -345,8 +345,6 @@ out: static int __cmd_record(struct perf_record *rec, int argc, const char **argv) { - struct stat st; - int flags; int err, feat; unsigned long waking = 0; const bool forks = argc > 0; @@ -355,7 +353,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) struct perf_record_opts *opts = &rec->opts; struct perf_evlist *evsel_list = rec->evlist; struct perf_data_file *file = &rec->file; - const char *output_name = file->path; struct perf_session *session; bool disabled = false; @@ -367,35 +364,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) signal(SIGUSR1, sig_handler); signal(SIGTERM, sig_handler); - if (!output_name) { - if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) - file->is_pipe = true; - else - file->path = output_name = "perf.data"; - } - if (output_name) { - if (!strcmp(output_name, "-")) - file->is_pipe = true; - else if (!stat(output_name, &st) && st.st_size) { - char oldname[PATH_MAX]; - snprintf(oldname, sizeof(oldname), "%s.old", - output_name); - unlink(oldname); - rename(output_name, oldname); - } - } - - flags = O_CREAT|O_RDWR|O_TRUNC; - - if (file->is_pipe) - file->fd = STDOUT_FILENO; - else - file->fd = open(output_name, flags, S_IRUSR | S_IWUSR); - if (file->fd < 0) { - perror("failed to create output file"); - return -1; - } - session = perf_session__new(file, false, NULL); if (session == NULL) { pr_err("Not enough memory for reading perf file header\n"); @@ -586,7 +554,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", (double)rec->bytes_written / 1024.0 / 1024.0, - output_name, + file->path, rec->bytes_written / 24); return 0; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 752bebe..d934f70 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -929,15 +929,8 @@ static int __cmd_top(struct perf_top *top) struct perf_record_opts *opts = &top->record_opts; pthread_t thread; int ret; - struct perf_data_file file = { - .mode = PERF_DATA_MODE_WRITE, - }; - /* - * FIXME: perf_session__new should allow passing a O_MMAP, so that all this - * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. - */ - top->session = perf_session__new(&file, false, NULL); + top->session = perf_session__new(NULL, false, NULL); if (top->session == NULL) return -ENOMEM; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c new file mode 100644 index 0000000..7d09faf --- /dev/null +++ b/tools/perf/util/data.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include +#include + +#include "data.h" +#include "util.h" + +static bool check_pipe(struct perf_data_file *file) +{ + struct stat st; + bool is_pipe = false; + int fd = perf_data_file__is_read(file) ? + STDIN_FILENO : STDOUT_FILENO; + + if (!file->path) { + if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) + is_pipe = true; + } else { + if (!strcmp(file->path, "-")) + is_pipe = true; + } + + if (is_pipe) + file->fd = fd; + + return file->is_pipe = is_pipe; +} + +static int check_backup(struct perf_data_file *file) +{ + struct stat st; + + if (!stat(file->path, &st) && st.st_size) { + /* TODO check errors properly */ + char oldname[PATH_MAX]; + snprintf(oldname, sizeof(oldname), "%s.old", + file->path); + unlink(oldname); + rename(file->path, oldname); + } + + return 0; +} + +static int open_file_read(struct perf_data_file *file) +{ + struct stat st; + int fd; + + fd = open(file->path, O_RDONLY); + if (fd < 0) { + int err = errno; + + pr_err("failed to open %s: %s", file->path, strerror(err)); + if (err == ENOENT && !strcmp(file->path, "perf.data")) + pr_err(" (try 'perf record' first)"); + pr_err("\n"); + return -err; + } + + if (fstat(fd, &st) < 0) + goto out_close; + + if (!file->force && st.st_uid && (st.st_uid != geteuid())) { + pr_err("file %s not owned by current user or root\n", + file->path); + goto out_close; + } + + if (!st.st_size) { + pr_info("zero-sized file (%s), nothing to do!\n", + file->path); + goto out_close; + } + + file->size = st.st_size; + return fd; + + out_close: + close(fd); + return -1; +} + +static int open_file_write(struct perf_data_file *file) +{ + if (check_backup(file)) + return -1; + + return open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR); +} + +static int open_file(struct perf_data_file *file) +{ + int fd; + + fd = perf_data_file__is_read(file) ? + open_file_read(file) : open_file_write(file); + + file->fd = fd; + return fd < 0 ? -1 : 0; +} + +int perf_data_file__open(struct perf_data_file *file) +{ + if (check_pipe(file)) + return 0; + + if (!file->path) + file->path = "perf.data"; + + return open_file(file); +} + +void perf_data_file__close(struct perf_data_file *file) +{ + close(file->fd); +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index ffa0186..d6c262e 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -13,6 +13,7 @@ struct perf_data_file { int fd; bool is_pipe; bool force; + unsigned long size; enum perf_data_mode mode; }; @@ -26,4 +27,7 @@ static inline bool perf_data_file__is_write(struct perf_data_file *file) return file->mode == PERF_DATA_MODE_WRITE; } +int perf_data_file__open(struct perf_data_file *file); +void perf_data_file__close(struct perf_data_file *file); + #endif /* __PERF_DATA_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e3f63df..d857c18 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,73 +16,35 @@ #include "perf_regs.h" #include "vdso.h" -static int perf_session__open(struct perf_session *self, bool force) +static int perf_session__open(struct perf_session *self) { - struct stat input_stat; - - if (!strcmp(self->filename, "-")) { - self->fd_pipe = true; - self->fd = STDIN_FILENO; - + if (self->fd_pipe) { if (perf_session__read_header(self) < 0) pr_err("incompatible file format (rerun with -v to learn more)"); - return 0; } - self->fd = open(self->filename, O_RDONLY); - if (self->fd < 0) { - int err = errno; - - pr_err("failed to open %s: %s", self->filename, strerror(err)); - if (err == ENOENT && !strcmp(self->filename, "perf.data")) - pr_err(" (try 'perf record' first)"); - pr_err("\n"); - return -errno; - } - - if (fstat(self->fd, &input_stat) < 0) - goto out_close; - - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - pr_err("file %s not owned by current user or root\n", - self->filename); - goto out_close; - } - - if (!input_stat.st_size) { - pr_info("zero-sized file (%s), nothing to do!\n", - self->filename); - goto out_close; - } - if (perf_session__read_header(self) < 0) { pr_err("incompatible file format (rerun with -v to learn more)"); - goto out_close; + return -1; } if (!perf_evlist__valid_sample_type(self->evlist)) { pr_err("non matching sample_type"); - goto out_close; + return -1; } if (!perf_evlist__valid_sample_id_all(self->evlist)) { pr_err("non matching sample_id_all"); - goto out_close; + return -1; } if (!perf_evlist__valid_read_format(self->evlist)) { pr_err("non matching read_format"); - goto out_close; + return -1; } - self->size = input_stat.st_size; return 0; - -out_close: - close(self->fd); - self->fd = -1; - return -1; } void perf_session__set_id_hdr_size(struct perf_session *session) @@ -110,35 +72,35 @@ struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { struct perf_session *self; - const char *filename = file->path; - struct stat st; - size_t len; - - if (!filename || !strlen(filename)) { - if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) - filename = "-"; - else - filename = "perf.data"; - } - len = strlen(filename); - self = zalloc(sizeof(*self) + len); - - if (self == NULL) + self = zalloc(sizeof(*self)); + if (!self) goto out; - memcpy(self->filename, filename, len); self->repipe = repipe; INIT_LIST_HEAD(&self->ordered_samples.samples); INIT_LIST_HEAD(&self->ordered_samples.sample_cache); INIT_LIST_HEAD(&self->ordered_samples.to_free); machines__init(&self->machines); - if (perf_data_file__is_read(file)) { - if (perf_session__open(self, file->force) < 0) + if (file) { + if (perf_data_file__open(file)) goto out_delete; - perf_session__set_id_hdr_size(self); - } else if (perf_data_file__is_write(file)) { + + self->fd = file->fd; + self->fd_pipe = file->is_pipe; + self->filename = file->path; + self->size = file->size; + + if (perf_data_file__is_read(file)) { + if (perf_session__open(self) < 0) + goto out_close; + + perf_session__set_id_hdr_size(self); + } + } + + if (!file || perf_data_file__is_write(file)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap(). @@ -153,10 +115,13 @@ struct perf_session *perf_session__new(struct perf_data_file *file, tool->ordered_samples = false; } -out: return self; -out_delete: + + out_close: + perf_data_file__close(file); + out_delete: perf_session__delete(self); + out: return NULL; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index f2f6251..e1ca2d0 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -39,7 +39,7 @@ struct perf_session { bool fd_pipe; bool repipe; struct ordered_samples ordered_samples; - char filename[1]; + const char *filename; }; #define PRINT_IP_OPT_IP (1<<0) -- cgit v0.10.2 From cc9784bd9fa9d8e27fdea61142398cb85ce401a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 15 Oct 2013 16:27:34 +0200 Subject: perf session: Separating data file properties from session Removing 'fd, fd_pipe, filename, size' from struct perf_session and replacing them with struct perf_data_file object. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1381847254-28809-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 95df683..03cfa59 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -259,7 +259,7 @@ static int __cmd_annotate(struct perf_annotate *ann) } if (total_nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->filename); + ui__error("The %s file has no samples!\n", file.path); goto out_delete; } diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 0164c1c..ed3873b 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -73,7 +73,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ - if (with_hits || session->fd_pipe) + if (with_hits || perf_data_file__is_pipe(&file)) perf_session__process_events(session, &build_id__mark_dso_hit_ops); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 428e28f..ab8d15e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -257,7 +257,6 @@ static int process_buildids(struct perf_record *rec) if (size == 0) return 0; - session->fd = file->fd; return __perf_session__process_events(session, rec->post_processing_offset, size - rec->post_processing_offset, size, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 60d7f8e..fa68a36 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -368,8 +368,9 @@ static int perf_report__setup_sample_type(struct perf_report *rep) { struct perf_session *self = rep->session; u64 sample_type = perf_evlist__combined_sample_type(self->evlist); + bool is_pipe = perf_data_file__is_pipe(self->file); - if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { ui__error("Selected --sort parent, but no " "callchain data. Did you call " @@ -392,7 +393,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } if (sort__mode == SORT_MODE__BRANCH) { - if (!self->fd_pipe && + if (!is_pipe && !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { ui__error("Selected -b but no branch data. " "Did you call perf record without -b?\n"); @@ -488,6 +489,7 @@ static int __cmd_report(struct perf_report *rep) struct map *kernel_map; struct kmap *kernel_kmap; const char *help = "For a higher level overview, try: perf report --sort comm,dso"; + struct perf_data_file *file = session->file; signal(SIGINT, sig_handler); @@ -572,7 +574,7 @@ static int __cmd_report(struct perf_report *rep) return 0; if (nr_samples == 0) { - ui__error("The %s file has no samples!\n", session->filename); + ui__error("The %s file has no samples!\n", file->path); return 0; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f0c77a1..27de606 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1525,7 +1525,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - input = open(session->filename, O_RDONLY); /* input_name */ + input = open(file.path, O_RDONLY); /* input_name */ if (input < 0) { perror("failed to open file"); return -1; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index d6c262e..8c2df80 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -27,6 +27,21 @@ static inline bool perf_data_file__is_write(struct perf_data_file *file) return file->mode == PERF_DATA_MODE_WRITE; } +static inline int perf_data_file__is_pipe(struct perf_data_file *file) +{ + return file->is_pipe; +} + +static inline int perf_data_file__fd(struct perf_data_file *file) +{ + return file->fd; +} + +static inline unsigned long perf_data_file__size(struct perf_data_file *file) +{ + return file->size; +} + int perf_data_file__open(struct perf_data_file *file); void perf_data_file__close(struct perf_data_file *file); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index c3e5a3b..26d9520 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -22,6 +22,7 @@ #include "vdso.h" #include "strbuf.h" #include "build-id.h" +#include "data.h" static bool no_buildid_cache = false; @@ -2189,7 +2190,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) { struct header_print_data hd; struct perf_header *header = &session->header; - int fd = session->fd; + int fd = perf_data_file__fd(session->file); hd.fp = fp; hd.full = full; @@ -2650,7 +2651,8 @@ static int perf_header__read_pipe(struct perf_session *session) struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if (perf_file_header__read_pipe(&f_header, header, session->fd, + if (perf_file_header__read_pipe(&f_header, header, + perf_data_file__fd(session->file), session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; @@ -2751,18 +2753,19 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, int perf_session__read_header(struct perf_session *session) { + struct perf_data_file *file = session->file; struct perf_header *header = &session->header; struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; - int fd = session->fd; + int fd = perf_data_file__fd(file); session->evlist = perf_evlist__new(); if (session->evlist == NULL) return -ENOMEM; - if (session->fd_pipe) + if (perf_data_file__is_pipe(file)) return perf_header__read_pipe(session); if (perf_file_header__read(&f_header, header, fd) < 0) @@ -2777,7 +2780,7 @@ int perf_session__read_header(struct perf_session *session) if (f_header.data.size == 0) { pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n" "Was the 'perf record' command properly terminated?\n", - session->filename); + file->path); } nr_attrs = f_header.attrs.size / f_header.attr_size; @@ -2990,18 +2993,19 @@ int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, struct perf_session *session) { ssize_t size_read, padding, size = event->tracing_data.size; - off_t offset = lseek(session->fd, 0, SEEK_CUR); + int fd = perf_data_file__fd(session->file); + off_t offset = lseek(fd, 0, SEEK_CUR); char buf[BUFSIZ]; /* setup for reading amidst mmap */ - lseek(session->fd, offset + sizeof(struct tracing_data_event), + lseek(fd, offset + sizeof(struct tracing_data_event), SEEK_SET); - size_read = trace_report(session->fd, &session->pevent, + size_read = trace_report(fd, &session->pevent, session->repipe); padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; - if (readn(session->fd, buf, padding) < 0) { + if (readn(fd, buf, padding) < 0) { pr_err("%s: reading input file", __func__); return -1; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d857c18..19fc716 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -18,17 +18,16 @@ static int perf_session__open(struct perf_session *self) { - if (self->fd_pipe) { - if (perf_session__read_header(self) < 0) - pr_err("incompatible file format (rerun with -v to learn more)"); - return 0; - } + struct perf_data_file *file = self->file; if (perf_session__read_header(self) < 0) { pr_err("incompatible file format (rerun with -v to learn more)"); return -1; } + if (perf_data_file__is_pipe(file)) + return 0; + if (!perf_evlist__valid_sample_type(self->evlist)) { pr_err("non matching sample_type"); return -1; @@ -87,10 +86,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file, if (perf_data_file__open(file)) goto out_delete; - self->fd = file->fd; - self->fd_pipe = file->is_pipe; - self->filename = file->path; - self->size = file->size; + self->file = file; if (perf_data_file__is_read(file)) { if (perf_session__open(self) < 0) @@ -158,7 +154,8 @@ void perf_session__delete(struct perf_session *self) perf_session__delete_threads(self); perf_session_env__delete(&self->header.env); machines__exit(&self->machines); - close(self->fd); + if (self->file) + perf_data_file__close(self->file); free(self); vdso__exit(); } @@ -1015,6 +1012,7 @@ static int perf_session_deliver_event(struct perf_session *session, static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, u64 file_offset) { + int fd = perf_data_file__fd(session->file); int err; dump_event(session, event, file_offset, NULL); @@ -1028,7 +1026,7 @@ static int perf_session__process_user_event(struct perf_session *session, union return err; case PERF_RECORD_HEADER_TRACING_DATA: /* setup for reading amidst mmap */ - lseek(session->fd, file_offset, SEEK_SET); + lseek(fd, file_offset, SEEK_SET); return tool->tracing_data(tool, event, session); case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(tool, event, session); @@ -1154,6 +1152,7 @@ volatile int session_done; static int __perf_session__process_pipe_events(struct perf_session *self, struct perf_tool *tool) { + int fd = perf_data_file__fd(self->file); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1172,7 +1171,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self, return -errno; more: event = buf; - err = readn(self->fd, event, sizeof(struct perf_event_header)); + err = readn(fd, event, sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done; @@ -1204,7 +1203,7 @@ more: p += sizeof(struct perf_event_header); if (size - sizeof(struct perf_event_header)) { - err = readn(self->fd, p, size - sizeof(struct perf_event_header)); + err = readn(fd, p, size - sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) { pr_err("unexpected end of event stream\n"); @@ -1285,6 +1284,7 @@ int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, u64 file_size, struct perf_tool *tool) { + int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, progress_next; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; @@ -1317,7 +1317,7 @@ int __perf_session__process_events(struct perf_session *session, mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd, + buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); @@ -1382,16 +1382,17 @@ out_err: int perf_session__process_events(struct perf_session *self, struct perf_tool *tool) { + u64 size = perf_data_file__size(self->file); int err; if (perf_session__register_idle_thread(self) == NULL) return -ENOMEM; - if (!self->fd_pipe) + if (!perf_data_file__is_pipe(self->file)) err = __perf_session__process_events(self, self->header.data_offset, self->header.data_size, - self->size, tool); + size, tool); else err = __perf_session__process_pipe_events(self, tool); @@ -1615,13 +1616,14 @@ int perf_session__cpu_bitmap(struct perf_session *session, void perf_session__fprintf_info(struct perf_session *session, FILE *fp, bool full) { + int fd = perf_data_file__fd(session->file); struct stat st; int ret; if (session == NULL || fp == NULL) return; - ret = fstat(session->fd, &st); + ret = fstat(fd, &st); if (ret == -1) return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index e1ca2d0..27c74d3 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -30,16 +30,13 @@ struct ordered_samples { struct perf_session { struct perf_header header; - unsigned long size; struct machines machines; struct perf_evlist *evlist; struct pevent *pevent; struct events_stats stats; - int fd; - bool fd_pipe; bool repipe; struct ordered_samples ordered_samples; - const char *filename; + struct perf_data_file *file; }; #define PRINT_IP_OPT_IP (1<<0) -- cgit v0.10.2 From 91e95617429cb272fd908b1928a1915b37b9655f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 18 Oct 2013 10:38:48 -0400 Subject: perf report: Add --max-stack option to limit callchain stack scan When callgraph data was included in the perf data file, it may take a long time to scan all those data and merge them together especially if the stored callchains are long and the perf data file itself is large, like a Gbyte or so. The callchain stack is currently limited to PERF_MAX_STACK_DEPTH (127). This is a large value. Usually the callgraph data that developers are most interested in are the first few levels, the rests are usually not looked at. This patch adds a new --max-stack option to perf-report to limit the depth of callchain stack data to look at to reduce the time it takes for perf-report to finish its processing. It trades the presence of trailing stack information with faster speed. The following table shows the elapsed time of doing perf-report on a perf.data file of size 985,531,828 bytes. --max_stack Elapsed Time Output data size ----------- ------------ ---------------- not set 88.0s 124,422,651 64 87.5s 116,303,213 32 87.2s 112,023,804 16 86.6s 94,326,380 8 59.9s 33,697,248 4 40.7s 10,116,637 -g none 27.1s 2,555,810 Signed-off-by: Waiman Long Acked-by: David Ahern Cc: Adrian Hunter Cc: Aswin Chandramouleeswaran Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382107129-2010-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index be5ad87..10a2798 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -141,6 +141,14 @@ OPTIONS Default: fractal,0.5,callee,function. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + -G:: --inverted:: alias for inverted caller based call graph. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa68a36..81addca 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -49,6 +49,7 @@ struct perf_report { bool show_threads; bool inverted_callchain; bool mem_mode; + int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; const char *cpu_list; @@ -90,7 +91,8 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -181,7 +183,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -244,18 +247,21 @@ out: return err; } -static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, +static int perf_evsel__add_hist_entry(struct perf_tool *tool, + struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample, struct machine *machine) { + struct perf_report *rep = container_of(tool, struct perf_report, tool); struct symbol *parent = NULL; int err = 0; struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = machine__resolve_callchain(machine, evsel, al->thread, - sample, &parent, al); + sample, &parent, al, + rep->max_stack); if (err) return err; } @@ -332,7 +338,8 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); + ret = perf_evsel__add_hist_entry(tool, evsel, &al, sample, + machine); if (ret < 0) pr_debug("problem incrementing symbol period, skipping event\n"); } @@ -772,6 +779,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_samples = true, .ordering_requires_timestamps = true, }, + .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", }; const struct option options[] = { @@ -812,6 +820,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt), + OPT_INTEGER(0, "max-stack", &report.max_stack, + "Set the maximum stack depth when parsing the callchain, " + "anything beyond the specified depth will be ignored. " + "Default: " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d934f70..112cb7d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -770,7 +770,8 @@ static void perf_event__process_sample(struct perf_tool *tool, sample->callchain) { err = machine__resolve_callchain(machine, evsel, al.thread, sample, - &parent, &al); + &parent, &al, + PERF_MAX_STACK_DEPTH); if (err) return; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6b861ae..ea93425 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1253,10 +1253,12 @@ static int machine__resolve_callchain_sample(struct machine *machine, struct thread *thread, struct ip_callchain *chain, struct symbol **parent, - struct addr_location *root_al) + struct addr_location *root_al, + int max_stack) { u8 cpumode = PERF_RECORD_MISC_USER; - unsigned int i; + int chain_nr = min(max_stack, (int)chain->nr); + int i; int err; callchain_cursor_reset(&callchain_cursor); @@ -1266,7 +1268,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, return 0; } - for (i = 0; i < chain->nr; i++) { + for (i = 0; i < chain_nr; i++) { u64 ip; struct addr_location al; @@ -1338,12 +1340,14 @@ int machine__resolve_callchain(struct machine *machine, struct thread *thread, struct perf_sample *sample, struct symbol **parent, - struct addr_location *root_al) + struct addr_location *root_al, + int max_stack) { int ret; ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent, root_al); + sample->callchain, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d44c09b..4c1f5d5 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -92,7 +92,8 @@ int machine__resolve_callchain(struct machine *machine, struct thread *thread, struct perf_sample *sample, struct symbol **parent, - struct addr_location *root_al); + struct addr_location *root_al, + int max_stack); /* * Default guest kernel is defined by parameter --guestkallsyms diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 19fc716..854c5aa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1512,7 +1512,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, if (symbol_conf.use_callchain && sample->callchain) { if (machine__resolve_callchain(machine, evsel, al.thread, - sample, NULL, NULL) != 0) { + sample, NULL, NULL, + PERF_MAX_STACK_DEPTH) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; -- cgit v0.10.2 From 5dbb6e81d85e55ee2b4cf523c1738e16f63e5400 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 18 Oct 2013 10:38:49 -0400 Subject: perf top: Add --max-stack option to limit callchain stack scan When the callgraph function is enabled (-G), it may take a long time to scan all the stack data and merge them accordingly. This patch adds a new --max-stack option to perf-top to limit the depth of callchain stack data to look at to reduce the time it takes for perf-top to finish its processing. It reduces the amount of information provided to the user in exchange for faster speed. Signed-off-by: Waiman Long Acked-by: David Ahern Tested-by: Davidlohr Bueso Cc: Adrian Hunter Cc: Aswin Chandramouleeswaran Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Scott J Norton Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1382107129-2010-5-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index f65777c..c16a09e 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -158,6 +158,14 @@ Default is to monitor all CPUS. Default: fractal,0.5,callee. +--max-stack:: + Set the stack depth limit when parsing the callchain, anything + beyond the specified depth will be ignored. This is a trade-off + between information loss and faster processing especially for + workloads that can have a very long callchain stack. + + Default: 127 + --ignore-callees=:: Ignore callees of the function(s) matching the given regex. This has the effect of collecting the callers of each such diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 112cb7d..386d833 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -771,7 +771,7 @@ static void perf_event__process_sample(struct perf_tool *tool, err = machine__resolve_callchain(machine, evsel, al.thread, sample, &parent, &al, - PERF_MAX_STACK_DEPTH); + top->max_stack); if (err) return; } @@ -1048,10 +1048,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, .freq = 4000, /* 4 KHz */ - .target = { + .target = { .uses_mmap = true, }, }, + .max_stack = PERF_MAX_STACK_DEPTH, .sym_pcnt_filter = 5, }; struct perf_record_opts *opts = &top.record_opts; @@ -1110,6 +1111,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, &parse_callchain_opt, "fp"), + OPT_INTEGER(0, "max-stack", &top.max_stack, + "Set the maximum stack depth when parsing the callchain. " + "Default: " __stringify(PERF_MAX_STACK_DEPTH)), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index b554ffc..88cfeaf 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -24,6 +24,7 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; + int max_stack; bool hide_kernel_symbols, hide_user_symbols, zero; bool use_tui, use_stdio; bool kptr_restrict_warned; -- cgit v0.10.2