From 66af43d56345a7ca549ba1089fe11a6953072417 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 30 Jan 2015 11:33:27 +0900 Subject: perf test: Fix dso cache testcase The current dso cache permits to keep dso->data.fd is open under a half of open file limit. But test__dso_data_cache() sets dso_cnt to limit / 2 + 1 so it'll reach the limit in the loop even though the loop count is one less than the dso_cnt and it makes the final dso__data_fd() after the loop meaningless. I guess the intention was dsos[0]->data.fd is open before the last open and gets closed after it. So add an assert before the last open. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1422585209-32742-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index caaf37f..22a8c42 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -243,8 +243,8 @@ int test__dso_data_cache(void) limit = nr * 4; TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); - /* and this is now our dso open FDs limit + 1 extra */ - dso_cnt = limit / 2 + 1; + /* and this is now our dso open FDs limit */ + dso_cnt = limit / 2; TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(dso_cnt, TEST_FILE_SIZE)); @@ -268,7 +268,10 @@ int test__dso_data_cache(void) } } - /* open +1 dso over the allowed limit */ + /* verify the first one is already open */ + TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1); + + /* open +1 dso to reach the allowed limit */ fd = dso__data_fd(dsos[i], &machine); TEST_ASSERT_VAL("failed to get fd", fd > 0); -- cgit v0.10.2 From 63d3c6f3835d011c783c606c8a1583b041f579aa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 30 Jan 2015 11:33:28 +0900 Subject: perf tests: Do not rely on dso__data_read_offset() to open dso Do not rely on dso__data_read_offset() will always call dso__data_fd() internally. With multi-thread support, accessing a fd will be protected by a lock and it'll cause a huge contention. It can be avoided since we can skip reading from file if there's a data in the dso cache. If one needs to call the dso__data_read_offset(), [s]he also needs to call dso__data_fd() (or set dso->binary_type at least) first like the dwarf unwind code does. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1422585209-32742-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 22a8c42..513e5fe 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -112,6 +112,9 @@ int test__dso_data(void) dso = dso__new((const char *)file); + TEST_ASSERT_VAL("Failed to access to dso", + dso__data_fd(dso, &machine) >= 0); + /* Basic 10 bytes tests. */ for (i = 0; i < ARRAY_SIZE(offsets); i++) { struct test_data_offset *data = &offsets[i]; @@ -252,13 +255,13 @@ int test__dso_data_cache(void) struct dso *dso = dsos[i]; /* - * Open dsos via dso__data_fd or dso__data_read_offset. - * Both opens the data file and keep it open. + * Open dsos via dso__data_fd(), it opens the data + * file and keep it open (unless open file limit). */ + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + if (i % 2) { - fd = dso__data_fd(dso, &machine); - TEST_ASSERT_VAL("failed to get fd", fd > 0); - } else { #define BUFSIZE 10 u8 buf[BUFSIZE]; ssize_t n; -- cgit v0.10.2 From a3c0cc2ac03bd9db032f590d59cdbf0b447503b8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 30 Jan 2015 11:33:29 +0900 Subject: perf tools: Fix a dso open fail message It's not related to mmap, remove it from the message. Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1422585209-32742-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c2f7d3b..a8b3f18 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -240,7 +240,7 @@ static int do_open(char *name) if (fd >= 0) return fd; - pr_debug("dso open failed, mmap: %s\n", + pr_debug("dso open failed: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); if (!dso__data_open_cnt || errno != EMFILE) break; -- cgit v0.10.2 From e1ecbbc3fa834cc6b4b344edb1968e734d57189b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 30 Jan 2015 18:37:44 +0900 Subject: perf probe: Fix to handle optimized not-inlined functions Fix to handle optimized no-inline functions which have only function definition but no actual instance at that point. To fix this problem, we need to find actual instance of the function. Without this patch: ---- # perf probe -a __up Failed to get entry address of __up. Error: Failed to add events. # perf probe -L __up Specified source line is not found. Error: Failed to show lines. ---- With this patch: ---- # perf probe -a __up Added new event: probe:__up (on __up) You can now use it in all perf tools, such as: perf record -e probe:__up -aR sleep 1 # perf probe -L __up <__up@/home/fedora/ksrc/linux-3/kernel/locking/semaphore.c:0> 0 static noinline void __sched __up(struct semaphore *sem) { struct semaphore_waiter *waiter = list_first_entry(&sem->wait_ struct semaphore_waite 4 list_del(&waiter->list); 5 waiter->up = true; 6 wake_up_process(waiter->task); 7 } ---- Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150130093744.30575.43290.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index cc66c40..780b2bc 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -278,6 +278,21 @@ bool die_is_func_def(Dwarf_Die *dw_die) } /** + * die_is_func_instance - Ensure that this DIE is an instance of a subprogram + * @dw_die: a DIE + * + * Ensure that this DIE is an instance (which has an entry address). + * This returns true if @dw_die is a function instance. If not, you need to + * call die_walk_instances() to find actual instances. + **/ +bool die_is_func_instance(Dwarf_Die *dw_die) +{ + Dwarf_Addr tmp; + + /* Actually gcc optimizes non-inline as like as inlined */ + return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; +} +/** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure * @offs: The offset of the member in the data structure diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index b4fe90c..af7dbcd 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,6 +41,9 @@ extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Ensure that this DIE is a subprogram and definition (not declaration) */ extern bool die_is_func_def(Dwarf_Die *dw_die); +/* Ensure that this DIE is an instance of a subprogram */ +extern bool die_is_func_instance(Dwarf_Die *dw_die); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b5247d7..d141935 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -915,17 +915,13 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; param->retval = find_probe_point_by_line(pf); - } else if (!dwarf_func_inline(sp_die)) { + } else if (die_is_func_instance(sp_die)) { + /* Instances always have the entry address */ + dwarf_entrypc(sp_die, &pf->addr); /* Real function */ if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { - if (dwarf_entrypc(sp_die, &pf->addr) != 0) { - pr_warning("Failed to get entry address of " - "%s.\n", dwarf_diename(sp_die)); - param->retval = -ENOENT; - return DWARF_CB_ABORT; - } pf->addr += pp->offset; /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); @@ -1536,7 +1532,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); lr->start = lf->lno_s; lr->end = lf->lno_e; - if (dwarf_func_inline(sp_die)) + if (!die_is_func_instance(sp_die)) param->retval = die_walk_instances(sp_die, line_range_inline_cb, lf); else -- cgit v0.10.2 From 8b72805fd1dbfd697c5d4492d0cf1ebbd994950d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 30 Jan 2015 18:37:46 +0900 Subject: perf probe: Update man page Update Documentation/perf-probe.txt to add descriptions of some newer options. Signed-off-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150130093746.30575.8571.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index aaa869b..239609c 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -47,6 +47,12 @@ OPTIONS -v:: --verbose:: Be more verbose (show parsed arguments, etc). + Can not use with -q. + +-q:: +--quiet:: + Be quiet (do not show any messages including errors). + Can not use with -v. -a:: --add=:: @@ -96,7 +102,7 @@ OPTIONS Dry run. With this option, --add and --del doesn't execute actual adding and removal operations. ---max-probes:: +--max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. -x:: @@ -104,8 +110,13 @@ OPTIONS Specify path to the executable or shared library file for user space tracing. Can also be used with --funcs option. +--demangle:: + Demangle application symbols. --no-demangle is also available + for disabling demangling. + --demangle-kernel:: - Demangle kernel symbols. + Demangle kernel symbols. --no-demangle-kernel is also available + for disabling kernel demangling. In absence of -m/-x options, perf probe checks if the first argument after the options is an absolute path name. If its an absolute path, perf probe @@ -137,6 +148,7 @@ Each probe argument follows below syntax. [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) +'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. -- cgit v0.10.2 From 4886f2ca19f6ff22ebfbe8e78c79c699e572b89f Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Mon, 26 Jan 2015 22:34:01 -0800 Subject: perf symbols: Ignore mapping symbols on aarch64 Aarch64 ELF files use mapping symbols with special names $x, $d to identify regions of Aarch64 code (see Aarch64 ELF ABI - "ARM IHI 0056B", section "4.5.4 Mapping symbols"). The patch filters out these symbols at load time, similar to "696b97a perf symbols: Ignore mapping symbols on ARM" changes done for ARM before V8. Also added handling of mapping symbols that has format "$d." and similar for both cases. Note we are not making difference between EM_ARM and EM_AARCH64 mapping symbols instead code handles superset of both. Signed-off-by: Victor Kamensky Acked-by: Namhyung Kim Acked-by: Will Deacon Cc: Adrian Hunter Cc: Anton Blanchard Cc: Avi Kivity Cc: Dave Martin Cc: David Ahern Cc: Jiri Olsa Cc: linux-arm-kernel@lists.infradead.org Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Link: http://lkml.kernel.org/r/1422340442-4673-2-git-send-email-victor.kamensky@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b24f9d8..225eb73 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -859,10 +859,9 @@ int dso__load_sym(struct dso *dso, struct map *map, /* Reject ARM ELF "mapping symbols": these aren't unique and * don't identify functions, so will confuse the profile * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) + if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) { + if (elf_name[0] == '$' && strchr("adtx", elf_name[1]) + && (elf_name[2] == '\0' || elf_name[2] == '.')) continue; } -- cgit v0.10.2 From dc6254cf870732804b76a83ff2d8a72fea4365f6 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Mon, 26 Jan 2015 22:34:02 -0800 Subject: perf symbols: debuglink should take symfs option into account Currently code that tries to read corresponding debug symbol file from .gnu_debuglink section (DSO_BINARY_TYPE__DEBUGLINK) does not take in account symfs option, so filename__read_debuglink function cannot open ELF file, if symfs option is used. Fix is to add proper handling of symfs as it is done in other places: use __symbol__join_symfs function to get real file name of target ELF file. Signed-off-by: Victor Kamensky Tested-by: David Ahern Acked-by: David Ahern Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Anton Blanchard Cc: Avi Kivity Cc: Dave Martin Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Cc: Waiman Long Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1422340442-4673-3-git-send-email-victor.kamensky@linaro.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index a8b3f18..814554d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -45,13 +45,13 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; - strncpy(filename, dso->long_name, size); - debuglink = filename + dso->long_name_len; + len = __symbol__join_symfs(filename, size, dso->long_name); + debuglink = filename + len; while (debuglink != filename && *debuglink != '/') debuglink--; if (*debuglink == '/') debuglink++; - ret = filename__read_debuglink(dso->long_name, debuglink, + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } break; -- cgit v0.10.2 From 402bb4e6ec6507ccbb2e556d7996bbc989db7f27 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2015 12:44:09 -0300 Subject: tools lib traceevent: Introduce trace_seq_do_fprintf function So that we can specify a FILE object where to direct the formatted output. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-a49bhdrx8851f04hppn8bqxq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7a3873f..5b4efc0 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -91,6 +92,7 @@ extern int trace_seq_putc(struct trace_seq *s, unsigned char c); extern void trace_seq_terminate(struct trace_seq *s); +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); extern int trace_seq_do_printf(struct trace_seq *s); diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index ec3bd16..292dc9f 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -231,19 +231,24 @@ void trace_seq_terminate(struct trace_seq *s) s->buffer[s->len] = 0; } -int trace_seq_do_printf(struct trace_seq *s) +int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) { TRACE_SEQ_CHECK(s); switch (s->state) { case TRACE_SEQ__GOOD: - return printf("%.*s", s->len, s->buffer); + return fprintf(fp, "%.*s", s->len, s->buffer); case TRACE_SEQ__BUFFER_POISONED: - puts("Usage of trace_seq after it was destroyed"); + fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); break; case TRACE_SEQ__MEM_ALLOC_FAILED: - puts("Can't allocate trace_seq buffer memory"); + fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); break; } return -1; } + +int trace_seq_do_printf(struct trace_seq *s) +{ + return trace_seq_do_fprintf(s, stdout); +} -- cgit v0.10.2 From aa1aac17a15cbf64236bd6f3b855262dcfb845c9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2015 12:46:58 -0300 Subject: perf tools: Introduce event_format__fprintf method The existing one, event_format__print() uses stdout unconditionally, and 'perf trace' needs to use it to format into a file that may have been set by the user, i.e. 'trace -o file.output'. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-7l0mgm91hwg0bby00s5pse8r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c36636f..25d6c73 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -112,8 +112,8 @@ unsigned long long read_size(struct event_format *event, void *ptr, int size) return pevent_read_number(event->pevent, ptr, size); } -void event_format__print(struct event_format *event, - int cpu, void *data, int size) +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp) { struct pevent_record record; struct trace_seq s; @@ -125,10 +125,16 @@ void event_format__print(struct event_format *event, trace_seq_init(&s); pevent_event_info(&s, event, &record); - trace_seq_do_printf(&s); + trace_seq_do_fprintf(&s, fp); trace_seq_destroy(&s); } +void event_format__print(struct event_format *event, + int cpu, void *data, int size) +{ + return event_format__fprintf(event, cpu, data, size, stdout); +} + void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 52aaa19..356629a 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -23,6 +23,9 @@ trace_event__tp_format(const char *sys, const char *name); int bigendian(void); +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp); + void event_format__print(struct event_format *event, int cpu, void *data, int size); -- cgit v0.10.2 From f7aa222ff397bd99153a039578864c1a36b8f391 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2015 13:25:39 -0300 Subject: perf trace: No need to enable evsels for workload started from perf As they will have perf_event_attr.enable_on_exec set, starting as soon as we exec() the workload. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vmj3f6o3vxrg7mrdipts09li@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7e935f10..66300ae 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2109,10 +2109,10 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_mmap; - perf_evlist__enable(evlist); - if (forks) perf_evlist__start_workload(evlist); + else + perf_evlist__enable(evlist); trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; again: -- cgit v0.10.2 From 20f86fc1fde14c6d913ebf5f569ee85e058a7dbd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2015 13:29:05 -0300 Subject: perf evlist: Fix typo in comment Link: http://lkml.kernel.org/n/tip-qzg2qrdgta6dmcrxqdeexthu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 28b8ce8..c602ebb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1329,7 +1329,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar * writing exactly one byte, in workload.cork_fd, usually via * perf_evlist__start_workload(). * - * For cancelling the workload without actuallin running it, + * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() * here. -- cgit v0.10.2 From 5693c92660970851e95f769ff27397f5098a6296 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 Feb 2015 14:35:02 -0500 Subject: perf tools: Do not check debugfs MAGIC for tracing files It's rather strange to be checking the debugfs MAGIC number for the tracing directory. A system admin may want to have a custom set of events to trace and it should be allowed to let the admin make a temp file (even for tracing virtual boxes, this is useful). Also with the coming tracefs, the files may not even be under debugfs, so checking the debugfs MAGIC number is pointless. Signed-off-by: Steven Rostedt Acked-by: Jiri Olsa Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150202193552.546175764@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index d2b18e8..d21d4d6 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -20,6 +20,20 @@ static const char * const debugfs_known_mountpoints[] = { static bool debugfs_found; +/* verify that a mountpoint is actually a debugfs instance */ + +static int debugfs_valid_mountpoint(const char *debugfs) +{ + struct statfs st_fs; + + if (statfs(debugfs, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC) + return -ENOENT; + + return 0; +} + /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { @@ -60,20 +74,6 @@ const char *debugfs_find_mountpoint(void) return debugfs_mountpoint; } -/* verify that a mountpoint is actually a debugfs instance */ - -int debugfs_valid_mountpoint(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC) - return -ENOENT; - - return 0; -} - /* mount the debugfs somewhere if it's not mounted */ char *debugfs_mount(const char *mountpoint) { diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index 0739881..77bb36a 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -21,7 +21,6 @@ #endif const char *debugfs_find_mountpoint(void); -int debugfs_valid_mountpoint(const char *debugfs); char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7f8ec6c..ecf069b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -175,9 +175,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return NULL; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return NULL; @@ -473,12 +470,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event) { - int ret; - - ret = debugfs_valid_mountpoint(tracing_events_path); - if (ret) - return ret; - if (strpbrk(sys, "*?")) return add_tracepoint_multi_sys(list, idx, sys, event); else @@ -1109,13 +1100,6 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - char sbuf[STRERR_BUFSIZE]; - - if (debugfs_valid_mountpoint(tracing_events_path)) { - printf(" [ Tracepoints not available: %s ]\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - return; - } sys_dir = opendir(tracing_events_path); if (!sys_dir) @@ -1163,9 +1147,6 @@ int is_valid_tracepoint(const char *event_string) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return 0; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return 0; -- cgit v0.10.2 From cde164aee9e0343831467035eb96dd5506742648 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 Feb 2015 14:35:03 -0500 Subject: tools lib fs: Add helper to find mounted file systems In preparation for adding tracefs for perf to use, create a findfs helper utility that find_debugfs uses instead of hard coding the search in the code. This will allow for a find_tracefs to be used as well. Signed-off-by: Steven Rostedt Acked-by: Jiri Olsa Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150202193552.735023362@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 36c08b1..22b2f15 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -9,11 +9,13 @@ LIB_H= LIB_OBJS= LIB_H += fs/debugfs.h +LIB_H += fs/findfs.h LIB_H += fs/fs.h # See comment below about piggybacking... LIB_H += fd/array.h LIB_OBJS += $(OUTPUT)fs/debugfs.o +LIB_OBJS += $(OUTPUT)fs/findfs.o LIB_OBJS += $(OUTPUT)fs/fs.o # XXX piggybacking here, need to introduce libapikfd, or rename this # to plain libapik.a and make it have it all api goodies diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index d21d4d6..91e1668 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -20,58 +20,21 @@ static const char * const debugfs_known_mountpoints[] = { static bool debugfs_found; -/* verify that a mountpoint is actually a debugfs instance */ - -static int debugfs_valid_mountpoint(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC) - return -ENOENT; - - return 0; -} - /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { - const char * const *ptr; - char type[100]; - FILE *fp; + const char *ret; if (debugfs_found) return (const char *)debugfs_mountpoint; - ptr = debugfs_known_mountpoints; - while (*ptr) { - if (debugfs_valid_mountpoint(*ptr) == 0) { - debugfs_found = true; - strcpy(debugfs_mountpoint, *ptr); - return debugfs_mountpoint; - } - ptr++; - } + ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC, + debugfs_mountpoint, PATH_MAX + 1, + debugfs_known_mountpoints); + if (ret) + debugfs_found = true; - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - debugfs_mountpoint, type) == 2) { - if (strcmp(type, "debugfs") == 0) - break; - } - fclose(fp); - - if (strcmp(type, "debugfs") != 0) - return NULL; - - debugfs_found = true; - - return debugfs_mountpoint; + return ret; } /* mount the debugfs somewhere if it's not mounted */ diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index 77bb36a..1074ac8 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -1,16 +1,7 @@ #ifndef __API_DEBUGFS_H__ #define __API_DEBUGFS_H__ -#define _STR(x) #x -#define STR(x) _STR(x) - -/* - * On most systems would have given us this, but not on some systems - * (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif +#include "findfs.h" #ifndef DEBUGFS_MAGIC #define DEBUGFS_MAGIC 0x64626720 diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c new file mode 100644 index 0000000..49946cb --- /dev/null +++ b/tools/lib/api/fs/findfs.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include + +#include "findfs.h" + +/* verify that a mountpoint is actually the type we want */ + +int valid_mountpoint(const char *mount, long magic) +{ + struct statfs st_fs; + + if (statfs(mount, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +/* find the path to a mounted file system */ +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints) +{ + const char * const *ptr; + char format[128]; + char type[100]; + FILE *fp; + + if (known_mountpoints) { + ptr = known_mountpoints; + while (*ptr) { + if (valid_mountpoint(*ptr, magic) == 0) { + strncpy(mountpoint, *ptr, len - 1); + mountpoint[len-1] = 0; + return mountpoint; + } + ptr++; + } + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len); + + while (fscanf(fp, format, mountpoint, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + fclose(fp); + + if (strcmp(type, fstype) != 0) + return NULL; + + return mountpoint; +} diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h new file mode 100644 index 0000000..9e7d876 --- /dev/null +++ b/tools/lib/api/fs/findfs.h @@ -0,0 +1,21 @@ +#ifndef __API_FINDFS_H__ +#define __API_FINDFS_H__ + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * On most systems would have given us this, but not on some systems + * (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints); + +int valid_mountpoint(const char *mount, long magic); + +#endif /* __API_FINDFS_H__ */ -- cgit v0.10.2 From 4ef92c2ecd96ebad171e554020c83ce9fdb343ae Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 Feb 2015 14:35:04 -0500 Subject: tools lib api fs: Add tracefs mount helper functions Since tracefs will now hold the event directory for perf, and even though by default, debugfs still mounts tracefs on the debugfs/tracing directory, the system admin may now choose to not mount debugfs and instead just mount tracefs instead. Having tracefs helper functions will facilitate having perf look for tracefs first, and then try debugfs as a fallback. Signed-off-by: Steven Rostedt Acked-by: Jiri Olsa Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150202193552.898934751@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 22b2f15..212aa4f 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -9,12 +9,14 @@ LIB_H= LIB_OBJS= LIB_H += fs/debugfs.h +LIB_H += fs/tracefs.h LIB_H += fs/findfs.h LIB_H += fs/fs.h # See comment below about piggybacking... LIB_H += fd/array.h LIB_OBJS += $(OUTPUT)fs/debugfs.o +LIB_OBJS += $(OUTPUT)fs/tracefs.o LIB_OBJS += $(OUTPUT)fs/findfs.o LIB_OBJS += $(OUTPUT)fs/fs.o # XXX piggybacking here, need to introduce libapikfd, or rename this diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c new file mode 100644 index 0000000..ef40d15 --- /dev/null +++ b/tools/lib/api/fs/tracefs.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tracefs.h" + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH; + +static const char * const tracefs_known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + +static bool tracefs_found; + +/* find the path to the mounted tracefs */ +const char *tracefs_find_mountpoint(void) +{ + const char *ret; + + if (tracefs_found) + return (const char *)tracefs_mountpoint; + + ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC, + tracefs_mountpoint, PATH_MAX + 1, + tracefs_known_mountpoints); + + if (ret) + tracefs_found = true; + + return ret; +} + +/* mount the tracefs somewhere if it's not mounted */ +char *tracefs_mount(const char *mountpoint) +{ + /* see if it's already mounted */ + if (tracefs_find_mountpoint()) + goto out; + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = TRACEFS_DEFAULT_PATH; + } + + if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0) + return NULL; + + /* save the mountpoint */ + tracefs_found = true; + strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint)); +out: + return tracefs_mountpoint; +} diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h new file mode 100644 index 0000000..e6f7f51 --- /dev/null +++ b/tools/lib/api/fs/tracefs.h @@ -0,0 +1,20 @@ +#ifndef __API_TRACEFS_H__ +#define __API_TRACEFS_H__ + +#include "findfs.h" + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef PERF_TRACEFS_ENVIRONMENT +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" +#endif + +const char *tracefs_find_mountpoint(void); +int tracefs_valid_mountpoint(const char *debugfs); +char *tracefs_mount(const char *mountpoint); + +extern char tracefs_mountpoint[]; + +#endif /* __API_DEBUGFS_H__ */ -- cgit v0.10.2 From a9edf60749a9483341facfa7c28bcf8afb3c8311 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 Feb 2015 14:35:05 -0500 Subject: tools lib api debugfs: Add DEBUGFS_DEFAULT_PATH macro Instead of hard coding "/sys/kernel/debug" everywhere, create a macro to hold where the default path exists. Signed-off-by: Steven Rostedt Acked-by: Jiri Olsa Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150202193553.032117017@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 91e1668..07d74b0 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -10,10 +10,14 @@ #include "debugfs.h" -char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH; static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", + DEBUGFS_DEFAULT_PATH, "/debug", 0, }; @@ -50,7 +54,7 @@ char *debugfs_mount(const char *mountpoint) mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); /* if no environment variable, use default */ if (mountpoint == NULL) - mountpoint = "/sys/kernel/debug"; + mountpoint = DEBUGFS_DEFAULT_PATH; } if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) -- cgit v0.10.2 From dd6dda27a8be563eaebb3f38b1d1d50920bb7991 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 Feb 2015 14:35:06 -0500 Subject: tools lib api fs: Add {tracefs,debugfs}_configured() functions Add tracefs_configured() to return true if tracefs is configured in the kernel (succeeds to find tracefs), and debugfs_configured() if debugfs is configured in the kernel (succeeds to find debugfs). Signed-off-by: Steven Rostedt Acked-by: Jiri Olsa Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150202193553.190606690@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index 07d74b0..8305b3e 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -3,8 +3,11 @@ #include #include #include +#include #include #include +#include +#include #include #include @@ -24,6 +27,11 @@ static const char * const debugfs_known_mountpoints[] = { static bool debugfs_found; +bool debugfs_configured(void) +{ + return debugfs_find_mountpoint() != NULL; +} + /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index 1074ac8..4550236 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -11,6 +11,7 @@ #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #endif +bool debugfs_configured(void); const char *debugfs_find_mountpoint(void); char *debugfs_mount(const char *mountpoint); diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h index 9e7d876..b6f5d05 100644 --- a/tools/lib/api/fs/findfs.h +++ b/tools/lib/api/fs/findfs.h @@ -1,6 +1,8 @@ #ifndef __API_FINDFS_H__ #define __API_FINDFS_H__ +#include + #define _STR(x) #x #define STR(x) _STR(x) diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c index ef40d15..e4aa968 100644 --- a/tools/lib/api/fs/tracefs.c +++ b/tools/lib/api/fs/tracefs.c @@ -2,8 +2,11 @@ #include #include #include +#include #include #include +#include +#include #include #include @@ -25,6 +28,11 @@ static const char * const tracefs_known_mountpoints[] = { static bool tracefs_found; +bool tracefs_configured(void) +{ + return tracefs_find_mountpoint() != NULL; +} + /* find the path to the mounted tracefs */ const char *tracefs_find_mountpoint(void) { diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h index e6f7f51..da780ac 100644 --- a/tools/lib/api/fs/tracefs.h +++ b/tools/lib/api/fs/tracefs.h @@ -11,6 +11,7 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #endif +bool tracefs_configured(void); const char *tracefs_find_mountpoint(void); int tracefs_valid_mountpoint(const char *debugfs); char *tracefs_mount(const char *mountpoint); -- cgit v0.10.2 From 23773ca18b399051eb94f98b689cf7a9173c795b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 2 Feb 2015 14:35:07 -0500 Subject: perf tools: Make perf aware of tracefs As tracefs may be mounted instead of debugfs to get to the event directories, have perf know about tracefs, and use that file system over debugfs if it is present. Signed-off-by: Steven Rostedt Acked-by: Jiri Olsa Cc: Andrew Morton Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20150202193553.340946602@goodmis.org [ Fixed up error messages about tracefs pointed out by Namhyung ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c index 8fa82d1..3ec885c 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/open-syscall-all-cpus.c @@ -29,7 +29,12 @@ int test__open_syscall_event_on_all_cpus(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c index a33b2da..07aa319 100644 --- a/tools/perf/tests/open-syscall.c +++ b/tools/perf/tests/open-syscall.c @@ -18,7 +18,12 @@ int test__open_syscall_event(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 1cdab0c..ac243eb 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "evlist.h" #include +#include #include #include "tests.h" #include "debug.h" @@ -1192,11 +1193,19 @@ static int count_tracepoints(void) { char events_path[PATH_MAX]; struct dirent *events_ent; + const char *mountpoint; DIR *events_dir; int cnt = 0; - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - debugfs_find_mountpoint()); + mountpoint = tracefs_find_mountpoint(); + if (mountpoint) { + scnprintf(events_path, PATH_MAX, "%s/events", + mountpoint); + } else { + mountpoint = debugfs_find_mountpoint(); + scnprintf(events_path, PATH_MAX, "%s/tracing/events", + mountpoint); + } events_dir = opendir(events_path); diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index d04d770..fbcca21 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -17,6 +17,7 @@ #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c602ebb..a8b2c57 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -7,7 +7,6 @@ * Released under the GPL v2. (and only v2, not any later version) */ #include "util.h" -#include #include #include #include "cpumap.h" diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ff6e1fa..39c3b57 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -122,6 +122,6 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); -extern int valid_debugfs_mount(const char *debugfs); +int valid_event_mount(const char *eventfs); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 919937e..9dfbed9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,6 +41,7 @@ #include "symbol.h" #include "thread.h" #include +#include #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" @@ -1805,7 +1806,7 @@ static void print_open_warning(int err, bool is_kprobe) " - please rebuild kernel with %s.\n", is_kprobe ? 'k' : 'u', config); } else if (err == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else pr_warning("Failed to open %cprobe_events: %s\n", is_kprobe ? 'k' : 'u', @@ -1816,7 +1817,7 @@ static void print_both_open_warning(int kerr, int uerr) { /* Both kprobes and uprobes are disabled, warn it. */ if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else if (kerr == -ENOENT && uerr == -ENOENT) pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " "or/and CONFIG_UPROBE_EVENTS.\n"); @@ -1833,13 +1834,20 @@ static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; const char *__debugfs; + const char *tracing_dir = ""; int ret; - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; + __debugfs = tracefs_find_mountpoint(); + if (__debugfs == NULL) { + tracing_dir = "tracing/"; - ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); + __debugfs = debugfs_find_mountpoint(); + if (__debugfs == NULL) + return -ENOTSUP; + } + + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + __debugfs, tracing_dir, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) @@ -1855,12 +1863,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("tracing/kprobe_events", readwrite); + return open_probe_events("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("tracing/uprobe_events", readwrite); + return open_probe_events("uprobe_events", readwrite); } /* Get raw string list of current kprobe_events or uprobe_events */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b86744f..92db3f1 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -303,13 +303,26 @@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *mountpoint) +static void set_tracing_events_path(const char *tracing, const char *mountpoint) { - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", - mountpoint, "tracing/events"); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); } -const char *perf_debugfs_mount(const char *mountpoint) +static const char *__perf_tracefs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = tracefs_mount(mountpoint); + if (!mnt) + return NULL; + + set_tracing_events_path("", mnt); + + return mnt; +} + +static const char *__perf_debugfs_mount(const char *mountpoint) { const char *mnt; @@ -317,7 +330,20 @@ const char *perf_debugfs_mount(const char *mountpoint) if (!mnt) return NULL; - set_tracing_events_path(mnt); + set_tracing_events_path("tracing/", mnt); + + return mnt; +} + +const char *perf_debugfs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = __perf_tracefs_mount(mountpoint); + if (mnt) + return mnt; + + mnt = __perf_debugfs_mount(mountpoint); return mnt; } @@ -325,12 +351,19 @@ const char *perf_debugfs_mount(const char *mountpoint) void perf_debugfs_set_path(const char *mntpt) { snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); - set_tracing_events_path(mntpt); + set_tracing_events_path("tracing/", mntpt); +} + +static const char *find_tracefs(void) +{ + const char *path = __perf_tracefs_mount(NULL); + + return path; } static const char *find_debugfs(void) { - const char *path = perf_debugfs_mount(NULL); + const char *path = __perf_debugfs_mount(NULL); if (!path) fprintf(stderr, "Your kernel does not support the debugfs filesystem"); @@ -344,6 +377,7 @@ static const char *find_debugfs(void) */ const char *find_tracing_dir(void) { + const char *tracing_dir = ""; static char *tracing; static int tracing_found; const char *debugfs; @@ -351,11 +385,15 @@ const char *find_tracing_dir(void) if (tracing_found) return tracing; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; + debugfs = find_tracefs(); + if (!debugfs) { + tracing_dir = "/tracing"; + debugfs = find_debugfs(); + if (!debugfs) + return NULL; + } - if (asprintf(&tracing, "%s/tracing", debugfs) < 0) + if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) return NULL; tracing_found = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 027a515..73c2f8e 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 4e31050f482c02c822b150d71cf1ea5be7c9d6e4 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 9 Feb 2015 16:29:37 -0800 Subject: perf symbols: Define STT_GNU_IFUNC for glibc 2.9 and older. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The token STT_GNU_IFUNC is not available with glibc 2.9 and older. Define this token if it is not already defined. This patch fixes this build errors with older versions of glibc. CC util/symbol-elf.o util/symbol-elf.c: In function ‘elf_sym__is_function’: util/symbol-elf.c:75: error: ‘STT_GNU_IFUNC’ undeclared (first use in this function) util/symbol-elf.c:75: error: (Each undeclared identifier is reported only once util/symbol-elf.c:75: error: for each function it appears in.) make: *** [util/symbol-elf.o] Error 1 Signed-off-by: Vinson Lee Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Anton Blanchard Cc: Avi Kivity Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Waiman Long Cc: stable@vger.kernel.org # 3.17+ Link: http://lkml.kernel.org/r/1423528286-13630-1-git-send-email-vlee@twopensource.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 225eb73..b02731a 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -69,6 +69,10 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +#ifndef STT_GNU_IFUNC +#define STT_GNU_IFUNC 10 +#endif + static inline int elf_sym__is_function(const GElf_Sym *sym) { return (elf_sym__type(sym) == STT_FUNC || -- cgit v0.10.2 From e35f7362bab455fb5c13ea4ce53f959f3e1610b2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 10 Feb 2015 18:18:51 +0900 Subject: perf buildid-cache: Remove unneeded debugdir parameters Functions related to buildid-cache subcommand use debugdir parameters for passing buildid cache directory path. However all callers just pass buildid_dir global variable. Moreover, other functions which refer buildid cache use buildid_dir directly. This removes unneeded debugdir parameters from those functions and use buildid_dir if needed. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: Hemant Kumar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150210091851.19264.72741.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 50e6b66..d929d95 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -125,8 +125,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, return ret; } -static int build_id_cache__add_kcore(const char *filename, const char *debugdir, - bool force) +static int build_id_cache__add_kcore(const char *filename, bool force) { char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; @@ -143,7 +142,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - debugdir, sbuildid); + buildid_dir, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -155,7 +154,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - debugdir, sbuildid, dir); + buildid_dir, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; @@ -183,7 +182,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return 0; } -static int build_id_cache__add_file(const char *filename, const char *debugdir) +static int build_id_cache__add_file(const char *filename) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; u8 build_id[BUILD_ID_SIZE]; @@ -195,7 +194,7 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, + err = build_id_cache__add_s(sbuild_id, filename, false, false); if (verbose) pr_info("Adding %s %s: %s\n", sbuild_id, filename, @@ -203,8 +202,7 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) return err; } -static int build_id_cache__remove_file(const char *filename, - const char *debugdir) +static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -217,7 +215,7 @@ static int build_id_cache__remove_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); + err = build_id_cache__remove_s(sbuild_id); if (verbose) pr_info("Removing %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -252,8 +250,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f return 0; } -static int build_id_cache__update_file(const char *filename, - const char *debugdir) +static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -266,11 +263,10 @@ static int build_id_cache__update_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (!err) { - err = build_id_cache__add_s(sbuild_id, debugdir, filename, - false, false); - } + err = build_id_cache__remove_s(sbuild_id); + if (!err) + err = build_id_cache__add_s(sbuild_id, filename, false, false); + if (verbose) pr_info("Updating %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -338,7 +334,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, buildid_dir)) { + if (build_id_cache__add_file(pos->s)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -356,7 +352,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, buildid_dir)) { + if (build_id_cache__remove_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -377,7 +373,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, buildid_dir)) { + if (build_id_cache__update_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -391,8 +387,7 @@ int cmd_buildid_cache(int argc, const char **argv, } } - if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) + if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0c72680..9f764f6 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -259,8 +259,8 @@ void disable_buildid_cache(void) no_buildid_cache = true; } -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso) +int build_id_cache__add_s(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; char *realname, *filename = zalloc(size), @@ -282,7 +282,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, goto out_free; len = scnprintf(filename, size, "%s%s%s", - debugdir, slash ? "/" : "", + buildid_dir, slash ? "/" : "", is_vdso ? DSO__NAME_VDSO : realname); if (mkdir_p(filename, 0755)) goto out_free; @@ -298,13 +298,13 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, } len = scnprintf(linkname, size, "%s/.build-id/%.2s", - debugdir, sbuild_id); + buildid_dir, sbuild_id); if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) goto out_free; snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); - targetname = filename + strlen(debugdir) - 5; + targetname = filename + strlen(buildid_dir) - 5; memcpy(targetname, "../..", 5); if (symlink(targetname, linkname) == 0) @@ -318,18 +318,17 @@ out_free: } static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, const char *debugdir, - bool is_kallsyms, bool is_vdso) + const char *name, bool is_kallsyms, + bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, - is_kallsyms, is_vdso); + return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); } -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; char *filename = zalloc(size), @@ -340,7 +339,7 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) goto out_free; snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, sbuild_id + 2); + buildid_dir, sbuild_id, sbuild_id + 2); if (access(linkname, F_OK)) goto out_free; @@ -355,7 +354,7 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) * Since the link is relative, we must make it absolute: */ snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, filename); + buildid_dir, sbuild_id, filename); if (unlink(linkname)) goto out_free; @@ -367,8 +366,7 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine, - const char *debugdir) +static int dso__cache_build_id(struct dso *dso, struct machine *machine) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = dso__is_vdso(dso); @@ -381,28 +379,26 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, name = nm; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - debugdir, is_kallsyms, is_vdso); + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine, const char *debugdir) + struct machine *machine) { struct dso *pos; int err = 0; dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine, debugdir)) + if (dso__cache_build_id(pos, machine)) err = -1; return err; } -static int machine__cache_build_ids(struct machine *machine, const char *debugdir) +static int machine__cache_build_ids(struct machine *machine) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, - debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, - debugdir); + int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine); + ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine); return ret; } @@ -417,11 +413,11 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, buildid_dir); + ret = machine__cache_build_ids(&session->machines.host); for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, buildid_dir); + ret |= machine__cache_build_ids(pos); } return ret ? -1 : 0; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8236319..31b3c63 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -22,9 +22,9 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, +int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); +int build_id_cache__remove_s(const char *sbuild_id); void disable_buildid_cache(void); #endif -- cgit v0.10.2 From 5cb113fd84f72b6e08c1970d612fd61327781d4e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 10 Feb 2015 18:18:53 +0900 Subject: perf buildid-cache: Consolidate .build-id cache path generators Consolidate .build-id cache path generating routines to build_id__filename() function. Other functions must use it to get the buildid cache path (link path) from build-id. This can reduce the risk of partial-update. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: Hemant Kumar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150210091853.19264.58513.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 9f764f6..adbc360 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -93,6 +93,35 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +/* asnprintf consolidates asprintf and snprintf */ +static int asnprintf(char **strp, size_t size, const char *fmt, ...) +{ + va_list ap; + int ret; + + if (!strp) + return -EINVAL; + + va_start(ap, fmt); + if (*strp) + ret = vsnprintf(*strp, size, fmt, ap); + else + ret = vasprintf(strp, fmt, ap); + va_end(ap); + + return ret; +} + +static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +{ + char *tmp = bf; + int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, + sbuild_id, sbuild_id + 2); + if (ret < 0 || (tmp && size < (unsigned int)ret)) + return NULL; + return bf; +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; @@ -101,14 +130,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return NULL; build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - if (bf == NULL) { - if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2) < 0) - return NULL; - } else - snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2); - return bf; + return build_id__filename(build_id_hex, bf, size); } #define dsos__for_each_with_build_id(pos, head) \ @@ -264,7 +286,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, { const size_t size = PATH_MAX; char *realname, *filename = zalloc(size), - *linkname = zalloc(size), *targetname; + *linkname = zalloc(size), *targetname, *tmp; int len, err = -1; bool slash = is_kallsyms || is_vdso; @@ -297,13 +319,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - len = scnprintf(linkname, size, "%s/.build-id/%.2s", - buildid_dir, sbuild_id); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; + tmp = strrchr(linkname, '/'); + *tmp = '\0'; if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) goto out_free; - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); + *tmp = '/'; targetname = filename + strlen(buildid_dir) - 5; memcpy(targetname, "../..", 5); @@ -332,14 +356,14 @@ int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; char *filename = zalloc(size), - *linkname = zalloc(size); + *linkname = zalloc(size), *tmp; int err = -1; if (filename == NULL || linkname == NULL) goto out_free; - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - buildid_dir, sbuild_id, sbuild_id + 2); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; if (access(linkname, F_OK)) goto out_free; @@ -353,8 +377,8 @@ int build_id_cache__remove_s(const char *sbuild_id) /* * Since the link is relative, we must make it absolute: */ - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - buildid_dir, sbuild_id, filename); + tmp = strrchr(linkname, '/') + 1; + snprintf(tmp, size - (tmp - linkname), "%s", filename); if (unlink(linkname)) goto out_free; -- cgit v0.10.2 From 39f5704399042fff5f0d5f6af32bbbc3e787a897 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 11 Feb 2015 11:24:05 -0500 Subject: perf tools: Define _GNU_SOURCE on pthread_attr_setaffinity_np feature check The man page for pthread_attr_set_affinity_np states that _GNU_SOURCE must be defined before pthread.h is included in order to get the proper function declaration. Define this in the Makefile. Without this defined, the feature check fails on a Fedora system with gcc5 and then the perf build later fails with conflicting prototypes for the function. Signed-off-by: Josh Boyer Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Vineet Gupta Link: http://lkml.kernel.org/r/20150211162404.GA15522@hansolo.redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 42ac05a..b32ff33 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -49,7 +49,7 @@ test-hello.bin: $(BUILD) test-pthread-attr-setaffinity-np.bin: - $(BUILD) -Werror -lpthread + $(BUILD) -D_GNU_SOURCE -Werror -lpthread test-stackprotector-all.bin: $(BUILD) -Werror -fstack-protector-all -- cgit v0.10.2 From c819e2cf2eb6f65d3208d195d7a0edef6108d533 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Dec 2014 13:51:45 +0100 Subject: tools build: Add new build support Adding new build framework into 'tools/build' to be used by tools. There's no change for actual building at this point, it comes in the next patches. The idea and more details are explained in the 'tools/build/Documentation/Build.txt' file. I adopted everything from the kernel build system, with some changes to allow for multiple binaries build definitions. While the kernel's build output is single image (forget modules) we need to be able to build several binaries/libraries. The basic idea is that sser provides 'Build' files with objects definitions like: perf-y += a.o perf-y += b.o libperf-y += c.o libperf-y += d.o and the build framework outputs files: perf-in.o # a.o, b.o compiled in libperf-in.o # c.o, d.o compiled in Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-fbj22h4av0otlxupwcmrxgpa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Build.include b/tools/build/Build.include new file mode 100644 index 0000000..4c8daac --- /dev/null +++ b/tools/build/Build.include @@ -0,0 +1,81 @@ +### +# build: Generic definitions +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg , 2015 +# Copyright (C) Linus Torvalds , 2015 +# + +### +# Convenient variables +comma := , +squote := ' + +### +# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +### +# filename of target with directory and extension stripped +basetarget = $(basename $(notdir $@)) + +### +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +### +# Check if both arguments has same arguments. Result is empty string if equal. +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) + +### +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Echo command +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))';) + +### +# Replace >$< with >$$< to preserve $ when reloading the .cmd file +# (needed for make) +# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# (needed for make) +# Replace >'< with >'\''< to be able to enclose the whole string in '...' +# (needed for the shell) +make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) + +### +# Find any prerequisites that is newer than target or that does not exist. +# PHONY targets skipped in both cases. +any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) + +### +# if_changed_dep - execute command if any prerequisite is newer than +# target, or command line has changed and update +# dependencies in the cmd file +if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + cat $(depfile) > $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + +# if_changed - execute command if any prerequisite is newer than +# target, or command line has changed +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +### +# C flags to be used in rule definitions, includes: +# - depfile generation +# - global $(CFLAGS) +# - per target C flags +# - per object C flags +# - BUILD_STR macro to allow '-D"$(variable)"' constructs +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt new file mode 100644 index 0000000..00ad2d6 --- /dev/null +++ b/tools/build/Documentation/Build.txt @@ -0,0 +1,139 @@ +Build Framework +=============== + +The perf build framework was adopted from the kernel build system, hence the +idea and the way how objects are built is the same. + +Basically the user provides set of 'Build' files that list objects and +directories to nest for specific target to be build. + +Unlike the kernel we don't have a single build object 'obj-y' list that where +we setup source objects, but we support more. This allows one 'Build' file to +carry a sources list for multiple build objects. + +a) Build framework makefiles +---------------------------- + +The build framework consists of 2 Makefiles: + + Build.include + Makefile.build + +While the 'Build.include' file contains just some generic definitions, the +'Makefile.build' file is the makefile used from the outside. It's +interface/usage is following: + + $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + +where: + + KSRC - is the path to kernel sources + DIR - is the path to the project to be built + OBJECT - is the name of the build object + +When succefully finished the $(DIR) directory contains the final object file +called $(OBJECT)-in.o: + + $ ls $(DIR)/$(OBJECT)-in.o + +which includes all compiled sources described in 'Build' makefiles. + +a) Build makefiles +------------------ + +The user supplies 'Build' makefiles that contains a objects list, and connects +the build to nested directories. + +Assume we have the following project structure: + + ex/a.c + /b.c + /c.c + /d.c + /arch/e.c + /arch/f.c + +Out of which you build the 'ex' binary ' and the 'libex.a' library: + + 'ex' - consists of 'a.o', 'b.o' and libex.a + 'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o' + +The build framework does not create the 'ex' and 'libex.a' binaries for you, it +only prepares proper objects to be compiled and grouped together. + +To follow the above example, the user provides following 'Build' files: + + ex/Build: + ex-y += a.o + ex-y += b.o + + libex-y += c.o + libex-y += d.o + libex-y += arch/ + + ex/arch/Build: + libex-y += e.o + libex-y += f.o + +and runs: + + $ make -f tools/build/Makefile.build dir=. obj=ex + $ make -f tools/build/Makefile.build dir=. obj=libex + +which creates the following objects: + + ex/ex-in.o + ex/libex-in.o + +that contain request objects names in Build files. + +It's only a matter of 2 single commands to create the final binaries: + + $ ar rcs libex.a libex-in.o + $ gcc -o ex ex-in.o libex.a + +You can check the 'ex' example in 'tools/build/tests/ex' for more details. + +b) Rules +-------- + +The build framework provides standard compilation rules to handle .S and .c +compilation. + +It's possible to include special rule if needed (like we do for flex or bison +code generation). + +c) CFLAGS +--------- + +It's possible to alter the standard object C flags in the following way: + + CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object + CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + +This C flags changes has the scope of the Build makefile they are defined in. + + +d) Dependencies +--------------- + +For each built object file 'a.o' the '.a.cmd' is created and holds: + + - Command line used to built that object + (for each object) + + - Dependency rules generated by 'gcc -Wp,-MD,...' + (for compiled object) + +All existing '.cmd' files are included in the Build process to follow properly +the dependencies and trigger a rebuild when necessary. + + +e) Single rules +--------------- + +It's possible to build single object file by choice, like: + + $ make util/map.o # objects + $ make util/map.i # preprocessor + $ make util/map.s # assembly diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build new file mode 100644 index 0000000..ae203f2 --- /dev/null +++ b/tools/build/Makefile.build @@ -0,0 +1,97 @@ +### +# Main build makefile. +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg , 2015 +# Copyright (C) Linus Torvalds , 2015 +# + +PHONY := __build +__build: + +ifeq ($(V),1) + quiet = +else + quiet=quiet_ +endif + +build-dir := $(srctree)/tools/build + +# Generic definitions +include $(build-dir)/Build.include + +# Init all relevant variables used in build files so +# 1) they have correct type +# 2) they do not inherit any value from the environment +subdir-y := +obj-y := +subdir-y := +subdir-obj-y := + +# Build definitions +build-file := $(dir)/Build +include $(build-file) + +# Compile command +quiet_cmd_cc_o_c = CC $@ + cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +# Link agregate command +# If there's nothing to link, create empty $@ object. +quiet_cmd_ld_multi = LD $@ + cmd_ld_multi = $(if $(strip $(obj-y)),\ + $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + +# Build rules +$(OUTPUT)%.o: %.c FORCE + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.o: %.S FORCE + $(call if_changed_dep,cc_o_c) + +# Gather build data: +# obj-y - list of build objects +# subdir-y - list of directories to nest +# subdir-obj-y - list of directories objects 'dir/$(obj)-in.o' +obj-y := $($(obj)-y) +subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y))) +obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) +subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) + +# '$(OUTPUT)/dir' prefix to all objects +prefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(prefix),$(obj-y)) +subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) + +# Final '$(obj)-in.o' object +in-target := $(prefix)$(obj)-in.o + +PHONY += $(subdir-y) + +$(subdir-y): + @$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) + +$(sort $(subdir-obj-y)): $(subdir-y) ; + +$(in-target): $(obj-y) FORCE + $(call rule_mkdir) + $(call if_changed,ld_multi) + +__build: $(in-target) + @: + +PHONY += FORCE +FORCE: + +# Include all cmd files to get all the dependency rules +# for all objects included +targets := $(wildcard $(sort $(obj-y) $(in-target))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +.PHONY: $(PHONY) diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build new file mode 100644 index 0000000..0e6c3e6 --- /dev/null +++ b/tools/build/tests/ex/Build @@ -0,0 +1,8 @@ +ex-y += ex.o +ex-y += a.o +ex-y += b.o +ex-y += empty/ + +libex-y += c.o +libex-y += d.o +libex-y += arch/ diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile new file mode 100644 index 0000000..52d2476 --- /dev/null +++ b/tools/build/tests/ex/Makefile @@ -0,0 +1,23 @@ +export srctree := ../../../.. +export CC := gcc +export LD := ld +export AR := ar + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: ex-in.o libex-in.o + gcc -o $@ $^ + +ex.%: FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +ex-in.o: FORCE + make $(build)=ex + +libex-in.o: FORCE + make $(build)=libex + +clean: + find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + rm -f ex ex.i ex.s + +.PHONY: FORCE diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c new file mode 100644 index 0000000..8517627 --- /dev/null +++ b/tools/build/tests/ex/a.c @@ -0,0 +1,5 @@ + +int a(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build new file mode 100644 index 0000000..5550618 --- /dev/null +++ b/tools/build/tests/ex/arch/Build @@ -0,0 +1,2 @@ +libex-y += e.o +libex-y += f.o diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c new file mode 100644 index 0000000..beaa4a1 --- /dev/null +++ b/tools/build/tests/ex/arch/e.c @@ -0,0 +1,5 @@ + +int e(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c new file mode 100644 index 0000000..7c3e9e9 --- /dev/null +++ b/tools/build/tests/ex/arch/f.c @@ -0,0 +1,5 @@ + +int f(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c new file mode 100644 index 0000000..c24ff9c --- /dev/null +++ b/tools/build/tests/ex/b.c @@ -0,0 +1,5 @@ + +int b(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c new file mode 100644 index 0000000..e216d02 --- /dev/null +++ b/tools/build/tests/ex/c.c @@ -0,0 +1,5 @@ + +int c(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c new file mode 100644 index 0000000..80dc0f0 --- /dev/null +++ b/tools/build/tests/ex/d.c @@ -0,0 +1,5 @@ + +int d(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c new file mode 100644 index 0000000..dc42eb2 --- /dev/null +++ b/tools/build/tests/ex/ex.c @@ -0,0 +1,19 @@ + +int a(void); +int b(void); +int c(void); +int d(void); +int e(void); +int f(void); + +int main(void) +{ + a(); + b(); + c(); + d(); + e(); + f(); + + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh new file mode 100755 index 0000000..5494f8e --- /dev/null +++ b/tools/build/tests/run.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +function test_ex { + make -C ex V=1 clean > ex.out 2>&1 + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + +function test_ex_suffix { + make -C ex V=1 clean > ex.out 2>&1 + + # use -rR to disable make's builtin rules + make -rR -C ex V=1 ex.o >> ex.out 2>&1 + make -rR -C ex V=1 ex.i >> ex.out 2>&1 + make -rR -C ex V=1 ex.s >> ex.out 2>&1 + + if [ -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} +echo -n Testing.. + +test_ex +test_ex_suffix + +echo OK diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fbbfdc3..11ccbb2 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,5 +1,6 @@ tools/perf tools/scripts +tools/build tools/lib/traceevent tools/lib/api tools/lib/symbol/kallsyms.c -- cgit v0.10.2 From fcfd6611fbccdbf2593bd949097a5c0e45cd96da Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 31 Dec 2014 17:37:00 +0100 Subject: tools build: Add detected config support Adding support to include detected configuration makefile into the build process. This will allow the Build objects to be configurable based on the config data, like: perf-$(CONFIG_KRAVA) += krava.o The configuration is stored in '.config-detected' file, which is generated for each compilation. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-bl8qho0ubck7aqrbbfu9inlm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index ae203f2..35174d9 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -22,6 +22,9 @@ build-dir := $(srctree)/tools/build # Generic definitions include $(build-dir)/Build.include +# do not force detected configuration +-include .config-detected + # Init all relevant variables used in build files so # 1) they have correct type # 2) they do not inherit any value from the environment diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index cc22408..ba41421 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,6 +11,10 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif +$(shell echo -n > .config-detected) +detected = $(shell echo "$(1)=y" >> .config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) + LIB_INCLUDE := $(srctree)/tools/lib/ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) -- cgit v0.10.2 From 579ff6d409afa00e78822be9482d382dffd29ff4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 16:44:11 +0100 Subject: tools build: Add subdir support Add support to make directory any time we build objects out of the tree (O=/tmp/krava) and the output directory does not exist. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-h80ukls4o2kpr0e4c4bfln6u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 35174d9..692e1b1 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -37,6 +37,11 @@ subdir-obj-y := build-file := $(dir)/Build include $(build-file) +# Create directory unless it exists +quiet_cmd_mkdir = MKDIR $(dir $@) + cmd_mkdir = mkdir -p $(dir $@) + rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) + # Compile command quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< @@ -49,9 +54,11 @@ quiet_cmd_ld_multi = LD $@ # Build rules $(OUTPUT)%.o: %.c FORCE + $(call rule_mkdir) $(call if_changed_dep,cc_o_c) $(OUTPUT)%.o: %.S FORCE + $(call rule_mkdir) $(call if_changed_dep,cc_o_c) # Gather build data: -- cgit v0.10.2 From 885e00be17c07ffb517d471bf39c8acc44ef87a9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 11 Jan 2015 23:05:29 +0100 Subject: perf tools: Remove api fs object from python build It's already included in libapikfs.a library, which is already used to link perf.so. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ijp7xkmj585rqajy4xmvjnar@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 6c6a695..4d28624 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -17,6 +17,5 @@ util/xyarray.c util/cgroup.c util/rblist.c util/strlist.c -../lib/api/fs/fs.c util/trace-event.c ../../lib/rbtree.c -- cgit v0.10.2 From 6dd280cdb8a2cf53deacd6240707ec2f22222b20 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 10 Jan 2015 21:43:32 +0100 Subject: perf build: Disable make's built-in rules We don't use any built-in rules, so we can disable make's checks for that and build faster. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-fr54ist3woy7efz6z3m720vb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index aa6a504..0a256fe 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -84,6 +84,10 @@ ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) endif +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @touch $(OUTPUT)PERF-VERSION-FILE -- cgit v0.10.2 From 72965b87c5d5d15e33c620901b46c5ca1a3d6b8b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Dec 2014 13:52:36 +0100 Subject: perf build: Add bench objects building Move bench objects building under build framework and enable perf-in.o rule. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-b0gxubmn3qjabaq0lune53y3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build new file mode 100644 index 0000000..9ea828d --- /dev/null +++ b/tools/perf/Build @@ -0,0 +1,2 @@ +perf-y += builtin-bench.o +perf-y += bench/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0a256fe..77f6794 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -93,6 +93,7 @@ $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD @touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -464,21 +465,6 @@ LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o LIB_OBJS += $(OUTPUT)tests/switch-tracking.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-bench.o -# Benchmark modules -BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o -BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(ARCH), x86) -ifeq ($(IS_64_BIT), 1) -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o -endif -endif -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o - BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o BUILTIN_OBJS += $(OUTPUT)builtin-help.o @@ -597,10 +583,6 @@ ifeq ($(NO_PERF_REGS),0) LIB_OBJS += $(OUTPUT)util/perf_regs.o endif -ifndef NO_LIBNUMA - BUILTIN_OBJS += $(OUTPUT)bench/numa.o -endif - ifndef NO_ZLIB LIB_OBJS += $(OUTPUT)util/zlib.o endif @@ -627,14 +609,22 @@ shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell strip: $(PROGRAMS) $(OUTPUT)perf $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf +PERF_IN := $(OUTPUT)perf-in.o + +export srctree OUTPUT RM CC LD AR CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +$(PERF_IN): FORCE + @$(MAKE) $(build)=perf + $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ $(CFLAGS) -c $(filter %.c,$^) -o $@ -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) +$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) $(PERF_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(LIBS) -o $@ + $(BUILTIN_OBJS) $(PERF_IN) $(LIBS) -o $@ $(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< @@ -989,6 +979,8 @@ config-clean: clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) + @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + @$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean @@ -1004,7 +996,9 @@ else GIT-HEAD-PHONY = endif +FORCE: + .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS FORCE diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build new file mode 100644 index 0000000..5ce9802 --- /dev/null +++ b/tools/perf/bench/Build @@ -0,0 +1,11 @@ +perf-y += sched-messaging.o +perf-y += sched-pipe.o +perf-y += mem-memcpy.o +perf-y += futex-hash.o +perf-y += futex-wake.o +perf-y += futex-requeue.o + +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +perf-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index ba41421..3e90fca 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -28,6 +28,7 @@ ifeq ($(ARCH),x86) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + $(call detected,CONFIG_X86_64) else LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif @@ -639,6 +640,7 @@ ifndef NO_LIBNUMA else CFLAGS += -DHAVE_LIBNUMA_SUPPORT EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) endif endif -- cgit v0.10.2 From f39e042a133485e4b1aa73d3bc2249d01421f765 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Dec 2014 15:03:09 +0100 Subject: perf build: Add tests objects building Move test objects building under build framework. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-azbkwd1fl32t997a4shz4lgp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index 9ea828d..b4b6a7c 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -1,2 +1,3 @@ perf-y += builtin-bench.o perf-y += bench/ +perf-y += tests/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 77f6794..80c2a8f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -425,45 +425,6 @@ LIB_OBJS += $(OUTPUT)ui/stdio/hist.o LIB_OBJS += $(OUTPUT)arch/common.o -LIB_OBJS += $(OUTPUT)tests/parse-events.o -LIB_OBJS += $(OUTPUT)tests/dso-data.o -LIB_OBJS += $(OUTPUT)tests/attr.o -LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o -LIB_OBJS += $(OUTPUT)tests/open-syscall.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o -LIB_OBJS += $(OUTPUT)tests/mmap-basic.o -LIB_OBJS += $(OUTPUT)tests/perf-record.o -LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o -LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o -LIB_OBJS += $(OUTPUT)tests/fdarray.o -LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/hists_common.o -LIB_OBJS += $(OUTPUT)tests/hists_link.o -LIB_OBJS += $(OUTPUT)tests/hists_filter.o -LIB_OBJS += $(OUTPUT)tests/hists_output.o -LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o -LIB_OBJS += $(OUTPUT)tests/python-use.o -LIB_OBJS += $(OUTPUT)tests/bp_signal.o -LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o -LIB_OBJS += $(OUTPUT)tests/task-exit.o -LIB_OBJS += $(OUTPUT)tests/sw-clock.o -ifeq ($(ARCH),x86) -LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o -endif -LIB_OBJS += $(OUTPUT)tests/code-reading.o -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o -LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o -ifndef NO_DWARF_UNWIND -ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) -LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o -endif -endif -LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o -LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o -LIB_OBJS += $(OUTPUT)tests/switch-tracking.o - BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o @@ -483,7 +444,6 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o BUILTIN_OBJS += $(OUTPUT)builtin-lock.o BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o BUILTIN_OBJS += $(OUTPUT)builtin-mem.o PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) @@ -525,7 +485,6 @@ endif ifndef NO_LIBUNWIND LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o endif -LIB_OBJS += $(OUTPUT)tests/keep-tracking.o ifndef NO_LIBAUDIT BUILTIN_OBJS += $(OUTPUT)builtin-trace.o @@ -700,20 +659,6 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS '-DPREFIX="$(prefix_SQ)"' \ $< -$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - -DPYTHONPATH='"$(OUTPUT)python"' \ - -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $< - $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 3e90fca..2fd0185 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -20,10 +20,13 @@ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(src-perf)/config/Makefile.arch +$(call detected_var,ARCH) + NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) + $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S @@ -424,6 +427,7 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT + $(call detected,CONFIG_DWARF_UNWIND) else NO_DWARF_UNWIND := 1 endif @@ -821,3 +825,9 @@ endif ifeq ($(display_lib),1) $(info ) endif + +$(call detected_var,bindir_SQ) +$(call detected_var,PYTHON_WORD) +ifneq ($(OUTPUT),) +$(call detected_var,OUTPUT) +endif diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build new file mode 100644 index 0000000..2de01a4 --- /dev/null +++ b/tools/perf/tests/Build @@ -0,0 +1,42 @@ +perf-y += builtin-test.o +perf-y += parse-events.o +perf-y += dso-data.o +perf-y += attr.o +perf-y += vmlinux-kallsyms.o +perf-y += open-syscall.o +perf-y += open-syscall-all-cpus.o +perf-y += open-syscall-tp-fields.o +perf-y += mmap-basic.o +perf-y += perf-record.o +perf-y += rdpmc.o +perf-y += evsel-roundtrip-name.o +perf-y += evsel-tp-sched.o +perf-y += fdarray.o +perf-y += pmu.o +perf-y += hists_common.o +perf-y += hists_link.o +perf-y += hists_filter.o +perf-y += hists_output.o +perf-y += hists_cumulate.o +perf-y += python-use.o +perf-y += bp_signal.o +perf-y += bp_signal_overflow.o +perf-y += task-exit.o +perf-y += sw-clock.o +perf-y += mmap-thread-lookup.o +perf-y += thread-mg-share.o +perf-y += switch-tracking.o +perf-y += keep-tracking.o +perf-y += code-reading.o +perf-y += sample-parsing.o +perf-y += parse-no-sample-id-all.o + +perf-$(CONFIG_X86) += perf-time-to-tsc.o + +ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +endif + +CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls -- cgit v0.10.2 From 285ab8bfc6637780052f663d90e3aa9a653042c9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Dec 2014 15:13:44 +0100 Subject: perf build: Add builtin objects building Move the rest of builtin objects (bench and test are already in) building under build framework. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-mrh2d4kfyi4g1el4kmdcghl8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index b4b6a7c..80a944b 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -1,3 +1,32 @@ perf-y += builtin-bench.o +perf-y += builtin-annotate.o +perf-y += builtin-diff.o +perf-y += builtin-evlist.o +perf-y += builtin-help.o +perf-y += builtin-sched.o +perf-y += builtin-buildid-list.o +perf-y += builtin-buildid-cache.o +perf-y += builtin-list.o +perf-y += builtin-record.o +perf-y += builtin-report.o +perf-y += builtin-stat.o +perf-y += builtin-timechart.o +perf-y += builtin-top.o +perf-y += builtin-script.o +perf-y += builtin-kmem.o +perf-y += builtin-lock.o +perf-y += builtin-kvm.o +perf-y += builtin-inject.o +perf-y += builtin-mem.o + +perf-$(CONFIG_AUDIT) += builtin-trace.o + perf-y += bench/ perf-y += tests/ + +paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" +paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))" +paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" + +CFLAGS_builtin-help.o += $(paths) +CFLAGS_builtin-timechart.o += $(paths) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 80c2a8f..4990b99 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -425,26 +425,7 @@ LIB_OBJS += $(OUTPUT)ui/stdio/hist.o LIB_OBJS += $(OUTPUT)arch/common.o -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-diff.o -BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o -BUILTIN_OBJS += $(OUTPUT)builtin-help.o -BUILTIN_OBJS += $(OUTPUT)builtin-sched.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o -BUILTIN_OBJS += $(OUTPUT)builtin-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-record.o -BUILTIN_OBJS += $(OUTPUT)builtin-report.o -BUILTIN_OBJS += $(OUTPUT)builtin-stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o -BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-script.o BUILTIN_OBJS += $(OUTPUT)builtin-probe.o -BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o -BUILTIN_OBJS += $(OUTPUT)builtin-lock.o -BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o -BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)builtin-mem.o PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) @@ -486,10 +467,6 @@ ifndef NO_LIBUNWIND LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o endif -ifndef NO_LIBAUDIT - BUILTIN_OBJS += $(OUTPUT)builtin-trace.o -endif - ifndef NO_SLANG LIB_OBJS += $(OUTPUT)ui/browser.o LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o @@ -573,7 +550,7 @@ PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V build := -f $(srctree)/tools/build/Makefile.build dir=. obj -$(PERF_IN): FORCE +$(PERF_IN): $(OUTPUT)common-cmds.h FORCE @$(MAKE) $(build)=perf $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS @@ -591,18 +568,6 @@ $(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) $(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) -$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< - -$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< - $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 2fd0185..deb0c59 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -456,6 +456,7 @@ ifndef NO_LIBAUDIT else CFLAGS += -DHAVE_LIBAUDIT_SUPPORT EXTLIBS += -laudit + $(call detected,CONFIG_AUDIT) endif endif @@ -831,3 +832,6 @@ $(call detected_var,PYTHON_WORD) ifneq ($(OUTPUT),) $(call detected_var,OUTPUT) endif +$(call detected_var,htmldir_SQ) +$(call detected_var,infodir_SQ) +$(call detected_var,mandir_SQ) -- cgit v0.10.2 From 9352aabad16af51c4c66fb2470ca01e4005bd282 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Dec 2014 17:42:46 +0100 Subject: perf build: Add libperf objects building Move the util objects building under build framework. Add the new libperf build object so it's separated from the rest of the perf code and could be librarized. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-574tgt9t23tnxo9td8qjiibc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 692e1b1..aced86d 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -37,6 +37,9 @@ subdir-obj-y := build-file := $(dir)/Build include $(build-file) +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ + # Create directory unless it exists quiet_cmd_mkdir = MKDIR $(dir $@) cmd_mkdir = mkdir -p $(dir $@) diff --git a/tools/perf/Build b/tools/perf/Build index 80a944b..31c4c55 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -30,3 +30,6 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" CFLAGS_builtin-help.o += $(paths) CFLAGS_builtin-timechart.o += $(paths) + +libperf-y += util/ +libperf-y += arch/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4990b99..8951cd9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -82,6 +82,11 @@ endif ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. +VPATH += $(OUTPUT) +export VPATH endif # Do not use make's built-in rules @@ -211,21 +216,6 @@ endif export PERL_PATH -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l - -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ - -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l - -$(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ - -$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c - LIB_FILE=$(OUTPUT)libperf.a LIB_H += ../lib/symbol/kallsyms.h @@ -337,84 +327,8 @@ LIB_H += util/data.h LIB_H += util/kvm-stat.h LIB_H += util/thread-stack.h -LIB_OBJS += $(OUTPUT)util/abspath.o -LIB_OBJS += $(OUTPUT)util/alias.o -LIB_OBJS += $(OUTPUT)util/annotate.o -LIB_OBJS += $(OUTPUT)util/build-id.o -LIB_OBJS += $(OUTPUT)util/config.o -LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/db-export.o -LIB_OBJS += $(OUTPUT)util/pmu.o -LIB_OBJS += $(OUTPUT)util/environment.o -LIB_OBJS += $(OUTPUT)util/event.o -LIB_OBJS += $(OUTPUT)util/evlist.o -LIB_OBJS += $(OUTPUT)util/evsel.o -LIB_OBJS += $(OUTPUT)util/exec_cmd.o -LIB_OBJS += $(OUTPUT)util/find_next_bit.o -LIB_OBJS += $(OUTPUT)util/help.o -LIB_OBJS += $(OUTPUT)util/kallsyms.o -LIB_OBJS += $(OUTPUT)util/levenshtein.o -LIB_OBJS += $(OUTPUT)util/parse-options.o -LIB_OBJS += $(OUTPUT)util/parse-events.o -LIB_OBJS += $(OUTPUT)util/path.o -LIB_OBJS += $(OUTPUT)util/rbtree.o -LIB_OBJS += $(OUTPUT)util/bitmap.o -LIB_OBJS += $(OUTPUT)util/hweight.o -LIB_OBJS += $(OUTPUT)util/run-command.o -LIB_OBJS += $(OUTPUT)util/quote.o -LIB_OBJS += $(OUTPUT)util/strbuf.o -LIB_OBJS += $(OUTPUT)util/string.o -LIB_OBJS += $(OUTPUT)util/strlist.o -LIB_OBJS += $(OUTPUT)util/strfilter.o -LIB_OBJS += $(OUTPUT)util/top.o -LIB_OBJS += $(OUTPUT)util/usage.o -LIB_OBJS += $(OUTPUT)util/wrapper.o -LIB_OBJS += $(OUTPUT)util/sigchain.o -LIB_OBJS += $(OUTPUT)util/dso.o -LIB_OBJS += $(OUTPUT)util/symbol.o LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/color.o -LIB_OBJS += $(OUTPUT)util/pager.o -LIB_OBJS += $(OUTPUT)util/header.o -LIB_OBJS += $(OUTPUT)util/callchain.o -LIB_OBJS += $(OUTPUT)util/values.o -LIB_OBJS += $(OUTPUT)util/debug.o -LIB_OBJS += $(OUTPUT)util/machine.o -LIB_OBJS += $(OUTPUT)util/map.o -LIB_OBJS += $(OUTPUT)util/pstack.o -LIB_OBJS += $(OUTPUT)util/session.o -LIB_OBJS += $(OUTPUT)util/ordered-events.o -LIB_OBJS += $(OUTPUT)util/comm.o -LIB_OBJS += $(OUTPUT)util/thread.o -LIB_OBJS += $(OUTPUT)util/thread_map.o -LIB_OBJS += $(OUTPUT)util/trace-event-parse.o -LIB_OBJS += $(OUTPUT)util/parse-events-flex.o -LIB_OBJS += $(OUTPUT)util/parse-events-bison.o -LIB_OBJS += $(OUTPUT)util/pmu-flex.o -LIB_OBJS += $(OUTPUT)util/pmu-bison.o -LIB_OBJS += $(OUTPUT)util/trace-event-read.o -LIB_OBJS += $(OUTPUT)util/trace-event-info.o -LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o -LIB_OBJS += $(OUTPUT)util/trace-event.o -LIB_OBJS += $(OUTPUT)util/svghelper.o -LIB_OBJS += $(OUTPUT)util/sort.o -LIB_OBJS += $(OUTPUT)util/hist.o LIB_OBJS += $(OUTPUT)util/probe-event.o -LIB_OBJS += $(OUTPUT)util/util.o -LIB_OBJS += $(OUTPUT)util/xyarray.o -LIB_OBJS += $(OUTPUT)util/cpumap.o -LIB_OBJS += $(OUTPUT)util/cgroup.o -LIB_OBJS += $(OUTPUT)util/target.o -LIB_OBJS += $(OUTPUT)util/rblist.o -LIB_OBJS += $(OUTPUT)util/intlist.o -LIB_OBJS += $(OUTPUT)util/vdso.o -LIB_OBJS += $(OUTPUT)util/stat.o -LIB_OBJS += $(OUTPUT)util/record.o -LIB_OBJS += $(OUTPUT)util/srcline.o -LIB_OBJS += $(OUTPUT)util/data.o -LIB_OBJS += $(OUTPUT)util/tsc.o -LIB_OBJS += $(OUTPUT)util/cloexec.o -LIB_OBJS += $(OUTPUT)util/thread-stack.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o @@ -423,8 +337,6 @@ LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/hist.o LIB_OBJS += $(OUTPUT)ui/stdio/hist.o -LIB_OBJS += $(OUTPUT)arch/common.o - BUILTIN_OBJS += $(OUTPUT)builtin-probe.o PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) @@ -547,7 +459,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o -export srctree OUTPUT RM CC LD AR CFLAGS V +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): $(OUTPUT)common-cmds.h FORCE @@ -601,12 +513,6 @@ endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< - -$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< - $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< $(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS @@ -618,15 +524,6 @@ $(OUTPUT)%.o: %.S $(OUTPUT)%.s: %.S $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - $(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< @@ -645,21 +542,6 @@ $(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< - -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< - $(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< @@ -703,8 +585,13 @@ $(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) $(OUTPUT_DIRECTORIES): $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) +LIBPERF_IN := $(OUTPUT)libperf-in.o + +$(LIBPERF_IN): FORCE + @$(MAKE) $(build)=libperf + +$(LIB_FILE): $(LIB_OBJS) $(LIBPERF_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) # libtraceevent.a TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build new file mode 100644 index 0000000..304f5e7 --- /dev/null +++ b/tools/perf/arch/Build @@ -0,0 +1 @@ +libperf-y += common.o diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index deb0c59..5b89bb7 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -122,6 +122,8 @@ ifdef PARSER_DEBUG PARSER_DEBUG_BISON := -t PARSER_DEBUG_FLEX := -d CFLAGS += -DPARSER_DEBUG + $(call detected_var,PARSER_DEBUG_BISON) + $(call detected_var,PARSER_DEBUG_FLEX) endif ifndef NO_LIBPYTHON @@ -835,3 +837,6 @@ endif $(call detected_var,htmldir_SQ) $(call detected_var,infodir_SQ) $(call detected_var,mandir_SQ) +$(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,prefix_SQ) +$(call detected_var,perfexecdir_SQ) diff --git a/tools/perf/util/Build b/tools/perf/util/Build new file mode 100644 index 0000000..c107f30 --- /dev/null +++ b/tools/perf/util/Build @@ -0,0 +1,116 @@ +libperf-y += abspath.o +libperf-y += alias.o +libperf-y += annotate.o +libperf-y += build-id.o +libperf-y += config.o +libperf-y += ctype.o +libperf-y += db-export.o +libperf-y += environment.o +libperf-y += event.o +libperf-y += evlist.o +libperf-y += evsel.o +libperf-y += exec_cmd.o +libperf-y += find_next_bit.o +libperf-y += help.o +libperf-y += kallsyms.o +libperf-y += levenshtein.o +libperf-y += parse-options.o +libperf-y += parse-events.o +libperf-y += path.o +libperf-y += rbtree.o +libperf-y += bitmap.o +libperf-y += hweight.o +libperf-y += run-command.o +libperf-y += quote.o +libperf-y += strbuf.o +libperf-y += string.o +libperf-y += strlist.o +libperf-y += strfilter.o +libperf-y += top.o +libperf-y += usage.o +libperf-y += wrapper.o +libperf-y += sigchain.o +libperf-y += dso.o +libperf-y += symbol.o +libperf-y += color.o +libperf-y += pager.o +libperf-y += header.o +libperf-y += callchain.o +libperf-y += values.o +libperf-y += debug.o +libperf-y += machine.o +libperf-y += map.o +libperf-y += pstack.o +libperf-y += session.o +libperf-y += ordered-events.o +libperf-y += comm.o +libperf-y += thread.o +libperf-y += thread_map.o +libperf-y += trace-event-parse.o +libperf-y += parse-events-flex.o +libperf-y += parse-events-bison.o +libperf-y += pmu.o +libperf-y += pmu-flex.o +libperf-y += pmu-bison.o +libperf-y += trace-event-read.o +libperf-y += trace-event-info.o +libperf-y += trace-event-scripting.o +libperf-y += trace-event.o +libperf-y += svghelper.o +libperf-y += sort.o +libperf-y += hist.o +libperf-y += util.o +libperf-y += xyarray.o +libperf-y += cpumap.o +libperf-y += cgroup.o +libperf-y += target.o +libperf-y += rblist.o +libperf-y += intlist.o +libperf-y += vdso.o +libperf-y += stat.o +libperf-y += record.o +libperf-y += srcline.o +libperf-y += data.o +libperf-y += tsc.o +libperf-y += cloexec.o +libperf-y += thread-stack.o + +CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ + +CFLAGS_parse-events-flex.o += -w +CFLAGS_pmu-flex.o += -w +CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + +CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_parse-events.o += -Wno-redundant-decls + +$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE + $(call if_changed_dep,cc_o_c) -- cgit v0.10.2 From 709e679193c6e0e39222cd1fd51008225208cbc7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 29 Dec 2014 23:52:25 +0100 Subject: perf build: Add probe objects building Move the probe objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-p39iitiu2ltgmtbn48bsh7nz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index 31c4c55..170e456 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -20,6 +20,7 @@ perf-y += builtin-inject.o perf-y += builtin-mem.o perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_LIBELF) += builtin-probe.o perf-y += bench/ perf-y += tests/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8951cd9..0997e2b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -327,9 +327,6 @@ LIB_H += util/data.h LIB_H += util/kvm-stat.h LIB_H += util/thread-stack.h -LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/probe-event.o - LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/progress.o @@ -337,8 +334,6 @@ LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/hist.o LIB_OBJS += $(OUTPUT)ui/stdio/hist.o -BUILTIN_OBJS += $(OUTPUT)builtin-probe.o - PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" @@ -351,19 +346,7 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifdef NO_LIBELF -# Remove ELF/DWARF dependent codes -LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) - -BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) - -# Use minimal symbol handling -LIB_OBJS += $(OUTPUT)util/symbol-minimal.o - -else # NO_LIBELF +ifndef NO_LIBELF ifndef NO_DWARF LIB_OBJS += $(OUTPUT)util/probe-finder.o LIB_OBJS += $(OUTPUT)util/dwarf-aux.o diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5b89bb7..79ee4cc 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -371,6 +371,7 @@ endif # NO_LIBELF ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf + $(call detected,CONFIG_LIBELF) ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT diff --git a/tools/perf/util/Build b/tools/perf/util/Build index c107f30..73a0411 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -75,6 +75,13 @@ libperf-y += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o +libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-event.o + +ifndef CONFIG_LIBELF +libperf-y += symbol-minimal.o +endif + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" -- cgit v0.10.2 From 8379fce485cc57daa42a06f4cf1ad822b794d95d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 00:06:25 +0100 Subject: perf build: Add dwarf objects building Move the dwarf objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5ody6tnfnkt4rezvpem8n7rm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0997e2b..5cb62b0 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -346,13 +346,6 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifndef NO_LIBELF -ifndef NO_DWARF - LIB_OBJS += $(OUTPUT)util/probe-finder.o - LIB_OBJS += $(OUTPUT)util/dwarf-aux.o -endif # NO_DWARF -endif # NO_LIBELF - ifndef NO_LIBDW_DWARF_UNWIND LIB_OBJS += $(OUTPUT)util/unwind-libdw.o LIB_H += util/unwind-libdw.h diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 79ee4cc..5f55398 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -392,6 +392,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) EXTLIBS += -ldw + $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF endif # NO_LIBELF diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 73a0411..10630fb 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -82,6 +82,9 @@ ifndef CONFIG_LIBELF libperf-y += symbol-minimal.o endif +libperf-$(CONFIG_DWARF) += probe-finder.o +libperf-$(CONFIG_DWARF) += dwarf-aux.o + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" -- cgit v0.10.2 From b2e45c322e0298652fc05e65c671b2b88d30ae31 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 00:11:11 +0100 Subject: perf build: Add dwarf unwind objects building Move the dwarf unwind objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-7f7dmhkhs0e7jnqiu9ibzqia@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5cb62b0..96e3cdc 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -346,15 +346,6 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifndef NO_LIBDW_DWARF_UNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libdw.o - LIB_H += util/unwind-libdw.h -endif - -ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o -endif - ifndef NO_SLANG LIB_OBJS += $(OUTPUT)ui/browser.o LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5f55398..e55d811 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -420,9 +420,11 @@ ifdef NO_LIBUNWIND dwarf-post-unwind := 0 else dwarf-post-unwind-text := libdw + $(call detected,CONFIG_LIBDW_DWARF_UNWIND) endif else dwarf-post-unwind-text := libunwind + $(call detected,CONFIG_LIBUNWIND) # Enable libunwind support by default. ifndef NO_LIBDW_DWARF_UNWIND NO_LIBDW_DWARF_UNWIND := 1 diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 10630fb..0401a80 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -85,6 +85,9 @@ endif libperf-$(CONFIG_DWARF) += probe-finder.o libperf-$(CONFIG_DWARF) += dwarf-aux.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" -- cgit v0.10.2 From 3b939a631b53bdb1bf6826fca2a330b16e995fc2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 00:16:01 +0100 Subject: perf build: Add ui objects building Move the ui objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-re5vuat8uu396n7hyor9b5ve@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index 170e456..e84ced3 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -34,3 +34,4 @@ CFLAGS_builtin-timechart.o += $(paths) libperf-y += util/ libperf-y += arch/ +libperf-y += ui/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 96e3cdc..c658fa6 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -327,13 +327,6 @@ LIB_H += util/data.h LIB_H += util/kvm-stat.h LIB_H += util/thread-stack.h -LIB_OBJS += $(OUTPUT)ui/setup.o -LIB_OBJS += $(OUTPUT)ui/helpline.o -LIB_OBJS += $(OUTPUT)ui/progress.o -LIB_OBJS += $(OUTPUT)ui/util.o -LIB_OBJS += $(OUTPUT)ui/hist.o -LIB_OBJS += $(OUTPUT)ui/stdio/hist.o - PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" @@ -491,9 +484,6 @@ $(OUTPUT)%.o: %.S $(OUTPUT)%.s: %.S $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< - $(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index e55d811..0739138 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -844,3 +844,4 @@ $(call detected_var,mandir_SQ) $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) +$(call detected_var,LIBDIR) diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build new file mode 100644 index 0000000..077b6a4 --- /dev/null +++ b/tools/perf/ui/Build @@ -0,0 +1,8 @@ +libperf-y += setup.o +libperf-y += helpline.o +libperf-y += progress.o +libperf-y += util.o +libperf-y += hist.o +libperf-y += stdio/hist.o + +CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" -- cgit v0.10.2 From cf15c74cbdccb8fb5ced91c6f24f9b3a68f9a82b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 00:27:52 +0100 Subject: perf build: Add slang objects building Move the slang objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-2ofo1r00jl6i143qxcl9n2jr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c658fa6..d9de8af 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -339,24 +339,6 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifndef NO_SLANG - LIB_OBJS += $(OUTPUT)ui/browser.o - LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/browsers/header.o - LIB_OBJS += $(OUTPUT)ui/tui/setup.o - LIB_OBJS += $(OUTPUT)ui/tui/util.o - LIB_OBJS += $(OUTPUT)ui/tui/helpline.o - LIB_OBJS += $(OUTPUT)ui/tui/progress.o - LIB_H += ui/tui/tui.h - LIB_H += ui/browser.h - LIB_H += ui/browsers/map.h - LIB_H += ui/keysyms.h - LIB_H += ui/libslang.h -endif - ifndef NO_GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so @@ -484,21 +466,6 @@ $(OUTPUT)%.o: %.S $(OUTPUT)%.s: %.S $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - $(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 0739138..4b25111 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -479,6 +479,7 @@ ifndef NO_SLANG CFLAGS += -I/usr/include/slang CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang + $(call detected,CONFIG_SLANG) endif endif diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build index 077b6a4..0a73538 100644 --- a/tools/perf/ui/Build +++ b/tools/perf/ui/Build @@ -6,3 +6,9 @@ libperf-y += hist.o libperf-y += stdio/hist.o CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" + +libperf-$(CONFIG_SLANG) += browser.o +libperf-$(CONFIG_SLANG) += browsers/ +libperf-$(CONFIG_SLANG) += tui/ + +CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build new file mode 100644 index 0000000..de223f5 --- /dev/null +++ b/tools/perf/ui/browsers/Build @@ -0,0 +1,10 @@ +libperf-y += annotate.o +libperf-y += hists.o +libperf-y += map.o +libperf-y += scripts.o +libperf-y += header.o + +CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST +CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST +CFLAGS_map.o += -DENABLE_SLFUTURE_CONST +CFLAGS_scripts.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build new file mode 100644 index 0000000..9e4c6ca --- /dev/null +++ b/tools/perf/ui/tui/Build @@ -0,0 +1,4 @@ +libperf-y += setup.o +libperf-y += util.o +libperf-y += helpline.o +libperf-y += progress.o -- cgit v0.10.2 From 88aeea06ebd2192328c83519f07e5768681e29e7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 00:34:23 +0100 Subject: perf build: Add gtk objects building Move the gtk objects building under build framework. Add new gtk build object so it's separated from the rest of the code and could be librarized. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cd27z7vww85nxdq37rkjkkbm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index e84ced3..a9ff8fd 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -35,3 +35,5 @@ CFLAGS_builtin-timechart.o += $(paths) libperf-y += util/ libperf-y += arch/ libperf-y += ui/ + +gtk-y += ui/gtk/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d9de8af..18ff8aa 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -341,14 +341,7 @@ endif ifndef NO_GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so - - GTK_OBJS += $(OUTPUT)ui/gtk/browser.o - GTK_OBJS += $(OUTPUT)ui/gtk/hists.o - GTK_OBJS += $(OUTPUT)ui/gtk/setup.o - GTK_OBJS += $(OUTPUT)ui/gtk/util.o - GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o - GTK_OBJS += $(OUTPUT)ui/gtk/progress.o - GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o + GTK_IN := $(OUTPUT)gtk-in.o install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ @@ -416,10 +409,10 @@ $(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) $(PERF_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ $(BUILTIN_OBJS) $(PERF_IN) $(LIBS) -o $@ -$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) - $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< +$(GTK_IN): FORCE + @$(MAKE) $(build)=gtk -$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) +$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt @@ -699,7 +692,7 @@ config-clean: @$(MAKE) -C config/feature-checks clean >/dev/null clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete @$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 4b25111..6b96782 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -846,3 +846,4 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) $(call detected_var,LIBDIR) +$(call detected_var,GTK_CFLAGS) diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build new file mode 100644 index 0000000..ec22e89 --- /dev/null +++ b/tools/perf/ui/gtk/Build @@ -0,0 +1,9 @@ +CFLAGS_gtk += -fPIC $(GTK_CFLAGS) + +gtk-y += browser.o +gtk-y += hists.o +gtk-y += setup.o +gtk-y += util.o +gtk-y += helpline.o +gtk-y += progress.o +gtk-y += annotate.o -- cgit v0.10.2 From c7355f842bf84ba7b1c9d6378f85bb53c99284b2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 13:11:32 +0100 Subject: perf build: Add scripts objects building Move the scripts objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ry8pd41ahwpq9h46i8te33c7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index a9ff8fd..133ec64 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -35,5 +35,6 @@ CFLAGS_builtin-timechart.o += $(paths) libperf-y += util/ libperf-y += arch/ libperf-y += ui/ +libperf-y += scripts/ gtk-y += ui/gtk/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 18ff8aa..4705fa9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -349,16 +349,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifndef NO_LIBPERL - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o - LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o -endif - -ifndef NO_LIBPYTHON - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o - LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o -endif - ifeq ($(NO_PERF_REGS),0) ifeq ($(ARCH),x86) LIB_H += arch/x86/include/perf_regs.h @@ -459,18 +449,6 @@ $(OUTPUT)%.o: %.S $(OUTPUT)%.s: %.S $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< - -$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< - -$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< - -$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< - $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 6b96782..e92d1a4 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -518,6 +518,7 @@ else else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) + $(call detected,CONFIG_LIBPERL) endif endif @@ -577,6 +578,7 @@ else LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LANG_BINDINGS += $(obj-perf)python/perf.so + $(call detected,CONFIG_LIBPYTHON) endif endif endif @@ -847,3 +849,5 @@ $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) $(call detected_var,LIBDIR) $(call detected_var,GTK_CFLAGS) +$(call detected_var,PERL_EMBED_CCOPTS) +$(call detected_var,PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build new file mode 100644 index 0000000..41efd7e --- /dev/null +++ b/tools/perf/scripts/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build new file mode 100644 index 0000000..928e110 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build new file mode 100644 index 0000000..aefc15c --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 0401a80..b0a6542 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -88,6 +88,8 @@ libperf-$(CONFIG_DWARF) += dwarf-aux.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-y += scripting-engines/ + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build new file mode 100644 index 0000000..6516e22 --- /dev/null +++ b/tools/perf/util/scripting-engines/Build @@ -0,0 +1,6 @@ +libperf-$(CONFIG_LIBPERL) += trace-event-perl.o +libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o + +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default + +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -- cgit v0.10.2 From 3bc3374cc50ce533259b7efed261f3d68307113c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 13:30:04 +0100 Subject: perf build: Add perf regs objects building Move the regs objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-hgny792g5x5iaklc34aa57uh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4705fa9..7daccac 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -349,13 +349,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifeq ($(NO_PERF_REGS),0) - ifeq ($(ARCH),x86) - LIB_H += arch/x86/include/perf_regs.h - endif - LIB_OBJS += $(OUTPUT)util/perf_regs.o -endif - ifndef NO_ZLIB LIB_OBJS += $(OUTPUT)util/zlib.o endif diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index e92d1a4..2fc4d56 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -48,6 +48,10 @@ ifeq ($(ARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(NO_PERF_REGS),0) + $(call detected,CONFIG_PERF_REGS) +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures diff --git a/tools/perf/util/Build b/tools/perf/util/Build index b0a6542..f698432 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -90,6 +90,8 @@ libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-y += scripting-engines/ +libperf-$(CONFIG_PERF_REGS) += perf_regs.o + CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" -- cgit v0.10.2 From 1571b695053c4ccad66c5151d78247a6590338d6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 13:31:12 +0100 Subject: perf build: Add zlib objects building Move the zlib objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cpbb47g82ahpa4yqfr9dcobq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7daccac..713f4d1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -349,10 +349,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifndef NO_ZLIB - LIB_OBJS += $(OUTPUT)util/zlib.o -endif - ifdef ASCIIDOC8 export ASCIIDOC8 endif diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 2fc4d56..b97a7b9 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -640,6 +640,7 @@ ifndef NO_ZLIB ifeq ($(feature-zlib), 1) CFLAGS += -DHAVE_ZLIB_SUPPORT EXTLIBS += -lz + $(call detected,CONFIG_ZLIB) else NO_ZLIB := 1 endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f698432..4599188 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -91,6 +91,7 @@ libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-y += scripting-engines/ libperf-$(CONFIG_PERF_REGS) += perf_regs.o +libperf-$(CONFIG_ZLIB) += zlib.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" -- cgit v0.10.2 From cb4e67fdee46116d6ec5ad37316cf7ff35ad1d7c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 17:09:15 +0100 Subject: perf build: Add perf.o object building Move the perf object building under build framework to be included in the perf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-wiiciip2w6ajvj03huqz50xw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index 133ec64..976e038 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -25,12 +25,15 @@ perf-$(CONFIG_LIBELF) += builtin-probe.o perf-y += bench/ perf-y += tests/ +perf-y += perf.o + paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))" paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" CFLAGS_builtin-help.o += $(paths) CFLAGS_builtin-timechart.o += $(paths) +CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE libperf-y += util/ libperf-y += arch/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 713f4d1..0a669f9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -376,16 +376,11 @@ PERF_IN := $(OUTPUT)perf-in.o export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX build := -f $(srctree)/tools/build/Makefile.build dir=. obj -$(PERF_IN): $(OUTPUT)common-cmds.h FORCE +$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE @$(MAKE) $(build)=perf -$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(CFLAGS) -c $(filter %.c,$^) -o $@ - -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) $(PERF_IN) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ +$(OUTPUT)perf: $(BUILTIN_OBJS) $(PERFLIBS) $(PERF_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ $(BUILTIN_OBJS) $(PERF_IN) $(LIBS) -o $@ $(GTK_IN): FORCE @@ -403,8 +398,7 @@ $(SCRIPTS) : % : %.sh $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' # These can record PERF_VERSION -$(OUTPUT)perf.o perf.spec \ - $(SCRIPTS) \ +perf.spec $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE .SUFFIXES: @@ -659,7 +653,7 @@ config-clean: @$(MAKE) -C config/feature-checks clean >/dev/null clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete @$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 -- cgit v0.10.2 From 5e8c0fb6a95728b852d56c0a9244425d474670c0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:03:40 +0100 Subject: perf build: Add arch x86 objects building Move the x86 arch objects building under build framework to be included in the libperf build object. Adding also arch/$(ARCH)/Build files for the rest of the archs. The reason for this is that in arch/Build we now do: +libperf-y += $(ARCH)/ which would make the build to fail on other architectures, because the build framework requires 'Build' file in nested directories and this patch adds it only for x86. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5enob06z07m7ew6nzzdmp3n2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index 304f5e7..109eb75 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1 +1,2 @@ libperf-y += common.o +libperf-y += $(ARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build new file mode 100644 index 0000000..e69de29 diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build new file mode 100644 index 0000000..41bf61d --- /dev/null +++ b/tools/perf/arch/x86/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 9b21881..21322e0 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,19 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o -endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o -LIB_H += arch/$(ARCH)/util/tsc.h HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build new file mode 100644 index 0000000..b30eff9 --- /dev/null +++ b/tools/perf/arch/x86/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build new file mode 100644 index 0000000..cfbccc4 --- /dev/null +++ b/tools/perf/arch/x86/util/Build @@ -0,0 +1,8 @@ +libperf-y += header.o +libperf-y += tsc.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -- cgit v0.10.2 From f6ff0e6d7bf41e8464b4a50ba48e0e1502ef8438 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:07:04 +0100 Subject: perf build: Add arch arm objects building Move the arm arch objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-7bxhmeh4bjabqsmxu4gl6p0b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build index e69de29..41bf61d 100644 --- a/tools/perf/arch/arm/Build +++ b/tools/perf/arch/arm/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 09d6215..7fbca17 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,14 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build new file mode 100644 index 0000000..b30eff9 --- /dev/null +++ b/tools/perf/arch/arm/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build new file mode 100644 index 0000000..d22e3d0 --- /dev/null +++ b/tools/perf/arch/arm/util/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -- cgit v0.10.2 From 7db216181484d871fcebfab11cdd146aaf80bf94 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:09:08 +0100 Subject: perf build: Add arch arm64 objects building Move the arm64 arch objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ptqfz1op92yrtccjiww7h1v5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build index e69de29..54afe4a 100644 --- a/tools/perf/arch/arm64/Build +++ b/tools/perf/arch/arm64/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 67e9b3d..7fbca17 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,7 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o endif diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build new file mode 100644 index 0000000..e58123a8 --- /dev/null +++ b/tools/perf/arch/arm64/util/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -- cgit v0.10.2 From 07a39e11a0e783c3e9f2a653d4b0dcde954edd28 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:10:57 +0100 Subject: perf build: Add arch powerpc objects building Move the powerpc arch objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-nqrtlipvjptdyjfuzlnegqgu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index e69de29..54afe4a 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 6f7782b..7fbca17 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,6 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build new file mode 100644 index 0000000..0af6e9b --- /dev/null +++ b/tools/perf/arch/powerpc/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_DWARF) += skip-callchain-idx.o -- cgit v0.10.2 From 953bce80c01a3e475a5134e8ec410d6f39b9d188 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:12:21 +0100 Subject: perf build: Add arch s390 objects building Move the s390 arch objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-8f5tlfwegkirhir2ffz8nw3i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build index e69de29..54afe4a 100644 --- a/tools/perf/arch/s390/Build +++ b/tools/perf/arch/s390/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 798ac73..21322e0 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,7 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build new file mode 100644 index 0000000..8a61372 --- /dev/null +++ b/tools/perf/arch/s390/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o -- cgit v0.10.2 From 61b021244b328e3cb5b08079b28e4e2742ff7656 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:13:25 +0100 Subject: perf build: Add arch sh objects building Move the sh arch objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-nsg1j4djtq85jtrqw830f2az@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build index e69de29..54afe4a 100644 --- a/tools/perf/arch/sh/Build +++ b/tools/perf/arch/sh/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile index 15130b50..7fbca17 100644 --- a/tools/perf/arch/sh/Makefile +++ b/tools/perf/arch/sh/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build new file mode 100644 index 0000000..954e287 --- /dev/null +++ b/tools/perf/arch/sh/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o -- cgit v0.10.2 From 6d8e62c302bb9285e8882116fc317916ece5d0ab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 14:14:20 +0100 Subject: perf build: Add arch sparc objects building Move the sparc arch objects building under build framework to be included in the libperf build object. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-160hknrqr27c9zf59japw91y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build index e69de29..54afe4a 100644 --- a/tools/perf/arch/sparc/Build +++ b/tools/perf/arch/sparc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 15130b50..7fbca17 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build new file mode 100644 index 0000000..954e287 --- /dev/null +++ b/tools/perf/arch/sparc/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o -- cgit v0.10.2 From 1999307b469bdfda97baa78c7f4ecf3800fdbacd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 18:44:38 +0100 Subject: perf build: Add single target build framework support Add support to build single targets, like: $ make util/map.o # objects $ make util/map.i # preprocessor $ make util/map.s # assembly Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-tt10y0dmweq6rjaod937rpb4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index aced86d..b5ded20 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -49,6 +49,12 @@ quiet_cmd_mkdir = MKDIR $(dir $@) quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +quiet_cmd_cc_i_c = CPP $@ + cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $< + +quiet_cmd_cc_s_c = AS $@ + cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< + # Link agregate command # If there's nothing to link, create empty $@ object. quiet_cmd_ld_multi = LD $@ @@ -64,6 +70,18 @@ $(OUTPUT)%.o: %.S FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)%.i: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.i: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.s: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_s_c) + # Gather build data: # obj-y - list of build objects # subdir-y - list of directories to nest @@ -100,7 +118,7 @@ FORCE: # Include all cmd files to get all the dependency rules # for all objects included -targets := $(wildcard $(sort $(obj-y) $(in-target))) +targets := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS))) cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) ifneq ($(cmd_files),) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0a669f9..ef637e9 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -421,16 +421,33 @@ endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< -$(OUTPUT)%.o: %.S - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.s: %.S - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< +# get relative building directory (to $(OUTPUT)) +# and '.' if it's $(OUTPUT) itself +__build-dir = $(subst $(OUTPUT),,$(dir $@)) +build-dir = $(if $(__build-dir),$(__build-dir),.) + +single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h + +$(OUTPUT)%.o: %.c single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + +$(OUTPUT)%.i: %.c single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + +$(OUTPUT)%.s: %.c single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + +$(OUTPUT)%-bison.o: %.c single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + +$(OUTPUT)%-flex.o: %.c single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + +$(OUTPUT)%.o: %.S single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + +$(OUTPUT)%.i: %.S single_dep FORCE + @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -675,5 +692,5 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS FORCE +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS FORCE single_dep diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4599188..32f9327 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -97,15 +97,19 @@ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + $(call rule_mkdir) @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l $(OUTPUT)util/parse-events-bison.c: util/parse-events.y + $(call rule_mkdir) @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + $(call rule_mkdir) @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l $(OUTPUT)util/pmu-bison.c: util/pmu.y + $(call rule_mkdir) @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ CFLAGS_parse-events-flex.o += -w @@ -122,13 +126,17 @@ CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_parse-events.o += -Wno-redundant-decls $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE + $(call rule_mkdir) $(call if_changed_dep,cc_o_c) $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE + $(call rule_mkdir) $(call if_changed_dep,cc_o_c) $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE + $(call rule_mkdir) $(call if_changed_dep,cc_o_c) $(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE + $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -- cgit v0.10.2 From 64f72f3b7316793ba03bc38f5c3cfc627068afe4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 16:48:54 +0100 Subject: perf build: Remove directory dependency rules Removing subdirectories creation support from Makefile.perf as it's no longer needed, since it's properly handled by new build system. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-2i8x5hdllpm6cyhfh1cr88hv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ef637e9..b282cbc 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -465,21 +465,6 @@ endif $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) $(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) -# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So -# we depend the various files onto their directories. -DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) -DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -# no need to add flex objects, because they depend on bison ones -DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c -DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c - -OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS))) - -$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) -# In the second step, we make a rule to actually create these directories -$(OUTPUT_DIRECTORIES): - $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null - LIBPERF_IN := $(OUTPUT)libperf-in.o $(LIBPERF_IN): FORCE -- cgit v0.10.2 From 8e499ac5376c03fa3ff90ddd7def3fee175aa4b6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 16:51:35 +0100 Subject: perf build: Remove uneeded variables Removing uneeded variables from Makefile.perf: BUILTIN_OBJS LIB_OBJS GTK_OBJS - objects are now hold by in the build Makefiles LIB_H - header dependencies iare now handled by Build framework Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-o85k0klhwqh3fmvryfgcpr95@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b282cbc..d49f793 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -137,10 +137,6 @@ export prefix bindir sharedir sysconfdir DESTDIR SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Guard against environment variables -BUILTIN_OBJS = -LIB_H = -LIB_OBJS = -GTK_OBJS = PYRF_OBJS = SCRIPT_SH = @@ -218,115 +214,6 @@ export PERL_PATH LIB_FILE=$(OUTPUT)libperf.a -LIB_H += ../lib/symbol/kallsyms.h -LIB_H += ../../include/uapi/linux/perf_event.h -LIB_H += ../../include/linux/rbtree.h -LIB_H += ../../include/linux/list.h -LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../include/linux/hash.h -LIB_H += ../../include/linux/stringify.h -LIB_H += util/include/linux/bitmap.h -LIB_H += ../include/linux/bitops.h -LIB_H += ../include/asm-generic/bitops/arch_hweight.h -LIB_H += ../include/asm-generic/bitops/atomic.h -LIB_H += ../include/asm-generic/bitops/const_hweight.h -LIB_H += ../include/asm-generic/bitops/find.h -LIB_H += ../include/asm-generic/bitops/fls64.h -LIB_H += ../include/asm-generic/bitops/fls.h -LIB_H += ../include/asm-generic/bitops/__ffs.h -LIB_H += ../include/asm-generic/bitops/__fls.h -LIB_H += ../include/asm-generic/bitops/hweight.h -LIB_H += ../include/asm-generic/bitops.h -LIB_H += ../include/linux/compiler.h -LIB_H += ../include/linux/log2.h -LIB_H += util/include/linux/const.h -LIB_H += util/include/linux/ctype.h -LIB_H += util/include/linux/kernel.h -LIB_H += util/include/linux/list.h -LIB_H += ../include/linux/export.h -LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/rbtree.h -LIB_H += util/include/linux/rbtree_augmented.h -LIB_H += util/include/linux/string.h -LIB_H += ../include/linux/types.h -LIB_H += util/include/linux/linkage.h -LIB_H += util/include/asm/asm-offsets.h -LIB_H += ../include/asm/bug.h -LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/swab.h -LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/uaccess.h -LIB_H += util/include/dwarf-regs.h -LIB_H += util/include/asm/dwarf2.h -LIB_H += util/include/asm/cpufeature.h -LIB_H += util/include/asm/unistd_32.h -LIB_H += util/include/asm/unistd_64.h -LIB_H += perf.h -LIB_H += util/annotate.h -LIB_H += util/cache.h -LIB_H += util/callchain.h -LIB_H += util/build-id.h -LIB_H += util/db-export.h -LIB_H += util/debug.h -LIB_H += util/pmu.h -LIB_H += util/event.h -LIB_H += util/evsel.h -LIB_H += util/evlist.h -LIB_H += util/exec_cmd.h -LIB_H += util/find-vdso-map.c -LIB_H += util/levenshtein.h -LIB_H += util/machine.h -LIB_H += util/map.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/xyarray.h -LIB_H += util/header.h -LIB_H += util/help.h -LIB_H += util/session.h -LIB_H += util/ordered-events.h -LIB_H += util/strbuf.h -LIB_H += util/strlist.h -LIB_H += util/strfilter.h -LIB_H += util/svghelper.h -LIB_H += util/tool.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/dso.h -LIB_H += util/symbol.h -LIB_H += util/color.h -LIB_H += util/values.h -LIB_H += util/sort.h -LIB_H += util/hist.h -LIB_H += util/comm.h -LIB_H += util/thread.h -LIB_H += util/thread_map.h -LIB_H += util/trace-event.h -LIB_H += util/probe-finder.h -LIB_H += util/dwarf-aux.h -LIB_H += util/probe-event.h -LIB_H += util/pstack.h -LIB_H += util/cpumap.h -LIB_H += util/top.h -LIB_H += $(ARCH_INCLUDE) -LIB_H += util/cgroup.h -LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h -LIB_H += util/target.h -LIB_H += util/rblist.h -LIB_H += util/intlist.h -LIB_H += util/perf_regs.h -LIB_H += util/unwind.h -LIB_H += util/vdso.h -LIB_H += util/tsc.h -LIB_H += ui/helpline.h -LIB_H += ui/progress.h -LIB_H += ui/util.h -LIB_H += ui/ui.h -LIB_H += util/data.h -LIB_H += util/kvm-stat.h -LIB_H += util/thread-stack.h - PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" @@ -379,9 +266,8 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE @$(MAKE) $(build)=perf -$(OUTPUT)perf: $(BUILTIN_OBJS) $(PERFLIBS) $(PERF_IN) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ - $(BUILTIN_OBJS) $(PERF_IN) $(LIBS) -o $@ +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ $(GTK_IN): FORCE @$(MAKE) $(build)=gtk @@ -462,15 +348,14 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o $(LIBPERF_IN): FORCE @$(MAKE) $(build)=libperf -$(LIB_FILE): $(LIB_OBJS) $(LIBPERF_IN) +$(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) # libtraceevent.a @@ -655,7 +540,7 @@ config-clean: @$(MAKE) -C config/feature-checks clean >/dev/null clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete @$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 -- cgit v0.10.2 From db8486626246f86d8f6f77ff0020c68a6dda2e23 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 30 Dec 2014 19:02:51 +0100 Subject: perf build: Remove PERF-CFLAGS file Removing PERF-CFLAGS file, because the build framework stores full build command line for each object and triggers rebuilt if necessary. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-99hamnd2msiwgsi78yauihhd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d49f793..710731b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -365,7 +365,7 @@ LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS +$(LIBTRACEEVENT): $(TE_SOURCES) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins $(LIBTRACEEVENT)-clean: @@ -444,17 +444,6 @@ cscope: $(QUIET_GEN)$(RM) cscope*; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ) - -$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or prefix"; \ - echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ - fi - ### Testing rules # GNU make supports exporting all variables by "export" without parameters. @@ -544,7 +533,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete @$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) @@ -562,5 +551,5 @@ FORCE: .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS FORCE single_dep +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep -- cgit v0.10.2 From f819f703a42eed63443cef796b1852e6baf985bd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 14 Jan 2015 19:05:27 +0100 Subject: perf build: Add build documentation Adding file describing the basics of perf build process. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ibgf7vxyduwohlqqfayl11xb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt new file mode 100644 index 0000000..f6fc650 --- /dev/null +++ b/tools/perf/Documentation/Build.txt @@ -0,0 +1,49 @@ + +1) perf build +============= +The perf build process consists of several separated building blocks, +which are linked together to form the perf binary: + - libperf library (static) + - perf builtin commands + - traceevent library (static) + - GTK ui library + +Several makefiles govern the perf build: + + - Makefile + top level Makefile working as a wrapper that calls the main + Makefile.perf with a -j option to do parallel builds. + + - Makefile.perf + main makefile that triggers build of all perf objects including + installation and documentation processing. + + - tools/build/Makefile.build + main makefile of the build framework + + - tools/build/Build.include + build framework generic definitions + + - Build makefiles + makefiles that defines build objects + +Please refer to tools/build/Documentation/Build.txt for more +information about build framework. + + +2) perf build +============= +The Makefile.perf triggers the build framework for build objects: + perf, libperf, gtk + +resulting in following objects: + $ ls *-in.o + gtk-in.o libperf-in.o perf-in.o + +Those objects are then used in final linking: + libperf-gtk.so <- gtk-in.o libperf-in.o + perf <- perf-in.o libperf-in.o + + +NOTE this description is omitting other libraries involved, only + focusing on build framework outcomes -- cgit v0.10.2 From b4f9166847354cb839c275c062c6b17afba49211 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Jan 2015 16:38:25 +0100 Subject: tools lib api: Use tools build framework Move the libapikfs library building under tools build framework. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-xjo8r7nuqy9mvlfrmx9zcfwb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/Build b/tools/lib/api/Build new file mode 100644 index 0000000..64dd8d4 --- /dev/null +++ b/tools/lib/api/Build @@ -0,0 +1,2 @@ +libapikfs-y += fd/ +libapikfs-y += fs/ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 212aa4f..1aa47c2 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,53 +1,43 @@ include ../../scripts/Makefile.include include ../../perf/config/utilities.mak # QUIET_CLEAN +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar -# guard against environment variables -LIB_H= -LIB_OBJS= - -LIB_H += fs/debugfs.h -LIB_H += fs/tracefs.h -LIB_H += fs/findfs.h -LIB_H += fs/fs.h -# See comment below about piggybacking... -LIB_H += fd/array.h - -LIB_OBJS += $(OUTPUT)fs/debugfs.o -LIB_OBJS += $(OUTPUT)fs/tracefs.o -LIB_OBJS += $(OUTPUT)fs/findfs.o -LIB_OBJS += $(OUTPUT)fs/fs.o -# XXX piggybacking here, need to introduce libapikfd, or rename this -# to plain libapik.a and make it have it all api goodies -LIB_OBJS += $(OUTPUT)fd/array.o +MAKEFLAGS += --no-print-directory -LIBFILE = libapikfs.a +LIBFILE = $(OUTPUT)libapikfs.a -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC -EXTLIBS = -lelf -lpthread -lrt -lm -ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ALL_LDFLAGS = $(LDFLAGS) +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -$(LIBFILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS) +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +API_IN := $(OUTPUT)libapikfs-in.o -$(LIB_OBJS): $(LIB_H) +export srctree OUTPUT CC LD CFLAGS V -libapi_dirs: - $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs +all: $(LIBFILE) -$(OUTPUT)%.o: %.c libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< -$(OUTPUT)%.s: %.c libapi_dirs - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< -$(OUTPUT)%.o: %.S libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(API_IN): FORCE + @$(MAKE) $(build)=libapikfs + +$(LIBFILE): $(API_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) clean: - $(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE) + $(call QUIET_CLEAN, libapikfs) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) + +FORCE: -.PHONY: clean +.PHONY: clean FORCE diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build new file mode 100644 index 0000000..4ddcb0f --- /dev/null +++ b/tools/lib/api/fd/Build @@ -0,0 +1 @@ +libapikfs-y += array.o diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build new file mode 100644 index 0000000..5f0fe4d --- /dev/null +++ b/tools/lib/api/fs/Build @@ -0,0 +1,4 @@ +libapikfs-y += fs.o +libapikfs-y += findfs.o +libapikfs-y += debugfs.o +libapikfs-y += tracefs.o diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 710731b..cc733ac 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -375,20 +375,12 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins -LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch]) - -# if subdir is set, we've been called from above so target has been built -# already -$(LIBAPIKFS): $(LIBAPIKFS_SOURCES) -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a -endif +$(LIBAPIKFS): FORCE + @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapikfs.a $(LIBAPIKFS)-clean: -ifeq ($(subdir),) $(call QUIET_CLEAN, libapikfs) @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -endif help: @echo 'Perf make targets:' -- cgit v0.10.2 From 285a8f247b08c2aff83633fb82c217f91455d10b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 10 Jan 2015 20:53:13 +0100 Subject: tools lib api: Rename libapikfs.a to libapi.a Renaming libapikfs.a to libapi.a, because it's not just 'fs' specific library now. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-g1mk5oj2ayq4vn653ovfg3gv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/Build b/tools/lib/api/Build index 64dd8d4..3653965 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -1,2 +1,2 @@ -libapikfs-y += fd/ -libapikfs-y += fs/ +libapi-y += fd/ +libapi-y += fs/ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 1aa47c2..d8fe29f 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -13,7 +13,7 @@ AR = $(CROSS_COMPILE)ar MAKEFLAGS += --no-print-directory -LIBFILE = $(OUTPUT)libapikfs.a +LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 -fPIC @@ -22,20 +22,20 @@ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f build := -f $(srctree)/tools/build/Makefile.build dir=. obj -API_IN := $(OUTPUT)libapikfs-in.o +API_IN := $(OUTPUT)libapi-in.o export srctree OUTPUT CC LD CFLAGS V all: $(LIBFILE) $(API_IN): FORCE - @$(MAKE) $(build)=libapikfs + @$(MAKE) $(build)=libapi $(LIBFILE): $(API_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) clean: - $(call QUIET_CLEAN, libapikfs) $(RM) $(LIBFILE); \ + $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) FORCE: diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build index 4ddcb0f..605d99f 100644 --- a/tools/lib/api/fd/Build +++ b/tools/lib/api/fd/Build @@ -1 +1 @@ -libapikfs-y += array.o +libapi-y += array.o diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build index 5f0fe4d..6de5a4f 100644 --- a/tools/lib/api/fs/Build +++ b/tools/lib/api/fs/Build @@ -1,4 +1,4 @@ -libapikfs-y += fs.o -libapikfs-y += findfs.o -libapikfs-y += debugfs.o -libapikfs-y += tracefs.o +libapi-y += fs.o +libapi-y += debugfs.o +libapi-y += findfs.o +libapi-y += tracefs.o diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index cc733ac..c908840 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -161,8 +161,8 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBAPIKFS = $(LIB_PATH)libapikfs.a -export LIBAPIKFS +LIBAPI = $(LIB_PATH)libapi.a +export LIBAPI # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -173,7 +173,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ @@ -214,7 +214,7 @@ export PERL_PATH LIB_FILE=$(OUTPUT)libperf.a -PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) +PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -375,11 +375,11 @@ $(LIBTRACEEVENT)-clean: install-traceevent-plugins: $(LIBTRACEEVENT) $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins -$(LIBAPIKFS): FORCE - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapikfs.a +$(LIBAPI): FORCE + @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a -$(LIBAPIKFS)-clean: - $(call QUIET_CLEAN, libapikfs) +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null help: @@ -520,7 +520,7 @@ config-clean: $(call QUIET_CLEAN, config) @$(MAKE) -C config/feature-checks clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete @$(RM) .config-detected diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index d0aee4b..1833103 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,7 +25,7 @@ cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPIKFS') +libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] -- cgit v0.10.2 From 2d58ab9bdb1958e94f1007882d67c77edda810c0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 7 Jan 2015 18:39:45 +0100 Subject: tools lib traceevent: Use tools build framework Move the libtraceevent library building under tools build framework. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-opvx59tcawlmm916lg4aff4h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build new file mode 100644 index 0000000..c681d05 --- /dev/null +++ b/tools/lib/traceevent/Build @@ -0,0 +1,17 @@ +libtraceevent-y += event-parse.o +libtraceevent-y += event-plugin.o +libtraceevent-y += trace-seq.o +libtraceevent-y += parse-filter.o +libtraceevent-y += parse-utils.o +libtraceevent-y += kbuffer-parse.o + +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 005c9cc..d410da3 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -67,7 +67,7 @@ PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' endif -include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include +include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -78,40 +78,13 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \ - BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1 -endef - -all: sub-make - -$(MAKECMDGOALS): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) -src := $(srctree) -obj := $(objtree) - export prefix bindir src obj # Shell quotes @@ -132,16 +105,19 @@ EXTRAVERSION = $(EP_EXTRAVERSION) OBJ = $@ N = -export Q VERBOSE - EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES) +INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) -# Set compile option CFLAGS if not set elsewhere -CFLAGS ?= -g -Wall +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif # Append required CFLAGS +override CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) override CFLAGS += $(udis86-flags) -D_GNU_SOURCE @@ -151,74 +127,58 @@ else Q = @ endif -do_compile_shared_library = \ - ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@) - -do_plugin_build = \ - ($(print_plugin_build) \ - $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<) - -do_build_static_lib = \ - ($(print_static_lib_build) \ - $(RM) $@; $(AR) rcs $@ $^) - - -do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +# Disable command line variables (CFLAGS) overide from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= -$(obj)/%.o: $(src)/%.c - $(call do_compile) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -%.o: $(src)/%.c - $(call do_compile) +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so -PEVENT_LIB_OBJS = event-parse.o -PEVENT_LIB_OBJS += event-plugin.o -PEVENT_LIB_OBJS += trace-seq.o -PEVENT_LIB_OBJS += parse-filter.o -PEVENT_LIB_OBJS += parse-utils.o -PEVENT_LIB_OBJS += kbuffer-parse.o +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) -PLUGIN_OBJS = plugin_jbd2.o -PLUGIN_OBJS += plugin_hrtimer.o -PLUGIN_OBJS += plugin_kmem.o -PLUGIN_OBJS += plugin_kvm.o -PLUGIN_OBJS += plugin_mac80211.o -PLUGIN_OBJS += plugin_sched_switch.o -PLUGIN_OBJS += plugin_function.o -PLUGIN_OBJS += plugin_xen.o -PLUGIN_OBJS += plugin_scsi.o -PLUGIN_OBJS += plugin_cfg80211.o - -PLUGINS := $(PLUGIN_OBJS:.o=.so) - -ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS) +TE_IN := $(OUTPUT)libtraceevent-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) CMD_TARGETS = $(LIB_FILE) $(PLUGINS) TARGETS = $(CMD_TARGETS) - all: all_cmd all_cmd: $(CMD_TARGETS) -libtraceevent.so: $(PEVENT_LIB_OBJS) +$(TE_IN): force + $(Q)$(MAKE) $(build)=libtraceevent + +$(OUTPUT)libtraceevent.so: $(TE_IN) $(QUIET_LINK)$(CC) --shared $^ -o $@ -libtraceevent.a: $(PEVENT_LIB_OBJS) +$(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ plugins: $(PLUGINS) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@ +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) -$(PLUGIN_OBJS): %.o : $(src)/%.c - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $< +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) -$(PLUGINS): %.so: %.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $< +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ @@ -255,40 +215,6 @@ define update_dir fi); endef -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - -TRACEEVENT-CFLAGS: force - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ - echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ - fi - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -327,14 +253,9 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ $(RM) TRACEEVENT-CFLAGS tags TAGS -endif # skip-makefile - PHONY += force plugins force: -plugins: - @echo > /dev/null - # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c908840..4eeec57 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -358,22 +358,17 @@ $(LIBPERF_IN): FORCE $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -# libtraceevent.a -TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) - -LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) -LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): $(TE_SOURCES) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins +$(LIBTRACEEVENT): FORCE + @$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null install-traceevent-plugins: $(LIBTRACEEVENT) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins + @$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins $(LIBAPI): FORCE @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a -- cgit v0.10.2 From 9244e2c673fb148abb7706e47b602d009c537c9b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 Jan 2015 17:11:04 +0100 Subject: tools lib lockdep: Use tools build framework Move the lockdep library building under tools build framework. Signed-off-by: Jiri Olsa Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: S. Lockwood-Childs Cc: Sasha Levin Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-i0t25buqyo5jfvzpw2347h1h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build new file mode 100644 index 0000000..6f66735 --- /dev/null +++ b/tools/lib/lockdep/Build @@ -0,0 +1 @@ +liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 52f9279..8c3340a 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -35,6 +35,10 @@ bindir = $(prefix)/$(bindir_relative) export DESTDIR DESTDIR_SQ INSTALL +MAKEFLAGS += --no-print-directory + +include ../../scripts/Makefile.include + # copy a bit from Linux kbuild ifeq ("$(origin V)", "command line") @@ -44,56 +48,21 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(BUILD_OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ - BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 -endef - -saved-output := $(BUILD_OUTPUT) -BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) -$(if $(BUILD_OUTPUT),, \ - $(error output directory "$(saved-output)" does not exist)) - -all: sub-make - -gui: force - $(call build_output, all_cmd) - -$(filter-out gui,$(MAKECMDGOALS)): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # BUILD_OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))) -objtree := $(realpath $(CURDIR)) -src := $(srctree) -obj := $(objtree) - -export prefix libdir bindir src obj - # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) bindir_SQ = $(subst ','\'',$(bindir)) -LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION) +LIB_IN := $(OUTPUT)liblockdep-in.o + BIN_FILE = lockdep +LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) CONFIG_INCLUDES = CONFIG_LIBS = @@ -108,33 +77,23 @@ INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include -I../../include $(C # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g +CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) ifeq ($(VERBOSE),1) Q = - print_compile = - print_app_build = - print_fpic_compile = print_shared_lib_compile = print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_shared_lib_compile = echo ' LD '$(OBJ); + print_static_lib_build = echo ' LD '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif -do_fpic_compile = \ - ($(print_fpic_compile) \ - $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) - -do_app_build = \ - ($(print_app_build) \ - $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -144,22 +103,6 @@ do_build_static_lib = \ ($(print_static_lib_build) \ $(RM) $@; $(AR) rcs $@ $^) - -define do_compile - $(print_compile) \ - $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; -endef - -$(obj)/%.o: $(src)/%.c - $(Q)$(call do_compile) - -%.o: $(src)/%.c - $(Q)$(call do_compile) - -PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o - -ALL_OBJS = $(PEVENT_LIB_OBJS) - CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) @@ -169,42 +112,15 @@ all: all_cmd all_cmd: $(CMD_TARGETS) -liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS) +$(LIB_IN): force + $(Q)$(MAKE) $(build)=liblockdep + +liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) $(Q)$(do_compile_shared_library) -liblockdep.a: $(PEVENT_LIB_OBJS) +liblockdep.a: $(LIB_IN) $(Q)$(do_build_static_lib) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c - $(Q)$(do_fpic_compile) - -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -233,8 +149,6 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d $(RM) tags TAGS -endif # skip-makefile - PHONY += force force: -- cgit v0.10.2 From ceed252fe0b8b7975845ed4cb9e6069d8a12f233 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 11 Jan 2015 23:59:55 +0100 Subject: perf build: Display make commands on V=1 Get more verbose output wrt displaying executed commands from make. Signed-off-by: Jiri Olsa Tested-by: Sukadev Bhattiprolu Tested-by: Will Deacon Cc: Alexis Berlemont Cc: Borislav Petkov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-68v67h59zoz7ilb1ggcuff3j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index b5ded20..10df572 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -13,8 +13,10 @@ __build: ifeq ($(V),1) quiet = + Q = else quiet=quiet_ + Q=@ endif build-dir := $(srctree)/tools/build @@ -102,7 +104,7 @@ in-target := $(prefix)$(obj)-in.o PHONY += $(subdir-y) $(subdir-y): - @$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) + $(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) $(sort $(subdir-obj-y)): $(subdir-y) ; diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4eeec57..efc5158 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -89,13 +89,19 @@ VPATH += $(OUTPUT) export VPATH endif +ifeq ($(V),1) + Q = +else + Q = @ +endif + # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD - @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) - @touch $(OUTPUT)PERF-VERSION-FILE + $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + $(Q)touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc LD = $(CROSS_COMPILE)ld @@ -251,7 +257,7 @@ SHELL = $(SHELL_PATH) all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) + $(Q)$$(:) shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell @@ -264,13 +270,13 @@ export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE - @$(MAKE) $(build)=perf + $(Q)$(MAKE) $(build)=perf $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ $(GTK_IN): FORCE - @$(MAKE) $(build)=gtk + $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) @@ -315,25 +321,25 @@ build-dir = $(if $(__build-dir),$(__build-dir),.) single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h $(OUTPUT)%.o: %.c single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)%.i: %.c single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)%.s: %.c single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)%-bison.o: %.c single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)%-flex.o: %.c single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)%.o: %.S single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)%.i: %.S single_dep FORCE - @$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -353,7 +359,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o $(LIBPERF_IN): FORCE - @$(MAKE) $(build)=libperf + $(Q)$(MAKE) $(build)=libperf $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) @@ -361,21 +367,21 @@ $(LIB_FILE): $(LIBPERF_IN) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) $(LIBTRACEEVENT): FORCE - @$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null install-traceevent-plugins: $(LIBTRACEEVENT) - @$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins $(LIBAPI): FORCE - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null help: @echo 'Perf make targets:' @@ -513,12 +519,12 @@ $(INSTALL_DOC_TARGETS): # config-clean: $(call QUIET_CLEAN, config) - @$(MAKE) -C config/feature-checks clean >/dev/null + $(Q)$(MAKE) -C config/feature-checks clean >/dev/null clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) - @find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete - @$(RM) .config-detected + $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean -- cgit v0.10.2 From 3a03005ff9445834f3d3b577a11bcbdbdf7a89cf Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 13 Feb 2015 21:11:52 +0800 Subject: perf tools: Fix a bug of segmentation fault Fix the 'segmentation fault' bug of 'perf list --list-cmds', which also happens in other cases (e.g. record, report ...). This bug happens when there are no cmds to list at all. Example: Before this patch: $ perf list --list-cmds Segmentation fault $ After this patch: $ perf list --list-cmds $ As shown above, the result prints nothing rather than a segmentation fault. The null result means 'perf list' has no cmds to display at this time. Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1423833115-11199-5-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 4a015f7..4ee9a86 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -510,8 +510,10 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o } exit(130); case PARSE_OPT_LIST_SUBCMDS: - for (int i = 0; subcommands[i]; i++) - printf("%s ", subcommands[i]); + if (subcommands) { + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + } exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { -- cgit v0.10.2 From 74390aa5567827add5058a3b26eff0ed06a629ba Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Tue, 27 Jan 2015 17:55:12 +0800 Subject: perf: Remove the extra validity check on nr_pages The function is_power_of_2() also do the check on nr_pages, so the first check performed is unnecessary. On the other hand, the key point is to ensure @nr_pages is a power-of-two number and mostly @nr_pages is a nonzero value, so in the most cases, the function is_power_of_2() will be called. Signed-off-by: Kaixu Xia Cc: Peter Zijlstra Cc: Paul Mackerras Link: http://lkml.kernel.org/r/1422352512-75150-1-git-send-email-xiakaixu@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/kernel/events/core.c b/kernel/events/core.c index 7f2fbb8..0969c9b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4420,7 +4420,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) * If we have rb pages ensure they're a power-of-two number, so we * can do bitmasks instead of modulo. */ - if (nr_pages != 0 && !is_power_of_2(nr_pages)) + if (!is_power_of_2(nr_pages)) return -EINVAL; if (vma_size != PAGE_SIZE * (1 + nr_pages)) -- cgit v0.10.2 From 619a303c1b8bd22abc549477d038ef9b5c1fe1bd Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 13 Feb 2015 21:11:55 +0800 Subject: perf list: Place the header text in its right position The hearer text 'List of pre-defined events (to be used in -e):' is placed in an improper function, which causes an abnormal output, e.g. 'perf list hw' shows no guiding text at all, and 'perf list hw L1-dcache*' shows the guiding text incorrectly in the middle of the output. Example Before this patch: $ perf list hw L1-dcache* branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cache-misses [Hardware event] cache-references [Hardware event] cpu-cycles OR cycles [Hardware event] instructions [Hardware event] stalled-cycles-backend OR idle-cycles-backend [Hardware event] stalled-cycles-frontend OR idle-cycles-frontend [Hardware event] List of pre-defined events (to be used in -e): <-- incorrect position L1-dcache-load-misses [Hardware cache event] L1-dcache-loads [Hardware cache event] L1-dcache-prefetch-misses [Hardware cache event] L1-dcache-prefetches [Hardware cache event] L1-dcache-store-misses [Hardware cache event] L1-dcache-stores [Hardware cache event] After this patch: $ perf list hw L1-dcache* List of pre-defined events (to be used in -e): <-- correct position branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cache-misses [Hardware event] cache-references [Hardware event] cpu-cycles OR cycles [Hardware event] instructions [Hardware event] stalled-cycles-backend OR idle-cycles-backend [Hardware event] stalled-cycles-frontend OR idle-cycles-frontend [Hardware event] L1-dcache-load-misses [Hardware cache event] L1-dcache-loads [Hardware cache event] L1-dcache-prefetch-misses [Hardware cache event] L1-dcache-prefetches [Hardware cache event] L1-dcache-store-misses [Hardware cache event] L1-dcache-stores [Hardware cache event] Signed-off-by: Yunlong Song Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Wang Nan Link: http://lkml.kernel.org/r/1423833115-11199-8-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 198f3c3..ad8018e 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -41,6 +41,9 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) return 0; } + if (!raw_dump) + printf("\nList of pre-defined events (to be used in -e):\n\n"); + if (argc == 0) { print_events(NULL, false); return 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ecf069b..109ba5c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1319,11 +1319,6 @@ static void print_symbol_events(const char *event_glob, unsigned type, */ void print_events(const char *event_glob, bool name_only) { - if (!name_only) { - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - } - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); -- cgit v0.10.2 From 42052bea1683fad5a7a06d84a3b4f7bd16131ce8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 13 Feb 2015 12:32:45 -0300 Subject: perf trace: Print thread info when following children The default for 'trace workload' is to set perf_event_attr.inherit to 1, i.e. to make it equivalent to 'strace -f workload', so we were ending with syscalls for multiple processes mixed up, fix it: Before: [root@ssdandy ~]# trace -e brk time usleep 1 0.071 ( 0.002 ms): brk( ) = 0x100e000 0.802 ( 0.001 ms): brk( ) = 0x1d99000 1.132 ( 0.003 ms): brk( ) = 0x1d99000 1.136 ( 0.003 ms): brk(brk: 0x1dba000) = 0x1dba000 1.140 ( 0.001 ms): brk( ) = 0x1dba000 0.00user 0.00system 0:00.00elapsed 63%CPU (0avgtext+0avgdata 528maxresident)k 0inputs+0outputs (0major+181minor)pagefaults 0swaps [root@ssdandy ~]# After: [root@ssdandy ~]# trace -f -e brk time usleep 1 0.072 ( 0.002 ms): time/26308 brk( ) = 0x1e6e000 0.860 ( 0.001 ms): usleep/26309 brk( ) = 0xb91000 1.193 ( 0.003 ms): usleep/26309 brk( ) = 0xb91000 1.197 ( 0.003 ms): usleep/26309 brk(brk: 0xbb2000) = 0xbb2000 1.201 ( 0.001 ms): usleep/26309 brk( ) = 0xbb2000 0.00user 0.00system 0:00.00elapsed 0%CPU (0avgtext+0avgdata 524maxresident)k 0inputs+0outputs (0major+180minor)pagefaults 0swaps [root@ssdandy ~]# BTW: to achieve the 'strace workload' behaviour, i.e. without a explicit '-f', one has to use --no-inherit. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian echo Link: http://lkml.kernel.org/n/tip-`ranpwd -l 24`@git.kernel.org Link: http://lkml.kernel.org/n/tip-2wu2d5n65msxoq1i7vtcaft2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 66300ae..2bfb234 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2114,7 +2114,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) else perf_evlist__enable(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; + trace->multiple_threads = evlist->threads->map[0] == -1 || + evlist->threads->nr > 1 || + perf_evlist__first(evlist)->attr.inherit; again: before = trace->nr_events; -- cgit v0.10.2 From e596663ebb28a068f5cca57f83285b7b293a2c83 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 13 Feb 2015 13:22:21 -0300 Subject: perf trace: Handle multiple threads better wrt syscalls being intermixed $ trace time taskset -c 0 usleep 1 0.845 ( 0.021 ms): time/16722 wait4(upid: 4294967295, stat_addr: 0x7fff17f443d4, ru: 0x7fff17f44438 ) ... 0.865 ( 0.008 ms): time/16723 execve(arg0: 140733595272004, arg1: 140733595272720, arg2: 140733595272768, arg3: 139755107218496, arg4: 7307199665339051828, arg5: 3) = -2 2.395 ( 1.523 ms): taskset/16723 execve(arg0: 140733595272013, arg1: 140733595272720, arg2: 140733595272768, arg3: 139755107218496, arg4: 7307199665339051828, arg5: 3) = 0 2.411 ( 0.002 ms): taskset/16723 brk( ) = 0x1915000 3.300 ( 0.058 ms): usleep/16723 nanosleep(rqtp: 0x7ffff4ada190 ) = 0 3.305 ( 0.000 ms): usleep/16723 exit_group( 3.363 ( 2.539 ms): time/16722 ... [continued]: wait4()) = 16723 3.366 ( 0.001 ms): time/16722 rt_sigaction(sig: INT, act: 0x7fff17f44160, oact: 0x7fff17f44200, sigsetsize: 8) = 0 We we're not seeing this line: 0.845 ( 0.021 ms): time/16722 wait4(upid: 4294967295, stat_addr: 0x7fff17f443d4, ru: 0x7fff17f44438 ) ... just the one when it finishes: 3.363 ( 2.539 ms): time/16722 ... [continued]: wait4()) = 16723 Still some issues left till we move to ordered_samples when multiple CPUs/threads are involved... Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-zq9x30a1ky3djqewqn2v3ja3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2bfb234..feabd08 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1220,6 +1220,7 @@ struct trace { } syscalls; struct record_opts opts; struct machine *host; + struct thread *current; u64 base_time; FILE *output; unsigned long nr_events; @@ -1642,6 +1643,29 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } +static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +{ + struct thread_trace *ttrace; + u64 duration; + size_t printed; + + if (trace->current == NULL) + return 0; + + ttrace = thread__priv(trace->current); + + if (!ttrace->entry_pending) + return 0; + + duration = sample->time - ttrace->entry_time; + + printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); + ttrace->entry_pending = false; + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1673,6 +1697,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, return -1; } + printed += trace__printf_interrupted_entry(trace, sample); + ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); @@ -1688,6 +1714,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; + trace->current = thread; + return 0; } -- cgit v0.10.2 From 14a052df1cfa563093f20847d52caad4be5d2adc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Feb 2015 12:58:57 -0300 Subject: perf trace: Allow mixing with other events Basically adopting 'perf record' --event command line argument syntax: # trace -e \!mprotect,mmap,munmap,open,close,read,fstat,access,arch_prctl --event sched:*switch,sched:*exec,sched:*exit usleep 1 0.048 ( ): sched:sched_process_exec:filename=/bin/usleep pid=24732 old_pid=24732) 0.078 (0.002 ms): usleep/24732 brk( ) = 0x78f000 0.430 (0.002 ms): usleep/24732 brk( ) = 0x78f000 0.434 (0.003 ms): usleep/24732 brk(brk: 0x7b0000 ) = 0x7b0000 0.438 (0.001 ms): usleep/24732 brk( ) = 0x7b0000 0.460 (0.004 ms): usleep/24732 nanosleep(rqtp: 0x7ffff3696a40) ... 0.460 ( ): sched:sched_switch:prev_comm=usleep prev_pid=24732 prev_prio=120 prev_state=S ==> next_comm=swapper/1 next_pid=0 next_prio=120) 0.515 (0.058 ms): usleep/24732 ... [continued]: nanosleep()) = 0 0.520 (0.000 ms): usleep/24732 exit_group( 0.550 ( ): sched:sched_process_exit:comm=usleep pid=24732 prio=120) # Next steps, probably in this order: 1) Use ordered_events code, the logic in trace needs the events to be time ordered when needed, i.e. when multiple CPUs are involved. 2) Callchains! 3) Automatically account for interruptions when saying how long things took. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-gpst8mph575yb4wgf91qibyb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index feabd08..a44ac93 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1219,6 +1219,7 @@ struct trace { struct syscall *table; } syscalls; struct record_opts opts; + struct perf_evlist *evlist; struct machine *host; struct thread *current; u64 base_time; @@ -1833,6 +1834,24 @@ out_dump: return 0; } +static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, + struct perf_sample *sample) +{ + trace__printf_interrupted_entry(trace, sample); + trace__fprintf_tstamp(trace, sample->time, trace->output); + fprintf(trace->output, "(%9.9s): %s:", " ", evsel->name); + + if (evsel->tp_format) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } + + fprintf(trace->output, ")\n"); + return 0; +} + static void print_location(FILE *f, struct perf_sample *sample, struct addr_location *al, bool print_dso, bool print_sym) @@ -2067,7 +2086,7 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, static int trace__run(struct trace *trace, int argc, const char **argv) { - struct perf_evlist *evlist = perf_evlist__new(); + struct perf_evlist *evlist = trace->evlist; struct perf_evsel *evsel; int err = -1, i; unsigned long before; @@ -2076,11 +2095,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (evlist == NULL) { - fprintf(trace->output, "Not enough memory to run!\n"); - goto out; - } - if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) @@ -2227,7 +2241,7 @@ out_disable: out_delete_evlist: perf_evlist__delete(evlist); -out: + trace->evlist = NULL; trace->live = false; return err; { @@ -2498,6 +2512,14 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } +static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + evsel->handler = handler; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const trace_usage[] = { @@ -2532,6 +2554,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_CALLBACK(0, "event", &trace.evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), @@ -2573,6 +2598,15 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) int err; char bf[BUFSIZ]; + trace.evlist = perf_evlist__new(); + if (trace.evlist == NULL) + return -ENOMEM; + + if (trace.evlist == NULL) { + pr_err("Not enough memory to run!\n"); + goto out; + } + argc = parse_options(argc, argv, trace_options, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -2581,6 +2615,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) trace.opts.sample_time = true; } + if (trace.evlist->nr_entries > 0) + evlist__set_evsel_handler(trace.evlist, trace__event_handler); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); -- cgit v0.10.2 From 726f3234dd125633438922a07a80f933f13daf82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 6 Feb 2015 10:16:45 +0100 Subject: perf trace: Support --events foo:bar --no-syscalls I.e. support tracing just tracepoints, without strace like raw_syscalls:*. [acme@ssdandy linux]$ trace --no-sys --ev sched:*exec,sched:*switch,sched:*exit usleep 1 0.048 ( ): sched:sched_process_exec:filename=/usr/bin/usleep pid=27298 old_pid=27298) 0.369 ( ): sched:sched_switch:usleep:27298 [120] S ==> swapper/5:0 [120]) 0.452 ( ): sched:sched_process_exit:comm=usleep pid=27298 prio=120) [acme@ssdandy linux]$ TODO: remove that (...) thing when --no-syscalls is specified. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vn0hsixsbhm31b2rpj97r96k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a44ac93..b1c1df9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2625,7 +2625,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults) { + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->nr_entries == 0 /* Was --events used? */) { pr_err("Please specify something to trace.\n"); return -1; } -- cgit v0.10.2 From 8e57c586c6a8333c266a6cf76d50e110f2954558 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 3 Feb 2015 12:40:54 +0100 Subject: perf/x86/intel/uncore: Delete an unnecessary check before pci_dev_put() call The pci_dev_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Linus Torvalds Link: http://lkml.kernel.org/r/54D0B59C.2060106@users.sourceforge.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 21af6149e..12d9548 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -1132,8 +1132,7 @@ static int snbep_pci2phy_map_init(int devid) } } - if (ubox_dev) - pci_dev_put(ubox_dev); + pci_dev_put(ubox_dev); return err ? pcibios_err_to_errno(err) : 0; } -- cgit v0.10.2 From 72f669c0086fbbbbebc92ce7390125722c4c0ec5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 5 Feb 2015 15:55:31 -0800 Subject: perf: Update shadow timestamp before add event Update the shadow timestamp before start event, because .add might use the timestamp. Signed-off-by: Shaohua Li Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul Mackerras Link: http://lkml.kernel.org/r/9cd0276d6a047cb7c2885994f25e3a1f7c8c28af.1423180257.git.shli@fb.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 13209a9..e580e0f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1881,6 +1881,10 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); + event->tstamp_running += tstamp - event->tstamp_stopped; + + perf_set_shadow_time(event, ctx, tstamp); + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; @@ -1888,10 +1892,6 @@ event_sched_in(struct perf_event *event, goto out; } - event->tstamp_running += tstamp - event->tstamp_stopped; - - perf_set_shadow_time(event, ctx, tstamp); - if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) -- cgit v0.10.2 From 6a694a607a97d58c042fb7fbd60ef1caea26950c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 5 Feb 2015 15:55:32 -0800 Subject: perf: Update userspace page info for software event For hardware events, the userspace page of the event gets updated in context switches, so if we read the timestamp in the page, we get fresh info. For software events, this is missing currently. This patch makes the behavior consistent. With this patch, we can implement clock_gettime(THREAD_CPUTIME) with PERF_COUNT_SW_DUMMY in userspace as suggested by Andy and Peter. Code like this: if (pc->cap_user_time) { do { seq = pc->lock; barrier(); running = pc->time_running; cyc = rdtsc(); time_mult = pc->time_mult; time_shift = pc->time_shift; time_offset = pc->time_offset; barrier(); } while (pc->lock != seq); quot = (cyc >> time_shift); rem = cyc & ((1 << time_shift) - 1); delta = time_offset + quot * time_mult + ((rem * time_mult) >> time_shift); running += delta; return running; } I tried it on a busy system, the userspace page updating doesn't have noticeable overhead. Signed-off-by: Shaohua Li Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Linus Torvalds Link: http://lkml.kernel.org/r/aa2dd2e4f1e9f2225758be5ba00f14d6909a8ce1.1423180257.git.shli@fb.com [ Improved the changelog. ] Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index e580e0f..fef45b4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6123,6 +6123,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) } hlist_add_head_rcu(&event->hlist_entry, head); + perf_event_update_userpage(event); return 0; } @@ -6592,6 +6593,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags) { if (flags & PERF_EF_START) cpu_clock_event_start(event, flags); + perf_event_update_userpage(event); return 0; } @@ -6666,6 +6668,7 @@ static int task_clock_event_add(struct perf_event *event, int flags) { if (flags & PERF_EF_START) task_clock_event_start(event, flags); + perf_event_update_userpage(event); return 0; } -- cgit v0.10.2 From c796b205b88c775fd220c1a63390bac6a8cdda3f Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Fri, 23 Jan 2015 12:19:35 -0600 Subject: perf/x86/amd/ibs: Convert force_ibs_eilvt_setup() to void The caller of force_ibs_eilvt_setup() is ibs_eilvt_setup() which does not care about the return values. So mark it void and clean up the return statements. Signed-off-by: Aravind Gopalakrishnan Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1422037175-20957-1-git-send-email-aravind.gopalakrishnan@amd.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index a61f5c6..989d3c2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -796,7 +796,7 @@ static int setup_ibs_ctl(int ibs_eilvt_off) * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that * is using the new offset. */ -static int force_ibs_eilvt_setup(void) +static void force_ibs_eilvt_setup(void) { int offset; int ret; @@ -811,26 +811,24 @@ static int force_ibs_eilvt_setup(void) if (offset == APIC_EILVT_NR_MAX) { printk(KERN_DEBUG "No EILVT entry available\n"); - return -EBUSY; + return; } ret = setup_ibs_ctl(offset); if (ret) goto out; - if (!ibs_eilvt_valid()) { - ret = -EFAULT; + if (!ibs_eilvt_valid()) goto out; - } pr_info("IBS: LVT offset %d assigned\n", offset); - return 0; + return; out: preempt_disable(); put_eilvt(offset); preempt_enable(); - return ret; + return; } static void ibs_eilvt_setup(void) -- cgit v0.10.2 From 27ac905b8f88d28779b0661809286b5ba2817d37 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:55:57 -0500 Subject: perf/x86/intel: Reduce lbr_sel_map[] size The index of lbr_sel_map is bit value of perf branch_sample_type. PERF_SAMPLE_BRANCH_MAX is 1024 at present, so each lbr_sel_map uses 4096 bytes. By using bit shift as index, we can reduce lbr_sel_map size to 40 bytes. This patch defines 'bit shift' for branch types, and use 'bit shift' to define lbr_sel_maps. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: jolsa@redhat.com Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/1415156173-10035-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index df525d2..0c45b22 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -515,6 +515,10 @@ struct x86_pmu { struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; +enum { + PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT, +}; + #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 58f1a94..8bc078f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -69,10 +69,6 @@ static enum { #define LBR_FROM_FLAG_IN_TX (1ULL << 62) #define LBR_FROM_FLAG_ABORT (1ULL << 61) -#define for_each_branch_sample_type(x) \ - for ((x) = PERF_SAMPLE_BRANCH_USER; \ - (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) - /* * x86control flow change classification * x86control flow changes include branches, interrupts, traps, faults @@ -403,14 +399,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) { struct hw_perf_event_extra *reg; u64 br_type = event->attr.branch_sample_type; - u64 mask = 0, m; - u64 v; + u64 mask = 0, v; + int i; - for_each_branch_sample_type(m) { - if (!(br_type & m)) + for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) { + if (!(br_type & (1ULL << i))) continue; - v = x86_pmu.lbr_sel_map[m]; + v = x86_pmu.lbr_sel_map[i]; if (v == LBR_NOT_SUPP) return -EOPNOTSUPP; @@ -678,35 +674,35 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) /* * Map interface branch filters onto LBR filters */ -static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP - | LBR_IND_JMP | LBR_FAR, +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, /* * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches */ - [PERF_SAMPLE_BRANCH_ANY_CALL] = + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, /* * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, - [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { - [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, - [PERF_SAMPLE_BRANCH_USER] = LBR_USER, - [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, - [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, - [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, - [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL - | LBR_FAR, - [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, - [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; /* core */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9b79abb..e46b932 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -152,21 +152,42 @@ enum perf_event_sample_format { * The branch types can be combined, however BRANCH_ANY covers all types * of branches and therefore it supersedes all the other types. */ +enum perf_branch_sample_type_shift { + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ +}; + enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ - PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ - - PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = + 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = + 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = + 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = + 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; #define PERF_SAMPLE_BRANCH_PLM_ALL \ -- cgit v0.10.2 From ba532500c5651a4be4108acc64ed99a95cb005b3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:55:58 -0500 Subject: perf: Introduce pmu context switch callback The callback is invoked when process is scheduled in or out. It provides mechanism for later patches to save/store the LBR stack. For the schedule in case, the callback is invoked at the same place that flush branch stack callback is invoked. So it also can replace the flush branch stack callback. To avoid unnecessary overhead, the callback is enabled only when there are events use the LBR stack. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b71a7f8..0efbd6c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1914,6 +1914,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + if (x86_pmu.sched_task) + x86_pmu.sched_task(ctx, sched_in); +} + static void x86_pmu_flush_branch_stack(void) { if (x86_pmu.flush_branch_stack) @@ -1950,6 +1956,7 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .flush_branch_stack = x86_pmu_flush_branch_stack, + .sched_task = x86_pmu_sched_task, }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 0c45b22..211b54c 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -473,6 +473,8 @@ struct x86_pmu { void (*check_microcode)(void); void (*flush_branch_stack)(void); + void (*sched_task)(struct perf_event_context *ctx, + bool sched_in); /* * Intel Arch Perfmon v2+ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3326200..fbab623 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -265,6 +265,13 @@ struct pmu { * flush branch stack on context-switches (needed in cpu-wide mode) */ void (*flush_branch_stack) (void); + + /* + * context-switches callback + */ + void (*sched_task) (struct perf_event_context *ctx, + bool sched_in); + }; /** @@ -558,6 +565,8 @@ extern void perf_event_delayed_put(struct task_struct *task); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); +extern void perf_sched_cb_dec(struct pmu *pmu); +extern void perf_sched_cb_inc(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern int perf_event_refresh(struct perf_event *event, int refresh); diff --git a/kernel/events/core.c b/kernel/events/core.c index fef45b4..6c8b31b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -154,6 +154,7 @@ enum event_type_t { struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); +static DEFINE_PER_CPU(int, perf_sched_cb_usages); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -2577,6 +2578,56 @@ unlock: } } +void perf_sched_cb_dec(struct pmu *pmu) +{ + this_cpu_dec(perf_sched_cb_usages); +} + +void perf_sched_cb_inc(struct pmu *pmu) +{ + this_cpu_inc(perf_sched_cb_usages); +} + +/* + * This function provides the context switch callback to the lower code + * layer. It is invoked ONLY when the context switch callback is enabled. + */ +static void perf_pmu_sched_task(struct task_struct *prev, + struct task_struct *next, + bool sched_in) +{ + struct perf_cpu_context *cpuctx; + struct pmu *pmu; + unsigned long flags; + + if (prev == next) + return; + + local_irq_save(flags); + + rcu_read_lock(); + + list_for_each_entry_rcu(pmu, &pmus, entry) { + if (pmu->sched_task) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + + perf_pmu_disable(pmu); + + pmu->sched_task(cpuctx->task_ctx, sched_in); + + perf_pmu_enable(pmu); + + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); + } + } + + rcu_read_unlock(); + + local_irq_restore(flags); +} + #define for_each_task_context_nr(ctxn) \ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) @@ -2596,6 +2647,9 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(task, next, false); + for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); @@ -2847,6 +2901,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, /* check for system-wide branch_stack events */ if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) perf_branch_stack_sched_in(prev, task); + + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(prev, task, true); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) -- cgit v0.10.2 From 2a0ad3b326a9024ba86dca4028499d31fa0c6c4d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:55:59 -0500 Subject: perf/x86/intel: Use context switch callback to flush LBR stack Previous commit introduces context switch callback, its function overlaps with the flush branch stack callback. So we can use the context switch callback to flush LBR stack. This patch adds code that uses the flush branch callback to flush the LBR stack when task is being scheduled in. The callback is enabled only when there are events use the LBR hardware. This patch also removes all old flush branch stack code. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-4-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0efbd6c..6b1fd26 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1920,12 +1920,6 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) x86_pmu.sched_task(ctx, sched_in); } -static void x86_pmu_flush_branch_stack(void) -{ - if (x86_pmu.flush_branch_stack) - x86_pmu.flush_branch_stack(); -} - void perf_check_microcode(void) { if (x86_pmu.check_microcode) @@ -1955,7 +1949,6 @@ static struct pmu pmu = { .commit_txn = x86_pmu_commit_txn, .event_idx = x86_pmu_event_idx, - .flush_branch_stack = x86_pmu_flush_branch_stack, .sched_task = x86_pmu_sched_task, }; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 211b54c..949d008 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -472,7 +472,6 @@ struct x86_pmu { void (*cpu_dead)(int cpu); void (*check_microcode)(void); - void (*flush_branch_stack)(void); void (*sched_task)(struct perf_event_context *ctx, bool sched_in); @@ -733,6 +732,8 @@ void intel_pmu_pebs_disable_all(void); void intel_ds_init(void); +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 498b6d9..424fbf7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2044,18 +2044,6 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } -static void intel_pmu_flush_branch_stack(void) -{ - /* - * Intel LBR does not tag entries with the - * PID of the current task, then we need to - * flush it on ctxsw - * For now, we simply reset it - */ - if (x86_pmu.lbr_nr) - intel_pmu_lbr_reset(); -} - PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); @@ -2107,7 +2095,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .guest_get_msrs = intel_guest_get_msrs, - .flush_branch_stack = intel_pmu_flush_branch_stack, + .sched_task = intel_pmu_lbr_sched_task, }; static __init void intel_clovertown_quirk(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 8bc078f..c0e23c5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -177,6 +177,31 @@ void intel_pmu_lbr_reset(void) intel_pmu_lbr_reset_64(); } +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + /* + * When sampling the branck stack in system-wide, it may be + * necessary to flush the stack on context switch. This happens + * when the branch stack does not tag its entries with the pid + * of the current task. Otherwise it becomes impossible to + * associate a branch entry with a task. This ambiguity is more + * likely to appear when the branch stack supports priv level + * filtering and the user sets it to monitor only at the user + * level (which could be a useful measurement in system-wide + * mode). In that case, the risk is high of having a branch + * stack with branch from multiple tasks. + */ + if (sched_in) { + intel_pmu_lbr_reset(); + cpuc->lbr_context = ctx; + } +} + void intel_pmu_lbr_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -195,6 +220,7 @@ void intel_pmu_lbr_enable(struct perf_event *event) cpuc->br_sel = event->hw.branch_reg.reg; cpuc->lbr_users++; + perf_sched_cb_inc(event->ctx->pmu); } void intel_pmu_lbr_disable(struct perf_event *event) @@ -206,6 +232,7 @@ void intel_pmu_lbr_disable(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); + perf_sched_cb_dec(event->ctx->pmu); if (cpuc->enabled && !cpuc->lbr_users) { __intel_pmu_lbr_disable(); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fbab623..c7007a5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -511,7 +511,6 @@ struct perf_event_context { u64 generation; int pin_count; int nr_cgroups; /* cgroup evts */ - int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; struct delayed_work orphans_remove; diff --git a/kernel/events/core.c b/kernel/events/core.c index 6c8b31b..f563ce7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -153,7 +153,6 @@ enum event_type_t { */ struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); -static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); static DEFINE_PER_CPU(int, perf_sched_cb_usages); static atomic_t nr_mmap_events __read_mostly; @@ -1240,9 +1239,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) if (is_cgroup_event(event)) ctx->nr_cgroups++; - if (has_branch_stack(event)) - ctx->nr_branch_stack++; - list_add_rcu(&event->event_entry, &ctx->event_list); ctx->nr_events++; if (event->attr.inherit_stat) @@ -1409,9 +1405,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) cpuctx->cgrp = NULL; } - if (has_branch_stack(event)) - ctx->nr_branch_stack--; - ctx->nr_events--; if (event->attr.inherit_stat) ctx->nr_stat--; @@ -2809,64 +2802,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, } /* - * When sampling the branck stack in system-wide, it may be necessary - * to flush the stack on context switch. This happens when the branch - * stack does not tag its entries with the pid of the current task. - * Otherwise it becomes impossible to associate a branch entry with a - * task. This ambiguity is more likely to appear when the branch stack - * supports priv level filtering and the user sets it to monitor only - * at the user level (which could be a useful measurement in system-wide - * mode). In that case, the risk is high of having a branch stack with - * branch from multiple tasks. Flushing may mean dropping the existing - * entries or stashing them somewhere in the PMU specific code layer. - * - * This function provides the context switch callback to the lower code - * layer. It is invoked ONLY when there is at least one system-wide context - * with at least one active event using taken branch sampling. - */ -static void perf_branch_stack_sched_in(struct task_struct *prev, - struct task_struct *task) -{ - struct perf_cpu_context *cpuctx; - struct pmu *pmu; - unsigned long flags; - - /* no need to flush branch stack if not changing task */ - if (prev == task) - return; - - local_irq_save(flags); - - rcu_read_lock(); - - list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - - /* - * check if the context has at least one - * event using PERF_SAMPLE_BRANCH_STACK - */ - if (cpuctx->ctx.nr_branch_stack > 0 - && pmu->flush_branch_stack) { - - perf_ctx_lock(cpuctx, cpuctx->task_ctx); - - perf_pmu_disable(pmu); - - pmu->flush_branch_stack(); - - perf_pmu_enable(pmu); - - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); - } - } - - rcu_read_unlock(); - - local_irq_restore(flags); -} - -/* * Called from scheduler to add the events of the current task * with interrupts disabled. * @@ -2898,10 +2833,6 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); - /* check for system-wide branch_stack events */ - if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) - perf_branch_stack_sched_in(prev, task); - if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(prev, task, true); } @@ -3480,10 +3411,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) if (event->parent) return; - if (has_branch_stack(event)) { - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_dec(&per_cpu(perf_branch_stack_events, cpu)); - } if (is_cgroup_event(event)) atomic_dec(&per_cpu(perf_cgroup_events, cpu)); } @@ -7139,10 +7066,6 @@ static void account_event_cpu(struct perf_event *event, int cpu) if (event->parent) return; - if (has_branch_stack(event)) { - if (!(event->attach_state & PERF_ATTACH_TASK)) - atomic_inc(&per_cpu(perf_branch_stack_events, cpu)); - } if (is_cgroup_event(event)) atomic_inc(&per_cpu(perf_cgroup_events, cpu)); } -- cgit v0.10.2 From e9d7f7cd97c45e2c612d3b38be05b4cfb27939ee Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:00 -0500 Subject: perf/x86/intel: Add basic Haswell LBR call stack support Haswell has a new feature that utilizes the existing LBR facility to record call chains. To enable this feature, bits (JCC, NEAR_IND_JMP, NEAR_REL_JMP, FAR_BRANCH, EN_CALLSTACK) in LBR_SELECT must be set to 1, bits (NEAR_REL_CALL, NEAR-IND_CALL, NEAR_RET) must be cleared. Due to a hardware bug of Haswell, this feature doesn't work well with FREEZE_LBRS_ON_PMI. When the call stack feature is enabled, the LBR stack will capture unfiltered call data normally, but as return instructions are executed, the last captured branch record is flushed from the on-chip registers in a last-in first-out (LIFO) manner. Thus, branch information relative to leaf functions will not be captured, while preserving the call stack information of the main line execution path. This patch defines a separate lbr_sel map for Haswell. The map contains a new entry for the call stack feature. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-5-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 949d008..c9a62c5 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -517,7 +517,11 @@ struct x86_pmu { }; enum { - PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE, + + PERF_SAMPLE_BRANCH_CALL_STACK = + 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, }; #define x86_add_quirk(func_) \ @@ -551,6 +555,12 @@ static struct perf_pmu_events_attr event_attr_##v = { \ extern struct x86_pmu x86_pmu __read_mostly; +static inline bool x86_pmu_has_lbr_callstack(void) +{ + return x86_pmu.lbr_sel_map && + x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0; +} + DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); int x86_perf_event_set_period(struct perf_event *event); @@ -754,6 +764,8 @@ void intel_pmu_lbr_init_atom(void); void intel_pmu_lbr_init_snb(void); +void intel_pmu_lbr_init_hsw(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); int p4_pmu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 424fbf7..a485ba1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2537,7 +2537,7 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - intel_pmu_lbr_init_snb(); + intel_pmu_lbr_init_hsw(); x86_pmu.event_constraints = intel_hsw_event_constraints; x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index c0e23c5..ac8279e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -39,6 +39,7 @@ static enum { #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ #define LBR_FAR_BIT 8 /* do not capture far branches */ +#define LBR_CALL_STACK_BIT 9 /* enable call stack */ #define LBR_KERNEL (1 << LBR_KERNEL_BIT) #define LBR_USER (1 << LBR_USER_BIT) @@ -49,6 +50,7 @@ static enum { #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) #define LBR_FAR (1 << LBR_FAR_BIT) +#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT) #define LBR_PLM (LBR_KERNEL | LBR_USER) @@ -74,24 +76,25 @@ static enum { * x86control flow changes include branches, interrupts, traps, faults */ enum { - X86_BR_NONE = 0, /* unknown */ - - X86_BR_USER = 1 << 0, /* branch target is user */ - X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ - - X86_BR_CALL = 1 << 2, /* call */ - X86_BR_RET = 1 << 3, /* return */ - X86_BR_SYSCALL = 1 << 4, /* syscall */ - X86_BR_SYSRET = 1 << 5, /* syscall return */ - X86_BR_INT = 1 << 6, /* sw interrupt */ - X86_BR_IRET = 1 << 7, /* return from interrupt */ - X86_BR_JCC = 1 << 8, /* conditional */ - X86_BR_JMP = 1 << 9, /* jump */ - X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ - X86_BR_IND_CALL = 1 << 11,/* indirect calls */ - X86_BR_ABORT = 1 << 12,/* transaction abort */ - X86_BR_IN_TX = 1 << 13,/* in transaction */ - X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ + X86_BR_CALL_STACK = 1 << 15,/* call stack */ }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) @@ -373,7 +376,7 @@ void intel_pmu_lbr_read(void) * - in case there is no HW filter * - in case the HW filter has errata or limitations */ -static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) { u64 br_type = event->attr.branch_sample_type; int mask = 0; @@ -410,11 +413,21 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_COND) mask |= X86_BR_JCC; + if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) { + if (!x86_pmu_has_lbr_callstack()) + return -EOPNOTSUPP; + if (mask & ~(X86_BR_USER | X86_BR_KERNEL)) + return -EINVAL; + mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET | + X86_BR_CALL_STACK; + } + /* * stash actual user request into reg, it may * be used by fixup code for some CPU */ event->hw.branch_reg.reg = mask; + return 0; } /* @@ -443,8 +456,12 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) reg = &event->hw.branch_reg; reg->idx = EXTRA_REG_LBR; - /* LBR_SELECT operates in suppress mode so invert mask */ - reg->config = ~mask & x86_pmu.lbr_sel_mask; + /* + * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate + * in suppress mode. So LBR_SELECT should be set to + * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) + */ + reg->config = mask ^ x86_pmu.lbr_sel_mask; return 0; } @@ -462,7 +479,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * setup SW LBR filter */ - intel_pmu_setup_sw_lbr_filter(event); + ret = intel_pmu_setup_sw_lbr_filter(event); + if (ret) + return ret; /* * setup HW LBR filter, if any @@ -732,6 +751,20 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; +static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { + [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, + [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL + | LBR_RETURN | LBR_CALL_STACK, +}; + /* core */ void __init intel_pmu_lbr_init_core(void) { @@ -788,6 +821,20 @@ void __init intel_pmu_lbr_init_snb(void) pr_cont("16-deep LBR, "); } +/* haswell */ +void intel_pmu_lbr_init_hsw(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + + pr_cont("16-deep LBR, "); +} + /* atom */ void __init intel_pmu_lbr_init_atom(void) { -- cgit v0.10.2 From 4af57ef28c2c1047fda9e1a5be02aa7a6a69cf9e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:01 -0500 Subject: perf: Add pmu specific data for perf task context Introduce a new flag PERF_ATTACH_TASK_DATA for perf event's attach stata. The flag is set by PMU's event_init() callback, it indicates that perf event needs PMU specific data. The PMU specific data are initialized to zeros. Later patches will use PMU specific data to save LBR stack. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-6-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c7007a5..270cd01 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -271,6 +271,10 @@ struct pmu { */ void (*sched_task) (struct perf_event_context *ctx, bool sched_in); + /* + * PMU specific data size + */ + size_t task_ctx_size; }; @@ -307,6 +311,7 @@ struct swevent_hlist { #define PERF_ATTACH_CONTEXT 0x01 #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 +#define PERF_ATTACH_TASK_DATA 0x08 struct perf_cgroup; struct ring_buffer; @@ -511,6 +516,7 @@ struct perf_event_context { u64 generation; int pin_count; int nr_cgroups; /* cgroup evts */ + void *task_ctx_data; /* pmu specific data */ struct rcu_head rcu_head; struct delayed_work orphans_remove; diff --git a/kernel/events/core.c b/kernel/events/core.c index f563ce7..688086b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -905,6 +905,15 @@ static void get_ctx(struct perf_event_context *ctx) WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); } +static void free_ctx(struct rcu_head *head) +{ + struct perf_event_context *ctx; + + ctx = container_of(head, struct perf_event_context, rcu_head); + kfree(ctx->task_ctx_data); + kfree(ctx); +} + static void put_ctx(struct perf_event_context *ctx) { if (atomic_dec_and_test(&ctx->refcount)) { @@ -912,7 +921,7 @@ static void put_ctx(struct perf_event_context *ctx) put_ctx(ctx->parent_ctx); if (ctx->task) put_task_struct(ctx->task); - kfree_rcu(ctx, rcu_head); + call_rcu(&ctx->rcu_head, free_ctx); } } @@ -3309,12 +3318,15 @@ errout: * Returns a matching context with refcount and pincount. */ static struct perf_event_context * -find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) +find_get_context(struct pmu *pmu, struct task_struct *task, + struct perf_event *event) { struct perf_event_context *ctx, *clone_ctx = NULL; struct perf_cpu_context *cpuctx; + void *task_ctx_data = NULL; unsigned long flags; int ctxn, err; + int cpu = event->cpu; if (!task) { /* Must be root to operate on a CPU event: */ @@ -3342,11 +3354,24 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) if (ctxn < 0) goto errout; + if (event->attach_state & PERF_ATTACH_TASK_DATA) { + task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL); + if (!task_ctx_data) { + err = -ENOMEM; + goto errout; + } + } + retry: ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { clone_ctx = unclone_ctx(ctx); ++ctx->pin_count; + + if (task_ctx_data && !ctx->task_ctx_data) { + ctx->task_ctx_data = task_ctx_data; + task_ctx_data = NULL; + } raw_spin_unlock_irqrestore(&ctx->lock, flags); if (clone_ctx) @@ -3357,6 +3382,11 @@ retry: if (!ctx) goto errout; + if (task_ctx_data) { + ctx->task_ctx_data = task_ctx_data; + task_ctx_data = NULL; + } + err = 0; mutex_lock(&task->perf_event_mutex); /* @@ -3383,9 +3413,11 @@ retry: } } + kfree(task_ctx_data); return ctx; errout: + kfree(task_ctx_data); return ERR_PTR(err); } @@ -7559,7 +7591,7 @@ SYSCALL_DEFINE5(perf_event_open, /* * Get the target context (task or percpu): */ - ctx = find_get_context(pmu, task, event->cpu); + ctx = find_get_context(pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_alloc; @@ -7765,7 +7797,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, account_event(event); - ctx = find_get_context(event->pmu, task, cpu); + ctx = find_get_context(event->pmu, task, event); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err_free; -- cgit v0.10.2 From 5a158c3ccd2183a7b0866be6685d001fe653430f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:02 -0500 Subject: perf: Always switch pmu specific data during context switch If two tasks were both forked from the same parent task, Events in their perf task contexts can be the same. Perf core may leave out switching the perf event contexts. Previous patch inroduces pmu specific data. The data is for saving the LBR stack, it is task specific. So we need to switch the data even when context switch is optimized out. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-7-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 688086b..84451c0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2562,6 +2562,9 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, next->perf_event_ctxp[ctxn] = ctx; ctx->task = next; next_ctx->task = task; + + swap(ctx->task_ctx_data, next_ctx->task_ctx_data); + do_switch = 0; perf_event_sync_stat(ctx, next_ctx); -- cgit v0.10.2 From e18bf526422769611e7248135e36a4cea0e4e38d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:03 -0500 Subject: perf/x86/intel: Allocate space for storing LBR stack When the LBR call stack is enabled, it is necessary to save/restore the LBR stack on context switch. We can use pmu specific data to store LBR stack when task is scheduled out. This patch adds code that allocates the pmu specific data. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-8-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6b1fd26..8ffd71e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -432,6 +432,9 @@ int x86_pmu_hw_config(struct perf_event *event) } } + if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK) + event->attach_state |= PERF_ATTACH_TASK_DATA; + /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) @@ -1950,6 +1953,7 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, + .task_ctx_size = sizeof(struct x86_perf_task_context), }; void arch_perf_update_userpage(struct perf_event *event, diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c9a62c5..69c26b3 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -516,6 +516,13 @@ struct x86_pmu { struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); }; +struct x86_perf_task_context { + u64 lbr_from[MAX_LBR_ENTRIES]; + u64 lbr_to[MAX_LBR_ENTRIES]; + int lbr_callstack_users; + int lbr_stack_state; +}; + enum { PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = PERF_SAMPLE_BRANCH_MAX_SHIFT, PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE, -- cgit v0.10.2 From 63f0c1d84196b712fe9de99a8514486ab416d517 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:04 -0500 Subject: perf/x86/intel: Track number of events that use the LBR callstack When enabling/disabling an event, check if the event uses the LBR callstack feature, adjust the LBR callstack usage count accordingly. Later patch will use the usage count to decide if LBR stack should be saved/restored. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-9-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index ac8279e..ac8e542 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -205,9 +205,15 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) } } +static inline bool branch_user_callstack(unsigned br_sel) +{ + return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK); +} + void intel_pmu_lbr_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; @@ -222,6 +228,12 @@ void intel_pmu_lbr_enable(struct perf_event *event) } cpuc->br_sel = event->hw.branch_reg.reg; + if (branch_user_callstack(cpuc->br_sel) && event->ctx && + event->ctx->task_ctx_data) { + task_ctx = event->ctx->task_ctx_data; + task_ctx->lbr_callstack_users++; + } + cpuc->lbr_users++; perf_sched_cb_inc(event->ctx->pmu); } @@ -229,10 +241,17 @@ void intel_pmu_lbr_enable(struct perf_event *event) void intel_pmu_lbr_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; + if (branch_user_callstack(cpuc->br_sel) && event->ctx && + event->ctx->task_ctx_data) { + task_ctx = event->ctx->task_ctx_data; + task_ctx->lbr_callstack_users--; + } + cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); perf_sched_cb_dec(event->ctx->pmu); -- cgit v0.10.2 From 76cb2c617f12a4dd53c0e899972813b805ad6cc2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:05 -0500 Subject: perf/x86/intel: Save/restore LBR stack during context switch When the LBR call stack is enabled, it is necessary to save/restore the LBR stack on context switch. The solution is saving/restoring the LBR stack to/from task's perf event context. The LBR stack is saved/restored only when there are events that use the LBR call stack. If no event uses LBR call stack, the LBR stack is reset when task is scheduled in. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-10-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index ac8e542..a8b3f23 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -180,14 +180,90 @@ void intel_pmu_lbr_reset(void) intel_pmu_lbr_reset_64(); } +/* + * TOS = most recently recorded branch + */ +static inline u64 intel_pmu_lbr_tos(void) +{ + u64 tos; + + rdmsrl(x86_pmu.lbr_tos, tos); + return tos; +} + +enum { + LBR_NONE, + LBR_VALID, +}; + +static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) +{ + int i; + unsigned lbr_idx, mask; + u64 tos; + + if (task_ctx->lbr_callstack_users == 0 || + task_ctx->lbr_stack_state == LBR_NONE) { + intel_pmu_lbr_reset(); + return; + } + + mask = x86_pmu.lbr_nr - 1; + tos = intel_pmu_lbr_tos(); + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + } + task_ctx->lbr_stack_state = LBR_NONE; +} + +static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) +{ + int i; + unsigned lbr_idx, mask; + u64 tos; + + if (task_ctx->lbr_callstack_users == 0) { + task_ctx->lbr_stack_state = LBR_NONE; + return; + } + + mask = x86_pmu.lbr_nr - 1; + tos = intel_pmu_lbr_tos(); + for (i = 0; i < x86_pmu.lbr_nr; i++) { + lbr_idx = (tos - i) & mask; + rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + } + task_ctx->lbr_stack_state = LBR_VALID; +} + void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_task_context *task_ctx; if (!x86_pmu.lbr_nr) return; /* + * If LBR callstack feature is enabled and the stack was saved when + * the task was scheduled out, restore the stack. Otherwise flush + * the LBR stack. + */ + task_ctx = ctx ? ctx->task_ctx_data : NULL; + if (task_ctx) { + if (sched_in) { + __intel_pmu_lbr_restore(task_ctx); + cpuc->lbr_context = ctx; + } else { + __intel_pmu_lbr_save(task_ctx); + } + return; + } + + /* * When sampling the branck stack in system-wide, it may be * necessary to flush the stack on context switch. This happens * when the branch stack does not tag its entries with the pid @@ -279,18 +355,6 @@ void intel_pmu_lbr_disable_all(void) __intel_pmu_lbr_disable(); } -/* - * TOS = most recently recorded branch - */ -static inline u64 intel_pmu_lbr_tos(void) -{ - u64 tos; - - rdmsrl(x86_pmu.lbr_tos, tos); - - return tos; -} - static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) { unsigned long mask = x86_pmu.lbr_nr - 1; -- cgit v0.10.2 From a46a23000198d929391aa9dac8de68734efa2703 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:06 -0500 Subject: perf: Simplify the branch stack check Use event->attr.branch_sample_type to replace intel_pmu_needs_lbr_smpl() for avoiding duplicated code that implicitly enables the LBR. Currently, branch stack can be enabled by user explicitly requesting branch sampling or implicit branch sampling to correct PEBS skid. For user explicitly requested branch sampling, the branch_sample_type is explicitly set by user. For PEBS case, the branch_sample_type is also implicitly set to PERF_SAMPLE_BRANCH_ANY in x86_pmu_hw_config. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-11-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a485ba1..9f1dd18 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1029,20 +1029,6 @@ static __initconst const u64 slm_hw_cache_event_ids }, }; -static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) -{ - /* user explicitly requested branch sampling */ - if (has_branch_stack(event)) - return true; - - /* implicit branch sampling to correct PEBS skid */ - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 && - x86_pmu.intel_cap.pebs_format < 2) - return true; - - return false; -} - static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1207,7 +1193,7 @@ static void intel_pmu_disable_event(struct perf_event *event) * must disable before any actual event * because any event may be combined with LBR */ - if (intel_pmu_needs_lbr_smpl(event)) + if (needs_branch_stack(event)) intel_pmu_lbr_disable(event); if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { @@ -1268,7 +1254,7 @@ static void intel_pmu_enable_event(struct perf_event *event) * must enabled before any actual event * because any event may be combined with LBR */ - if (intel_pmu_needs_lbr_smpl(event)) + if (needs_branch_stack(event)) intel_pmu_lbr_enable(event); if (event->attr.exclude_host) @@ -1747,7 +1733,7 @@ static int intel_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip && x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); - if (intel_pmu_needs_lbr_smpl(event)) { + if (needs_branch_stack(event)) { ret = intel_pmu_setup_lbr_filter(event); if (ret) return ret; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 270cd01..43cc158 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -814,6 +814,11 @@ static inline bool has_branch_stack(struct perf_event *event) return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; } +static inline bool needs_branch_stack(struct perf_event *event) +{ + return event->attr.branch_sample_type != 0; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); diff --git a/kernel/events/core.c b/kernel/events/core.c index 84451c0..257eccf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7232,6 +7232,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto err_ns; + if (!has_branch_stack(event)) + event->attr.branch_sample_type = 0; + pmu = perf_init_event(event); if (!pmu) goto err_ns; -- cgit v0.10.2 From 4b854900995194601d767fcd112307b21ed278b2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:08 -0500 Subject: perf/x86/intel: Re-organize code that implicitly enables LBR/PEBS Make later patch more readable, no logic change. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-13-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8ffd71e..e0dab5c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -399,36 +399,35 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; - /* - * check that PEBS LBR correction does not conflict with - * whatever the user is asking with attr->branch_sample_type - */ - if (event->attr.precise_ip > 1 && - x86_pmu.intel_cap.pebs_format < 2) { - u64 *br_type = &event->attr.branch_sample_type; - - if (has_branch_stack(event)) { - if (!precise_br_compat(event)) - return -EOPNOTSUPP; - - /* branch_sample_type is compatible */ - - } else { - /* - * user did not specify branch_sample_type - * - * For PEBS fixups, we capture all - * the branches at the priv level of the - * event. - */ - *br_type = PERF_SAMPLE_BRANCH_ANY; - - if (!event->attr.exclude_user) - *br_type |= PERF_SAMPLE_BRANCH_USER; - - if (!event->attr.exclude_kernel) - *br_type |= PERF_SAMPLE_BRANCH_KERNEL; - } + } + /* + * check that PEBS LBR correction does not conflict with + * whatever the user is asking with attr->branch_sample_type + */ + if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) { + u64 *br_type = &event->attr.branch_sample_type; + + if (has_branch_stack(event)) { + if (!precise_br_compat(event)) + return -EOPNOTSUPP; + + /* branch_sample_type is compatible */ + + } else { + /* + * user did not specify branch_sample_type + * + * For PEBS fixups, we capture all + * the branches at the priv level of the + * event. + */ + *br_type = PERF_SAMPLE_BRANCH_ANY; + + if (!event->attr.exclude_user) + *br_type |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + *br_type |= PERF_SAMPLE_BRANCH_KERNEL; } } -- cgit v0.10.2 From 2c70d0086e4e9e2440f0f78098090f32bde14277 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:10 -0500 Subject: perf/x86/intel: Disable FREEZE_LBRS_ON_PMI when LBR operates in callstack mode LBR callstack is designed for PEBS, It does not work well with FREEZE_LBRS_ON_PMI for non PEBS event. If FREEZE_LBRS_ON_PMI is set for non PEBS event, PMIs near call/return instructions may cause superfluous increase/decrease of LBR_TOS. This patch modifies __intel_pmu_lbr_enable() to not enable FREEZE_LBRS_ON_PMI when LBR operates in callstack mode. We currently don't use LBR callstack to capture kernel space callchain, so disabling FREEZE_LBRS_ON_PMI should not be a problem. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-15-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index a8b3f23..92a44fd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -131,14 +131,23 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); static void __intel_pmu_lbr_enable(void) { - u64 debugctl; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + u64 debugctl, lbr_select = 0; - if (cpuc->lbr_sel) - wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); + if (cpuc->lbr_sel) { + lbr_select = cpuc->lbr_sel->config; + wrmsrl(MSR_LBR_SELECT, lbr_select); + } rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + debugctl |= DEBUGCTLMSR_LBR; + /* + * LBR callstack does not work well with FREEZE_LBRS_ON_PMI. + * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions + * may cause superfluous increase/decrease of LBR_TOS. + */ + if (!(lbr_select & LBR_CALL_STACK)) + debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } -- cgit v0.10.2 From aa54ae9b87b83af7edabcc34a299e7e014609af4 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 4 Nov 2014 21:56:11 -0500 Subject: perf/x86/intel: Discard zero length call entries in LBR call stack "Zero length call" uses the attribute of the call instruction to push the immediate instruction pointer on to the stack and then pops off that address into a register. This is accomplished without any matching return instruction. It confuses the hardware and make the recorded call stack incorrect. We can partially resolve this issue by: decode call instructions and discard any zero length call entry in the LBR stack. Signed-off-by: Yan, Zheng Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Paul Mackerras Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1415156173-10035-16-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 92a44fd..084f2eb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -94,7 +94,8 @@ enum { X86_BR_ABORT = 1 << 12,/* transaction abort */ X86_BR_IN_TX = 1 << 13,/* in transaction */ X86_BR_NO_TX = 1 << 14,/* not in transaction */ - X86_BR_CALL_STACK = 1 << 15,/* call stack */ + X86_BR_ZERO_CALL = 1 << 15,/* zero length call */ + X86_BR_CALL_STACK = 1 << 16,/* call stack */ }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) @@ -111,13 +112,15 @@ enum { X86_BR_JMP |\ X86_BR_IRQ |\ X86_BR_ABORT |\ - X86_BR_IND_CALL) + X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL) #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) #define X86_BR_ANY_CALL \ (X86_BR_CALL |\ X86_BR_IND_CALL |\ + X86_BR_ZERO_CALL |\ X86_BR_SYSCALL |\ X86_BR_IRQ |\ X86_BR_INT) @@ -702,6 +705,12 @@ static int branch_type(unsigned long from, unsigned long to, int abort) ret = X86_BR_INT; break; case 0xe8: /* call near rel */ + insn_get_immediate(&insn); + if (insn.immediate1.value == 0) { + /* zero length call */ + ret = X86_BR_ZERO_CALL; + break; + } case 0x9a: /* call far absolute */ ret = X86_BR_CALL; break; -- cgit v0.10.2 From 2c44b1936bb3b135a3fac8b3493394d42e51cf70 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Nov 2014 10:36:45 +0100 Subject: perf/x86/intel: Expose LBR callstack to user space tooling With LBR call stack feature enable, there are three callchain options. Enable the 3rd callchain option (LBR callstack) to user space tooling. Signed-off-by: Peter Zijlstra (Intel) Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Andy Lutomirski Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vince Weaver Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/20141105093759.GQ10501@worktop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 69c26b3..a371d27 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -523,14 +523,6 @@ struct x86_perf_task_context { int lbr_stack_state; }; -enum { - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = PERF_SAMPLE_BRANCH_MAX_SHIFT, - PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE, - - PERF_SAMPLE_BRANCH_CALL_STACK = - 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, -}; - #define x86_add_quirk(func_) \ do { \ static struct x86_pmu_quirk __quirk __initdata = { \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 084f2eb..0473874 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -537,7 +537,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) u64 mask = 0, v; int i; - for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) { + for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { if (!(br_type & (1ULL << i))) continue; @@ -821,7 +821,7 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) /* * Map interface branch filters onto LBR filters */ -static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, @@ -840,7 +840,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, @@ -852,7 +852,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, }; -static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { +static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e46b932..1e3cd07 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -166,6 +166,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -175,18 +177,16 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = - 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = - 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = - 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = - 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; -- cgit v0.10.2 From aad2b21c151273fa7abc419dac51a980eff1dd17 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 5 Jan 2015 13:23:04 -0500 Subject: perf tools: Enable LBR call stack support Currently, there are two call chain recording options, fp and dwarf. Haswell has a new feature that utilizes the existing LBR facility to record call chains. Kernel side LBR support code provides this as a third option to record call chains. This patch enables the lbr call stack support on the tooling side. LBR call stack has some limitations: - It reuses current LBR facility, so LBR call stack and branch record can not be enabled at the same time. - It is only available for user-space callchains. However, it also offers some advantages: - LBR call stack can work on user apps which don't have frame-pointers or dwarf debug info compiled. It is a good alternative when nothing else works. Tested-by: Jiri Olsa Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Adrian Hunter Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Cody P Schafer Cc: David Ahern Cc: Don Zickus Cc: Jacob Shin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masanari Iida Cc: Namhyung Kim Cc: Paul Mackerras Cc: Rodrigo Campos Cc: Stephane Eranian Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/1420482185-29830-2-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 31e9774..1c7e50f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -115,13 +115,19 @@ OPTIONS implies -g. Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) as the method to collect + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect the information used to show the call graphs. In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. -q:: --quiet:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 404ab34..d0d02a8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -658,7 +658,7 @@ error: static void callchain_debug(void) { - static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); @@ -751,9 +751,9 @@ static struct record record = { #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "fp"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; #endif /* diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2f91094..0ba5f07 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,6 +249,8 @@ static int report__setup_sample_type(struct report *rep) if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 14e7a12..9f643ee 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) callchain_param.dump_size = size; } #endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + callchain_param.record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c0ec1ac..6033a0a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,6 +11,7 @@ enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, CALLCHAIN_DWARF, + CALLCHAIN_LBR, CALLCHAIN_MAX }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ea51a90..f93e520 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -537,13 +537,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) } static void -perf_evsel__config_callgraph(struct perf_evsel *evsel) +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (!opts->branch_stack) { + if (attr->exclude_user) { + pr_warning("LBR callstack option is only available " + "to get user callchain information. " + "Falling back to framepointers.\n"); + } else { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK; + } + } else + pr_warning("Cannot use LBR callstack with branch stack. " + "Falling back to framepointers.\n"); + } + if (callchain_param.record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); @@ -667,7 +684,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel); + perf_evsel__config_callgraph(evsel, opts); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; -- cgit v0.10.2 From 384b60557b5522fcb99646f0eb6e7a344cdb94c6 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 5 Jan 2015 13:23:05 -0500 Subject: perf tools: Construct LBR call chain LBR call stack only has user-space callchains. It is output in the PERF_SAMPLE_BRANCH_STACK data format. For kernel callchains, it's still in the form of PERF_SAMPLE_CALLCHAIN. The perf tool has to handle both data sources to construct a complete callstack. For the "perf report -D" option, both lbr and fp information will be displayed. A new call chain recording option "lbr" is introduced into the perf tool for LBR call stack. The user can use --call-graph lbr to get the call stack information from hardware. Here are some examples. When profiling bc(1) on Fedora 19: echo 'scale=2000; 4*a(1)' > cmd; perf record --call-graph lbr bc -l < cmd If enabling LBR, perf report output looks like: 50.36% bc bc [.] bc_divide | --- bc_divide execute run_code yyparse main __libc_start_main _start 33.66% bc bc [.] _one_mult | --- _one_mult bc_divide execute run_code yyparse main __libc_start_main _start 7.62% bc bc [.] _bc_do_add | --- _bc_do_add | |--99.89%-- 0x2000186a8 --0.11%-- [...] 6.83% bc bc [.] _bc_do_sub | --- _bc_do_sub | |--99.94%-- bc_add | execute | run_code | yyparse | main | __libc_start_main | _start --0.06%-- [...] 0.46% bc libc-2.17.so [.] __memset_sse2 | --- __memset_sse2 | |--54.13%-- bc_new_num | | | |--51.00%-- bc_divide | | execute | | run_code | | yyparse | | main | | __libc_start_main | | _start | | | |--30.46%-- _bc_do_sub | | bc_add | | execute | | run_code | | yyparse | | main | | __libc_start_main | | _start | | | --18.55%-- _bc_do_add | bc_add | execute | run_code | yyparse | main | __libc_start_main | _start | --45.87%-- bc_divide execute run_code yyparse main __libc_start_main _start If using FP, perf report output looks like: echo 'scale=2000; 4*a(1)' > cmd; perf record --call-graph fp bc -l < cmd 50.49% bc bc [.] bc_divide | --- bc_divide 33.57% bc bc [.] _one_mult | --- _one_mult 7.61% bc bc [.] _bc_do_add | --- _bc_do_add 0x2000186a8 6.88% bc bc [.] _bc_do_sub | --- _bc_do_sub 0.42% bc libc-2.17.so [.] __memcpy_ssse3_back | --- __memcpy_ssse3_back If using LBR, perf report -D output looks like: 3458145275743 0x2fd750 [0xd8]: PERF_RECORD_SAMPLE(IP, 0x2): 9748/9748: 0x408ea8 period: 609644 addr: 0 ... LBR call chain: nr:8 ..... 0: fffffffffffffe00 ..... 1: 0000000000408e50 ..... 2: 000000000040a458 ..... 3: 000000000040562e ..... 4: 0000000000408590 ..... 5: 00000000004022c0 ..... 6: 00000000004015dd ..... 7: 0000003d1cc21b43 ... FP chain: nr:2 ..... 0: fffffffffffffe00 ..... 1: 0000000000408ea8 ... thread: bc:9748 ...... dso: /usr/bin/bc The LBR call stack has the following known limitations: - Zero length calls are not filtered out by the hardware - Exception handing such as setjmp/longjmp will have calls/returns not match - Pushing different return address onto the stack will have calls/returns not match - If callstack is deeper than the LBR, only the last entries are captured Tested-by: Jiri Olsa Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Simon Que Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1420482185-29830-3-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3862274..dcf202a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -355,4 +355,8 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) +static inline bool has_branch_callstack(struct perf_evsel *evsel) +{ + return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1bca3a9..9e0f60a 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1502,18 +1502,100 @@ static int remove_loops(struct branch_entry *l, int nr) return nr; } -static int thread__resolve_callchain_sample(struct thread *thread, - struct ip_callchain *chain, - struct branch_stack *branch, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +/* + * Recolve LBR callstack chain sample + * Return: + * 1 on success get LBR callchain information + * 0 no available LBR callchain information, should try fp + * negative error code on other errors. + */ +static int resolve_lbr_callchain_sample(struct thread *thread, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) { + struct ip_callchain *chain = sample->callchain; + int chain_nr = min(max_stack, (int)chain->nr); + int i, j, err; + u64 ip; + + for (i = 0; i < chain_nr; i++) { + if (chain->ips[i] == PERF_CONTEXT_USER) + break; + } + + /* LBR only affects the user callchain */ + if (i != chain_nr) { + struct branch_stack *lbr_stack = sample->branch_stack; + int lbr_nr = lbr_stack->nr; + /* + * LBR callstack can only get user call chain. + * The mix_chain_nr is kernel call chain + * number plus LBR user call chain number. + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER, + * lbr_nr + 1 is the user call chain number. + * For details, please refer to the comments + * in callchain__printf + */ + int mix_chain_nr = i + 1 + lbr_nr + 1; + + if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + + for (j = 0; j < mix_chain_nr; j++) { + if (callchain_param.order == ORDER_CALLEE) { + if (j < i + 1) + ip = chain->ips[j]; + else if (j > i + 1) + ip = lbr_stack->entries[j - i - 2].from; + else + ip = lbr_stack->entries[0].to; + } else { + if (j < lbr_nr) + ip = lbr_stack->entries[lbr_nr - j - 1].from; + else if (j > lbr_nr) + ip = chain->ips[i + 1 - (j - lbr_nr)]; + else + ip = lbr_stack->entries[0].to; + } + + err = add_callchain_ip(thread, parent, root_al, false, ip); + if (err) + return (err < 0) ? err : 0; + } + return 1; + } + + return 0; +} + +static int thread__resolve_callchain_sample(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + struct branch_stack *branch = sample->branch_stack; + struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr); int i, j, err; int skip_idx = -1; int first_call = 0; + callchain_cursor_reset(&callchain_cursor); + + if (has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, sample, parent, + root_al, max_stack); + if (err) + return (err < 0) ? err : 0; + } + /* * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. @@ -1521,8 +1603,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain->nr < PERF_MAX_STACK_DEPTH) skip_idx = arch_skip_callchain_idx(thread, chain); - callchain_cursor_reset(&callchain_cursor); - /* * Add branches to call stack for easier browsing. This gives * more context for a sample than just the callers. @@ -1623,9 +1703,9 @@ int thread__resolve_callchain(struct thread *thread, struct addr_location *root_al, int max_stack) { - int ret = thread__resolve_callchain_sample(thread, sample->callchain, - sample->branch_stack, - parent, root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, evsel, + sample, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0baf75f..504b7e6 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -553,15 +553,67 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, return 0; } -static void callchain__printf(struct perf_sample *sample) +static void callchain__lbr_callstack_printf(struct perf_sample *sample) { + struct ip_callchain *callchain = sample->callchain; + struct branch_stack *lbr_stack = sample->branch_stack; + u64 kernel_callchain_nr = callchain->nr; unsigned int i; - printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr); + for (i = 0; i < kernel_callchain_nr; i++) { + if (callchain->ips[i] == PERF_CONTEXT_USER) + break; + } + + if ((i != kernel_callchain_nr) && lbr_stack->nr) { + u64 total_nr; + /* + * LBR callstack can only get user call chain, + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER. + * + * The user call chain is stored in LBR registers. + * LBR are pair registers. The caller is stored + * in "from" register, while the callee is stored + * in "to" register. + * For example, there is a call stack + * "A"->"B"->"C"->"D". + * The LBR registers will recorde like + * "C"->"D", "B"->"C", "A"->"B". + * So only the first "to" register and all "from" + * registers are needed to construct the whole stack. + */ + total_nr = i + 1 + lbr_stack->nr + 1; + kernel_callchain_nr = i + 1; + + printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr); + + for (i = 0; i < kernel_callchain_nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + i, callchain->ips[i]); + + printf("..... %2d: %016" PRIx64 "\n", + (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + for (i = 0; i < lbr_stack->nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + } +} + +static void callchain__printf(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + unsigned int i; + struct ip_callchain *callchain = sample->callchain; + + if (has_branch_callstack(evsel)) + callchain__lbr_callstack_printf(sample); + + printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); - for (i = 0; i < sample->callchain->nr; i++) + for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - i, sample->callchain->ips[i]); + i, callchain->ips[i]); } static void branch_stack__printf(struct perf_sample *sample) @@ -718,9 +770,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) - callchain__printf(sample); + callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) -- cgit v0.10.2 From acba3c7e4652ca5fcb2fd9376d58c2dffd8ddf2a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2015 14:15:39 +0100 Subject: perf, powerpc: Fix up flush_branch_stack() users The recent LBR rework for x86 left a stray flush_branch_stack() user in the PowerPC code, fix that up. Signed-off-by: Peter Zijlstra (Intel) Cc: Anshuman Khandual Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Christoph Lameter Cc: Joel Stanley Cc: Linus Torvalds Cc: Michael Ellerman Cc: Michael Neuling Cc: Paul Mackerras Cc: Tejun Heo Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 7c4f669..7fd60dc 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -124,7 +124,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} -static void power_pmu_flush_branch_stack(void) {} +static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -350,6 +350,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) cpuhw->bhrb_context = event->ctx; } cpuhw->bhrb_users++; + perf_sched_cb_inc(event->ctx->pmu); } static void power_pmu_bhrb_disable(struct perf_event *event) @@ -361,6 +362,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event) cpuhw->bhrb_users--; WARN_ON_ONCE(cpuhw->bhrb_users < 0); + perf_sched_cb_dec(event->ctx->pmu); if (!cpuhw->disabled && !cpuhw->bhrb_users) { /* BHRB cannot be turned off when other @@ -375,9 +377,12 @@ static void power_pmu_bhrb_disable(struct perf_event *event) /* Called from ctxsw to prevent one process's branch entries to * mingle with the other process's entries during context switch. */ -static void power_pmu_flush_branch_stack(void) +static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) { - if (ppmu->bhrb_nr) + if (!ppmu->bhrb_nr) + return; + + if (sched_in) power_pmu_bhrb_reset(); } /* Calculate the to address for a branch */ @@ -1901,7 +1906,7 @@ static struct pmu power_pmu = { .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, .event_idx = power_pmu_event_idx, - .flush_branch_stack = power_pmu_flush_branch_stack, + .sched_task = power_pmu_sched_task, }; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 43cc158..724d372 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -262,11 +262,6 @@ struct pmu { int (*event_idx) (struct perf_event *event); /*optional */ /* - * flush branch stack on context-switches (needed in cpu-wide mode) - */ - void (*flush_branch_stack) (void); - - /* * context-switches callback */ void (*sched_task) (struct perf_event_context *ctx, -- cgit v0.10.2 From 0808921a14ffa170186288508c807f2166b7d8df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Feb 2015 21:51:50 -0800 Subject: perf trace: Only insert blank duration bracket when tracing syscalls When printing just events, i.e. '--no-sys --ev some:events' it makes no sense to waste screen space. Before: # trace --no-sys --ev probe:* 84481.704 ( ): probe:vfs_getname:(ffffffff811ed023) pathname="/etc/services") 84481.892 ( ): probe:vfs_getname:(ffffffff811ed023) pathname="/etc/services") 84482.230 ( ): probe:vfs_getname:(ffffffff811ed023) pathname="/etc/resolv.conf") 84482.481 ( ): probe:vfs_getname:(ffffffff811ed023) pathname="/etc/hosts") 85097.725 ( ): probe:vfs_getname:(ffffffff811ed023) pathname="/root" # After: # trace --no-sys --ev probe:* 0.000 probe:vfs_getname:(ffffffff811ed023) pathname="/root") 1.711 probe:vfs_getname:(ffffffff811ed023) pathname="/etc/localtime") 2.103 probe:vfs_getname:(ffffffff811ed023) pathname="/etc/localtime") ^C# Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-jhryxgnam8zecq0q0wsy6pyb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b1c1df9..3a696aa 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1840,7 +1840,11 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, { trace__printf_interrupted_entry(trace, sample); trace__fprintf_tstamp(trace, sample->time, trace->output); - fprintf(trace->output, "(%9.9s): %s:", " ", evsel->name); + + if (trace->trace_syscalls) + fprintf(trace->output, "( ): "); + + fprintf(trace->output, "%s:", evsel->name); if (evsel->tp_format) { event_format__fprintf(evsel->tp_format, sample->cpu, -- cgit v0.10.2 From cfd70a26aadf0a9af80bbce035e5760736727a94 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Feb 2015 10:09:55 -0800 Subject: perf evlist: Introduce set_filter_pid method To filter out events for a certain pid, for instance, when tracing system wide, so that the tracer itself doesn't creates an event loop. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-byoia9dzu4gmkdv87etnd9zf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a8b2c57..39302a4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1085,6 +1085,19 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +{ + char *filter; + int ret; + + if (asprintf(&filter, "common_pid != %d", pid) < 0) + return -1; + + ret = perf_evlist__set_filter(evlist, filter); + free(filter); + return ret; +} + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { struct perf_evsel *pos; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c94a9e0..715fa3a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -77,6 +77,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); -- cgit v0.10.2 From 241b057ce5c01a24c280f124fab60109cb562589 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Feb 2015 10:15:21 -0800 Subject: perf trace: Filter out the trace pid when no threads are specified To avoid tracing the tracer. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-shmwd1khzpaobr3i0j1ygapg@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3a696aa..cb33e4c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2151,6 +2151,15 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + /* + * Better not use !target__has_task() here because we need to cover the + * case where no threads were specified in the command line, but a + * workload was, and in that case we will fill in the thread_map when + * we fork the workload in perf_evlist__prepare_workload. + */ + if (evlist->threads->map[0] == -1) + perf_evlist__set_filter_pid(evlist, getpid()); + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; -- cgit v0.10.2 From be199ada4fbbe5f742f854dce8e455cfcfcf7adb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Feb 2015 11:33:47 -0800 Subject: perf evlist: Introduce set_filter_pids method We need to filter multiple pids in trace, i.e. trace itself, gnome-terminal, X.org, etc. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-frtpkg7qapqwf7asa35wf8am@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 39302a4..8d0b623 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1085,19 +1085,38 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } -int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) { char *filter; - int ret; + int ret = -1; + size_t i; - if (asprintf(&filter, "common_pid != %d", pid) < 0) - return -1; + for (i = 0; i < npids; ++i) { + if (i == 0) { + if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) + return -1; + } else { + char *tmp; + + if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) + goto out_free; + + free(filter); + filter = tmp; + } + } ret = perf_evlist__set_filter(evlist, filter); +out_free: free(filter); return ret; } +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +{ + return perf_evlist__set_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { struct perf_evsel *pos; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 715fa3a..c19ff45 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -78,6 +78,7 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); -- cgit v0.10.2 From f078c3852c7367b78552be432bc24ca93ebbd4cf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Feb 2015 11:36:52 -0800 Subject: perf trace: Introduce --filter-pids When tracing in X we get event loops due to the tracing activity, i.e. updates to a gnome-terminal that generate syscalls for X.org, etc. To get a more useful view of what is happening, syscall wise, system wide, we need to filter those, like in: # ps ax|egrep '981|2296|1519' | grep -v egrep 981 tty1 Ss+ 5:40 /usr/bin/Xorg :0 -background none ... 1519 ? Sl 2:22 /usr/bin/gnome-shell 2296 ? Sl 4:16 /usr/libexec/gnome-terminal-server # # trace -e write --filter-pids 981,2296,1519 0.385 ( 0.021 ms): goa-daemon/2061 write(fd: 1, buf: 0x7fbeb017b000, count: 136) = 136 0.922 ( 0.014 ms): goa-daemon/2061 write(fd: 1, buf: 0x7fbeb017b000, count: 140) = 140 5006.525 ( 0.029 ms): goa-daemon/2061 write(fd: 1, buf: 0x7fbeb017b000, count: 136) = 136 5007.235 ( 0.023 ms): goa-daemon/2061 write(fd: 1, buf: 0x7fbeb017b000, count: 140) = 140 5177.646 ( 0.018 ms): rtkit-daemon/782 write(fd: 5, buf: 0x7f7eea70be88, count: 8) = 8 8314.497 ( 0.004 ms): gsd-locate-poi/2084 write(fd: 5, buf: 0x7fffe96af7b0, count: 8) = 8 8314.518 ( 0.002 ms): gsd-locate-poi/2084 write(fd: 5, buf: 0x7fffe96af0e0, count: 8) = 8 ^C# When this option is used the tracer pid is also filtered. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-f5qmiyy7c0uxdm21ncatpeek@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7e1b1f2..d6778e6 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -55,6 +55,9 @@ OPTIONS --uid=:: Record events in threads owned by uid. Name or number. +--filter-pids=:: + Filter out events for these pids and for 'trace' itself (comma separated list). + -v:: --verbose=:: Verbosity level. diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index cb33e4c..60ccfd5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1229,6 +1229,10 @@ struct trace { const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; double duration_filter; double runtime_ms; struct { @@ -2157,8 +2161,15 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * workload was, and in that case we will fill in the thread_map when * we fork the workload in perf_evlist__prepare_workload. */ - if (evlist->threads->map[0] == -1) - perf_evlist__set_filter_pid(evlist, getpid()); + if (trace->filter_pids.nr > 0) + err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); + else if (evlist->threads->map[0] == -1) + err = perf_evlist__set_filter_pid(evlist, getpid()); + + if (err < 0) { + printf("err=%d,%s\n", -err, strerror(-err)); + exit(1); + } err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) @@ -2491,6 +2502,38 @@ static int trace__set_duration(const struct option *opt, const char *str, return 0; } +static int trace__set_filter_pids(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret = -1; + size_t i; + struct trace *trace = opt->value; + /* + * FIXME: introduce a intarray class, plain parse csv and create a + * { int nr, int entries[] } struct... + */ + struct intlist *list = intlist__new(str); + + if (list == NULL) + return -1; + + i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; + trace->filter_pids.entries = calloc(i, sizeof(pid_t)); + + if (trace->filter_pids.entries == NULL) + goto out; + + trace->filter_pids.entries[0] = getpid(); + + for (i = 1; i < trace->filter_pids.nr; ++i) + trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; + + intlist__delete(list); + ret = 0; +out: + return ret; +} + static int trace__open_output(struct trace *trace, const char *filename) { struct stat st; @@ -2581,6 +2624,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "trace events on existing process id"), OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", "trace events on existing thread id"), + OPT_CALLBACK(0, "filter-pids", &trace, "float", + "show only events with duration > N.M ms", trace__set_filter_pids), OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", -- cgit v0.10.2 From 77c92582a52308868b6ef30a7e551eaceb0fc246 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Feb 2015 11:49:10 -0800 Subject: perf trace: Add man page entry for --event Forgot to do it when adding the feature. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-mx152b6x9cgknhw91vsyjlnd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index d6778e6..ba03fd5 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -118,6 +118,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--event:: + Trace other events, see 'perf list' for a complete list. + PAGEFAULTS ---------- -- cgit v0.10.2 From ddbb1b131062d020085577db700c3e816a227901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Feb 2015 12:10:29 -0800 Subject: perf trace: Separate routine that handles an event from the one that reads it Because we need to use ordered_events in some cases, so we will need to first have them in a queue, order that queue, and then process the event. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cmkw9zgoh0z4r218957ftp1a@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 60ccfd5..fbdfb33 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2092,10 +2092,39 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, return 0; } +static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) +{ + const u32 type = event->header.type; + struct perf_evsel *evsel; + + if (!trace->full_time && trace->base_time == 0) + trace->base_time = sample->time; + + if (type != PERF_RECORD_SAMPLE) { + trace__process_event(trace, trace->host, event, sample); + return; + } + + evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + if (evsel == NULL) { + fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); + return; + } + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + sample->raw_data == NULL) { + fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", + perf_evsel__name(evsel), sample->tid, + sample->cpu, sample->raw_size); + } else { + tracepoint_handler handler = evsel->handler; + handler(trace, evsel, event, sample); + } +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct perf_evlist *evlist = trace->evlist; - struct perf_evsel *evsel; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2190,8 +2219,6 @@ again: union perf_event *event; while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { - const u32 type = event->header.type; - tracepoint_handler handler; struct perf_sample sample; ++trace->nr_events; @@ -2202,30 +2229,7 @@ again: goto next_event; } - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample.time; - - if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, trace->host, event, &sample); - continue; - } - - evsel = perf_evlist__id2evsel(evlist, sample.id); - if (evsel == NULL) { - fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - goto next_event; - } - - if (evsel->attr.type == PERF_TYPE_TRACEPOINT && - sample.raw_data == NULL) { - fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample.tid, - sample.cpu, sample.raw_size); - goto next_event; - } - - handler = evsel->handler; - handler(trace, evsel, event, &sample); + trace__handle_event(trace, event, &sample); next_event: perf_evlist__mmap_consume(evlist, i); -- cgit v0.10.2 From 54245fdc357613633954bfd38cffb71cb9def067 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 14 Feb 2015 14:26:15 -0300 Subject: perf session: Remove wrappers to machines__find Start to untangle session from delivering samples, as there are tools that want to use ordered_events and don't use perf_session at all. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rn4pk3pjxd78sgzrkn19tktp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 504b7e6..fac08e1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -797,8 +797,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } -static struct machine * - perf_session__find_machine_for_cpumode(struct perf_session *session, +static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { @@ -816,14 +815,13 @@ static struct machine * else pid = sample->pid; - machine = perf_session__find_machine(session, pid); + machine = machines__find(machines, pid); if (!machine) - machine = perf_session__findnew_machine(session, - DEFAULT_GUEST_KERNEL_ID); + machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } - return &session->machines.host; + return &machines->host; } static int deliver_sample_value(struct perf_session *session, @@ -907,8 +905,7 @@ int perf_session__deliver_event(struct perf_session *session, evsel = perf_evlist__id2evsel(session->evlist, sample->id); - machine = perf_session__find_machine_for_cpumode(session, event, - sample); + machine = machines__find_for_cpumode(&session->machines, event, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: -- cgit v0.10.2 From 75be989a7a18e9666efd92b846ee48bed79e8086 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 14 Feb 2015 14:50:11 -0300 Subject: perf evlist: Adopt events_stats from perf_session For tools that don't deal with perf.data files, thus do not need to use perf_session. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-kglq67gvauq9tak02a4se00r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 891c393..7ce2966 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1473,9 +1473,9 @@ static int perf_sched__read_events(struct perf_sched *sched, goto out_delete; } - sched->nr_events = session->stats.nr_events[0]; - sched->nr_lost_events = session->stats.total_lost; - sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->evlist->stats.nr_events[0]; + sched->nr_lost_events = session->evlist->stats.total_lost; + sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } if (psession) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c4c7eac..5fb8723 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -716,7 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine) { pr_err("%u unprocessable samples recorded.\r", - top->session->stats.nr_unprocessable_samples++); + top->session->evlist->stats.nr_unprocessable_samples++); return; } @@ -856,7 +856,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) hists__inc_nr_events(evsel__hists(evsel), event->header.type); machine__process_event(machine, event, &sample); } else - ++session->stats.nr_unknown_events; + ++session->evlist->stats.nr_unknown_events; next_event: perf_evlist__mmap_consume(top->evlist, idx); } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c19ff45..d4768a3 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -51,6 +51,7 @@ struct perf_evlist { struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; + struct events_stats stats; }; struct perf_evsel_str_handler { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fac08e1..06ef1c3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -537,7 +537,7 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", oe->last_flush_type); - s->stats.nr_unordered_events++; + s->evlist->stats.nr_unordered_events++; } new = ordered_events__new(oe, timestamp, event); @@ -841,7 +841,7 @@ static int deliver_sample_value(struct perf_session *session, } if (!sid || sid->evsel == NULL) { - ++session->stats.nr_unknown_id; + ++session->evlist->stats.nr_unknown_id; return 0; } @@ -911,11 +911,11 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->stats.nr_unknown_id; + ++session->evlist->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->stats.nr_unprocessable_samples; + ++session->evlist->stats.nr_unprocessable_samples; return 0; } return perf_session__deliver_sample(session, tool, event, @@ -932,7 +932,7 @@ int perf_session__deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->stats.total_lost += event->lost.lost; + session->evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); @@ -941,7 +941,7 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); default: - ++session->stats.nr_unknown_events; + ++session->evlist->stats.nr_unknown_events; return -1; } } @@ -991,7 +991,7 @@ int perf_session__deliver_synth_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&session->evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, 0); @@ -1077,7 +1077,7 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&session->evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, file_offset); @@ -1129,43 +1129,43 @@ static void perf_session__warn_about_errors(const struct perf_session *session, const struct perf_tool *tool) { if (tool->lost == perf_event__process_lost && - session->stats.nr_events[PERF_RECORD_LOST] != 0) { + session->evlist->stats.nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->stats.nr_events[0], - session->stats.nr_events[PERF_RECORD_LOST]); + session->evlist->stats.nr_events[0], + session->evlist->stats.nr_events[PERF_RECORD_LOST]); } - if (session->stats.nr_unknown_events != 0) { + if (session->evlist->stats.nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_unknown_events); + session->evlist->stats.nr_unknown_events); } - if (session->stats.nr_unknown_id != 0) { + if (session->evlist->stats.nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->stats.nr_unknown_id); + session->evlist->stats.nr_unknown_id); } - if (session->stats.nr_invalid_chains != 0) { - ui__warning("Found invalid callchains!\n\n" - "%u out of %u events were discarded for this reason.\n\n" - "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_invalid_chains, - session->stats.nr_events[PERF_RECORD_SAMPLE]); - } + if (session->evlist->stats.nr_invalid_chains != 0) { + ui__warning("Found invalid callchains!\n\n" + "%u out of %u events were discarded for this reason.\n\n" + "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + session->evlist->stats.nr_invalid_chains, + session->evlist->stats.nr_events[PERF_RECORD_SAMPLE]); + } - if (session->stats.nr_unprocessable_samples != 0) { + if (session->evlist->stats.nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->stats.nr_unprocessable_samples); + session->evlist->stats.nr_unprocessable_samples); } - if (session->stats.nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); + if (session->evlist->stats.nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", session->evlist->stats.nr_unordered_events); } volatile int session_done; @@ -1485,7 +1485,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { size_t ret = fprintf(fp, "Aggregated stats:\n"); - ret += events_stats__fprintf(&session->stats, fp); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 6d663dc..fe859f3 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,7 +20,6 @@ struct perf_session { struct machines machines; struct perf_evlist *evlist; struct trace_event tevent; - struct events_stats stats; bool repipe; bool one_mmap; void *one_mmap_addr; -- cgit v0.10.2 From ccda068f96138734eb40e9202ea9562566b43c12 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 14 Feb 2015 14:57:13 -0300 Subject: perf session: Remove perf_session from warn_errors signature Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-pxxm1liohog3d6i826x8sud8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 06ef1c3..34dd749 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1125,47 +1125,47 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se return thread; } -static void perf_session__warn_about_errors(const struct perf_session *session, - const struct perf_tool *tool) +static void perf_tool__warn_about_errors(const struct perf_tool *tool, + const struct events_stats *stats) { if (tool->lost == perf_event__process_lost && - session->evlist->stats.nr_events[PERF_RECORD_LOST] != 0) { + stats->nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->evlist->stats.nr_events[0], - session->evlist->stats.nr_events[PERF_RECORD_LOST]); + stats->nr_events[0], + stats->nr_events[PERF_RECORD_LOST]); } - if (session->evlist->stats.nr_unknown_events != 0) { + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->evlist->stats.nr_unknown_events); + stats->nr_unknown_events); } - if (session->evlist->stats.nr_unknown_id != 0) { + if (stats->nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->evlist->stats.nr_unknown_id); + stats->nr_unknown_id); } - if (session->evlist->stats.nr_invalid_chains != 0) { + if (stats->nr_invalid_chains != 0) { ui__warning("Found invalid callchains!\n\n" "%u out of %u events were discarded for this reason.\n\n" "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->evlist->stats.nr_invalid_chains, - session->evlist->stats.nr_events[PERF_RECORD_SAMPLE]); + stats->nr_invalid_chains, + stats->nr_events[PERF_RECORD_SAMPLE]); } - if (session->evlist->stats.nr_unprocessable_samples != 0) { + if (stats->nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->evlist->stats.nr_unprocessable_samples); + stats->nr_unprocessable_samples); } - if (session->evlist->stats.nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", session->evlist->stats.nr_unordered_events); + if (stats->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", stats->nr_unordered_events); } volatile int session_done; @@ -1255,7 +1255,7 @@ done: err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); out_err: free(buf); - perf_session__warn_about_errors(session, tool); + perf_tool__warn_about_errors(tool, &session->evlist->stats); ordered_events__free(&session->ordered_events); return err; } @@ -1400,7 +1400,7 @@ out: err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session, tool); + perf_tool__warn_about_errors(tool, &session->evlist->stats); ordered_events__free(&session->ordered_events); session->one_mmap = false; return err; -- cgit v0.10.2 From 313e53b08e99b1dacf9ea2b0fbe97890db1ea95f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 14 Feb 2015 15:05:28 -0300 Subject: perf session: Remove perf_session from some deliver event routines Further untangling perf_session from plain event delivery routines. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-cvz8e6pwyogs4w14582iis9w@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 34dd749..0133d01 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -824,16 +824,15 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, return &machines->host; } -static int deliver_sample_value(struct perf_session *session, +static int deliver_sample_value(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid; + struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); - sid = perf_evlist__id2sid(session->evlist, v->id); if (sid) { sample->id = v->id; sample->period = v->value - sid->period; @@ -841,14 +840,14 @@ static int deliver_sample_value(struct perf_session *session, } if (!sid || sid->evsel == NULL) { - ++session->evlist->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } return tool->sample(tool, event, sample, sid->evsel, machine); } -static int deliver_sample_group(struct perf_session *session, +static int deliver_sample_group(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -858,7 +857,7 @@ static int deliver_sample_group(struct perf_session *session, u64 i; for (i = 0; i < sample->read.group.nr; i++) { - ret = deliver_sample_value(session, tool, event, sample, + ret = deliver_sample_value(evlist, tool, event, sample, &sample->read.group.values[i], machine); if (ret) @@ -869,7 +868,7 @@ static int deliver_sample_group(struct perf_session *session, } static int -perf_session__deliver_sample(struct perf_session *session, + perf_evlist__deliver_sample(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -886,10 +885,10 @@ perf_session__deliver_sample(struct perf_session *session, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) - return deliver_sample_group(session, tool, event, sample, + return deliver_sample_group(evlist, tool, event, sample, machine); else - return deliver_sample_value(session, tool, event, sample, + return deliver_sample_value(evlist, tool, event, sample, &sample->read.one, machine); } @@ -898,12 +897,13 @@ int perf_session__deliver_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; struct perf_evsel *evsel; struct machine *machine; dump_event(session, event, file_offset, sample); - evsel = perf_evlist__id2evsel(session->evlist, sample->id); + evsel = perf_evlist__id2evsel(evlist, sample->id); machine = machines__find_for_cpumode(&session->machines, event, sample); @@ -911,15 +911,14 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->evlist->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->evlist->stats.nr_unprocessable_samples; + ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_session__deliver_sample(session, tool, event, - sample, evsel, machine); + return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: @@ -932,7 +931,7 @@ int perf_session__deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->evlist->stats.total_lost += event->lost.lost; + evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); @@ -941,7 +940,7 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); default: - ++session->evlist->stats.nr_unknown_events; + ++evlist->stats.nr_unknown_events; return -1; } } @@ -1068,16 +1067,17 @@ static s64 perf_session__process_event(struct perf_session *session, struct perf_tool *tool, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; struct perf_sample sample; int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, perf_evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - events_stats__inc(&session->evlist->stats, event->header.type); + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, file_offset); @@ -1085,7 +1085,7 @@ static s64 perf_session__process_event(struct perf_session *session, /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample); + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) return ret; -- cgit v0.10.2 From 9fa8727aa4d98d35ca50ef9cd8a50c6468af921d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 14 Feb 2015 15:08:51 -0300 Subject: perf session: Remove perf_session from dump_event All it wants is session->evlist. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6w9663gka3jb1j1rfxxd5jcq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0133d01..e4f1669 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -688,14 +688,14 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_session__print_tstamp(struct perf_session *session, +static void perf_evlist__print_tstamp(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = __perf_evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(session->evlist)) { + !perf_evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -737,7 +737,7 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format) sample->read.one.id, sample->read.one.value); } -static void dump_event(struct perf_session *session, union perf_event *event, +static void dump_event(struct perf_evlist *evlist, union perf_event *event, u64 file_offset, struct perf_sample *sample) { if (!dump_trace) @@ -749,7 +749,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, trace_event(event); if (sample) - perf_session__print_tstamp(session, event, sample); + perf_evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); @@ -901,7 +901,7 @@ int perf_session__deliver_event(struct perf_session *session, struct perf_evsel *evsel; struct machine *machine; - dump_event(session, event, file_offset, sample); + dump_event(evlist, event, file_offset, sample); evsel = perf_evlist__id2evsel(evlist, sample->id); @@ -953,7 +953,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, int fd = perf_data_file__fd(session->file); int err; - dump_event(session, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, NULL); /* These events are processed right away */ switch (event->header.type) { -- cgit v0.10.2 From 280836812f5f821d26393268010f211160874810 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 22 Feb 2015 13:52:47 -0800 Subject: perf ordered_events: Stop using tool->ordered_events To figure out if ordered_events are being used when doing a flush operation, it is enough to check if there were in fact some events queued, i.e. look at oe->nr_events. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-1c5r404vy766kt5nflv88uag@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fd4be94..077ddd2 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -166,7 +166,7 @@ static int __ordered_events__flush(struct perf_session *s, struct ui_progress prog; int ret; - if (!tool->ordered_events || !limit) + if (!limit) return 0; if (show_progress) @@ -216,6 +216,9 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, }; int err; + if (oe->nr_events == 0) + return 0; + switch (how) { case OE_FLUSH__FINAL: oe->next_flush = ULLONG_MAX; -- cgit v0.10.2 From 07c1a0dadfce976e9877c55ce5212dd14753c91d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Feb 2015 15:34:23 -0300 Subject: perf tools: Introduce dump_stack signal helper To use in stdio based tools, like 'trace'. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-79kjmerlw6d88csyx1afzwvn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 92db3f1..4ee6d0d 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -269,6 +269,13 @@ void dump_stack(void) void dump_stack(void) {} #endif +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + exit(sig); +} + void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 73c2f8e..fbd598a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -277,6 +277,7 @@ char *ltrim(char *s); char *rtrim(char *s); void dump_stack(void); +void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; -- cgit v0.10.2 From 4d08cb80ef5199258c01a3444fd29d94a36a0343 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Feb 2015 15:35:55 -0300 Subject: perf trace: Dump stack on segfaults [root@ssdandy ~]# perf trace --filter-pids 16348 0.000 ( 0.000 ms): tuned/1027 ... [continued]: select()) = 0 Timeout 793.770 ( 0.000 ms): lsmd/895 ... [continued]: select()) = 0 Timeout 793.775 (793.724 ms): tuned/1027 select(tvp: 0x7f7655556e50) ... perf: Segmentation fault Obtained 15 stack frames. perf(dump_stack+0x2e) [0x4ed330] perf(sighandler_dump_stack+0x2e) [0x4ed40f] /lib64/libc.so.6(+0x35640) [0x7fa2d5b69640] perf() [0x4c2d35] perf(machine__findnew_thread+0x39) [0x4c2ed6] perf() [0x454a4d] perf() [0x455f87] perf() [0x456556] perf(cmd_trace+0xa7e) [0x4580af] perf() [0x4867bd] perf() [0x486a1c] perf() [0x486b68] perf(main+0x23b) [0x486ec9] /lib64/libc.so.6(__libc_start_main+0xf5) [0x7fa2d5b55af5] perf() [0x41bd91] [ root@ssdandy ~]# Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v38cbxcnm2yf5qn9u4y4n9ab@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fbdfb33..5cd84974 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2660,6 +2660,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) int err; char bf[BUFSIZ]; + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); + trace.evlist = perf_evlist__new(); if (trace.evlist == NULL) return -ENOMEM; -- cgit v0.10.2 From 506740654db4fa5b6e1229147cee3cf8c7e07eca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Feb 2015 17:20:31 -0300 Subject: perf tools: Print the thread's tid on PERF_RECORD_COMM events when -D is asked Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-fmto8ft6jrtwz09dxn5d4z8w@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6c6d044..9e806d8 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -615,7 +615,7 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) else s = ""; - return fprintf(fp, "%s: %s:%d\n", s, event->comm.comm, event->comm.tid); + return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } int perf_event__process_comm(struct perf_tool *tool __maybe_unused, -- cgit v0.10.2 From 39bed6cbb842d8edf5a26b01122b391d36775b5e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:40 +0000 Subject: perf: Make perf_cgroup_from_task() global Move perf_cgroup_from_task() from kernel/events/ to include/linux/ along with the necessary struct definitions, so that it can be used by the PMU code. When the upcoming Intel Cache Monitoring PMU driver assigns monitoring IDs to perf events, it needs to be able to check whether any two monitoring events overlap (say, a cgroup and task event), which means we need to be able to lookup the cgroup associated with a task (if any). Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Paul Mackerras Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 724d372..cae4a94 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,6 +53,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -547,6 +548,35 @@ struct perf_output_handle { int page; }; +#ifdef CONFIG_CGROUP_PERF + +/* + * perf_cgroup_info keeps track of time_enabled for a cgroup. + * This is a per-cpu dynamically allocated data structure. + */ +struct perf_cgroup_info { + u64 time; + u64 timestamp; +}; + +struct perf_cgroup { + struct cgroup_subsys_state css; + struct perf_cgroup_info __percpu *info; +}; + +/* + * Must ensure cgroup is pinned (css_get) before calling + * this function. In other words, we cannot call this function + * if there is no cgroup event for the current CPU context. + */ +static inline struct perf_cgroup * +perf_cgroup_from_task(struct task_struct *task) +{ + return container_of(task_css(task, perf_event_cgrp_id), + struct perf_cgroup, css); +} +#endif /* CONFIG_CGROUP_PERF */ + #ifdef CONFIG_PERF_EVENTS extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); diff --git a/kernel/events/core.c b/kernel/events/core.c index 20cece0..072de31 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -34,11 +34,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -351,32 +351,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, #ifdef CONFIG_CGROUP_PERF -/* - * perf_cgroup_info keeps track of time_enabled for a cgroup. - * This is a per-cpu dynamically allocated data structure. - */ -struct perf_cgroup_info { - u64 time; - u64 timestamp; -}; - -struct perf_cgroup { - struct cgroup_subsys_state css; - struct perf_cgroup_info __percpu *info; -}; - -/* - * Must ensure cgroup is pinned (css_get) before calling - * this function. In other words, we cannot call this function - * if there is no cgroup event for the current CPU context. - */ -static inline struct perf_cgroup * -perf_cgroup_from_task(struct task_struct *task) -{ - return container_of(task_css(task, perf_event_cgrp_id), - struct perf_cgroup, css); -} - static inline bool perf_cgroup_match(struct perf_event *event) { -- cgit v0.10.2 From eacd3ecc34472ce3751eedfc94e44c7cc6eb6305 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:41 +0000 Subject: perf: Add ->count() function to read per-package counters For PMU drivers that record per-package counters, the ->count variable cannot be used to record an accurate aggregated value, since it's not possible to perform SMP cross-calls to cpus on other packages from the context in which we update ->count. Introduce a new optional ->count() accessor function that can be used to customize how values are collected. If a PMU driver doesn't provide a ->count() function, we fallback to the existing code. There is necessarily a window of staleness with this approach because the task that generated the counter value may not have been scheduled by the cpu recently. An alternative and more complex approach would be to use a hrtimer to periodically refresh the values from a more permissive scheduling context. So, we're trading off complexity for accuracy. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cae4a94..9fc9b0d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -272,6 +272,11 @@ struct pmu { */ size_t task_ctx_size; + + /* + * Return the count value for a counter. + */ + u64 (*count) (struct perf_event *event); /*optional*/ }; /** @@ -770,6 +775,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, __perf_event_task_sched_out(prev, next); } +static inline u64 __perf_event_count(struct perf_event *event) +{ + return local64_read(&event->count) + atomic64_read(&event->child_count); +} + extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); diff --git a/kernel/events/core.c b/kernel/events/core.c index 072de31..4e8dc59 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3194,7 +3194,10 @@ static void __perf_event_read(void *info) static inline u64 perf_event_count(struct perf_event *event) { - return local64_read(&event->count) + atomic64_read(&event->child_count); + if (event->pmu->count) + return event->pmu->count(event); + + return __perf_event_count(event); } static u64 perf_event_read(struct perf_event *event) -- cgit v0.10.2 From 79dff51e900fd26a073be8b23acfbd8c15edb181 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:42 +0000 Subject: perf: Move cgroup init before PMU ->event_init() The Intel QoS PMU needs to know whether an event is part of a cgroup during ->event_init(), because tasks in the same cgroup share a monitoring ID. Move the cgroup initialisation before calling into the PMU driver. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 4e8dc59..1fc3bae 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7116,7 +7116,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, struct perf_event *group_leader, struct perf_event *parent_event, perf_overflow_handler_t overflow_handler, - void *context) + void *context, int cgroup_fd) { struct pmu *pmu; struct perf_event *event; @@ -7212,6 +7212,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!has_branch_stack(event)) event->attr.branch_sample_type = 0; + if (cgroup_fd != -1) { + err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); + if (err) + goto err_ns; + } + pmu = perf_init_event(event); if (!pmu) goto err_ns; @@ -7235,6 +7241,8 @@ err_pmu: event->destroy(event); module_put(pmu->module); err_ns: + if (is_cgroup_event(event)) + perf_detach_cgroup(event); if (event->ns) put_pid_ns(event->ns); kfree(event); @@ -7453,6 +7461,7 @@ SYSCALL_DEFINE5(perf_event_open, int move_group = 0; int err; int f_flags = O_RDWR; + int cgroup_fd = -1; /* for future expandability... */ if (flags & ~PERF_FLAG_ALL) @@ -7518,21 +7527,16 @@ SYSCALL_DEFINE5(perf_event_open, get_online_cpus(); + if (flags & PERF_FLAG_PID_CGROUP) + cgroup_fd = pid; + event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, - NULL, NULL); + NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cpus; } - if (flags & PERF_FLAG_PID_CGROUP) { - err = perf_cgroup_connect(pid, event, &attr, group_leader); - if (err) { - __free_event(event); - goto err_cpus; - } - } - if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -ENOTSUPP; @@ -7769,7 +7773,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, */ event = perf_event_alloc(attr, cpu, task, NULL, NULL, - overflow_handler, context); + overflow_handler, context, -1); if (IS_ERR(event)) { err = PTR_ERR(event); goto err; @@ -8130,7 +8134,7 @@ inherit_event(struct perf_event *parent_event, parent_event->cpu, child, group_leader, parent_event, - NULL, NULL); + NULL, NULL, -1); if (IS_ERR(child_event)) return child_event; -- cgit v0.10.2 From cbc82b17263877ea5d21e84c58ce03f0292458a1 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 23 Jan 2015 18:45:43 +0000 Subject: x86: Add support for Intel Cache QoS Monitoring (CQM) detection This patch adds support for the new Cache QoS Monitoring (CQM) feature found in future Intel Xeon processors. It includes the new values to track CQM resources to the cpuinfo_x86 structure, plus the CPUID detection routines for CQM. CQM allows a process, or set of processes, to be tracked by the CPU to determine the cache usage of that task group. Using this data from the CPU, software can be written to extract this data and report cache usage and occupancy for a particular process, or group of processes. More information about Cache QoS Monitoring can be found in the Intel (R) x86 Architecture Software Developer Manual, section 17.14. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Chris Webb Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Igor Mammedov Cc: Jacob Shin Cc: Jan Beulich Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Steven Honeyman Cc: Steven Rostedt Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 90a5485..361922d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include #endif -#define NCAPINTS 11 /* N 32-bit words worth of info */ +#define NCAPINTS 13 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -226,6 +226,7 @@ #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ @@ -242,6 +243,12 @@ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ + +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ec1c935..a12d50e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -109,6 +109,9 @@ struct cpuinfo_x86 { /* in KB - valid for CPUS which support this call: */ int x86_cache_size; int x86_cache_alignment; /* In bytes */ + /* Cache QoS architectural values: */ + int x86_cache_max_rmid; /* max index */ + int x86_cache_occ_scale; /* scale to bytes */ int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07f2fc3..9fa00b2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -645,6 +645,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[10] = eax; } + /* Additional Intel-defined flags: level 0x0000000F */ + if (c->cpuid_level >= 0x0000000F) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=0 */ + cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); + c->x86_capability[11] = edx; + if (cpu_has(c, X86_FEATURE_CQM_LLC)) { + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = ebx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[12] = edx; + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } + } else { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + } + } + /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -833,6 +857,20 @@ static void generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); } +static void x86_init_cache_qos(struct cpuinfo_x86 *c) +{ + /* + * The heavy lifting of max_rmid and cache_occ_scale are handled + * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu + * in case CQM bits really aren't there in this CPU. + */ + if (c != &boot_cpu_data) { + boot_cpu_data.x86_cache_max_rmid = + min(boot_cpu_data.x86_cache_max_rmid, + c->x86_cache_max_rmid); + } +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -922,6 +960,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) init_hypervisor(c); x86_init_rdrand(c); + x86_init_cache_qos(c); /* * Clear/Set all flags overriden by options, need do it -- cgit v0.10.2 From 4afbb24ce5e723c8a093a6674a3c33062175078a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:44 +0000 Subject: perf/x86/intel: Add Intel Cache QoS Monitoring support Future Intel Xeon processors support a Cache QoS Monitoring feature that allows tracking of the LLC occupancy for a task or task group, i.e. the amount of data in pulled into the LLC for the task (group). Currently the PMU only supports per-cpu events. We create an event for each cpu and read out all the LLC occupancy values. Because this results in duplicate values being written out to userspace, we also export a .per-pkg event file so that the perf tools only accumulate values for one cpu per package. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c new file mode 100644 index 0000000..05b4cd2 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -0,0 +1,530 @@ +/* + * Intel Cache Quality-of-Service Monitoring (CQM) support. + * + * Based very, very heavily on work by Peter Zijlstra. + */ + +#include +#include +#include +#include "perf_event.h" + +#define MSR_IA32_PQR_ASSOC 0x0c8f +#define MSR_IA32_QM_CTR 0x0c8e +#define MSR_IA32_QM_EVTSEL 0x0c8d + +static unsigned int cqm_max_rmid = -1; +static unsigned int cqm_l3_scale; /* supposedly cacheline size */ + +struct intel_cqm_state { + raw_spinlock_t lock; + int rmid; + int cnt; +}; + +static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); + +/* + * Protects cache_cgroups. + */ +static DEFINE_MUTEX(cache_mutex); + +/* + * Groups of events that have the same target(s), one RMID per group. + */ +static LIST_HEAD(cache_groups); + +/* + * Mask of CPUs for reading CQM values. We only need one per-socket. + */ +static cpumask_t cqm_cpumask; + +#define RMID_VAL_ERROR (1ULL << 63) +#define RMID_VAL_UNAVAIL (1ULL << 62) + +#define QOS_L3_OCCUP_EVENT_ID (1 << 0) + +#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID + +static u64 __rmid_read(unsigned long rmid) +{ + u64 val; + + /* + * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt, + * it just says that to increase confusion. + */ + wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid); + rdmsrl(MSR_IA32_QM_CTR, val); + + /* + * Aside from the ERROR and UNAVAIL bits, assume this thing returns + * the number of cachelines tagged with @rmid. + */ + return val; +} + +static unsigned long *cqm_rmid_bitmap; + +/* + * Returns < 0 on fail. + */ +static int __get_rmid(void) +{ + return bitmap_find_free_region(cqm_rmid_bitmap, cqm_max_rmid, 0); +} + +static void __put_rmid(int rmid) +{ + bitmap_release_region(cqm_rmid_bitmap, rmid, 0); +} + +static int intel_cqm_setup_rmid_cache(void) +{ + cqm_rmid_bitmap = kmalloc(sizeof(long) * BITS_TO_LONGS(cqm_max_rmid), GFP_KERNEL); + if (!cqm_rmid_bitmap) + return -ENOMEM; + + bitmap_zero(cqm_rmid_bitmap, cqm_max_rmid); + + /* + * RMID 0 is special and is always allocated. It's used for all + * tasks that are not monitored. + */ + bitmap_allocate_region(cqm_rmid_bitmap, 0, 0); + + return 0; +} + +/* + * Determine if @a and @b measure the same set of tasks. + */ +static bool __match_event(struct perf_event *a, struct perf_event *b) +{ + if ((a->attach_state & PERF_ATTACH_TASK) != + (b->attach_state & PERF_ATTACH_TASK)) + return false; + + /* not task */ + + return true; /* if not task, we're machine wide */ +} + +/* + * Determine if @a's tasks intersect with @b's tasks + */ +static bool __conflict_event(struct perf_event *a, struct perf_event *b) +{ + /* + * If one of them is not a task, same story as above with cgroups. + */ + if (!(a->attach_state & PERF_ATTACH_TASK) || + !(b->attach_state & PERF_ATTACH_TASK)) + return true; + + /* + * Must be non-overlapping. + */ + return false; +} + +/* + * Find a group and setup RMID. + * + * If we're part of a group, we use the group's RMID. + */ +static int intel_cqm_setup_event(struct perf_event *event, + struct perf_event **group) +{ + struct perf_event *iter; + int rmid; + + list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { + if (__match_event(iter, event)) { + /* All tasks in a group share an RMID */ + event->hw.cqm_rmid = iter->hw.cqm_rmid; + *group = iter; + return 0; + } + + if (__conflict_event(iter, event)) + return -EBUSY; + } + + rmid = __get_rmid(); + if (rmid < 0) + return rmid; + + event->hw.cqm_rmid = rmid; + return 0; +} + +static void intel_cqm_event_read(struct perf_event *event) +{ + unsigned long rmid = event->hw.cqm_rmid; + u64 val; + + val = __rmid_read(rmid); + + /* + * Ignore this reading on error states and do not update the value. + */ + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return; + + local64_set(&event->count, val); +} + +static void intel_cqm_event_start(struct perf_event *event, int mode) +{ + struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); + unsigned long rmid = event->hw.cqm_rmid; + unsigned long flags; + + if (!(event->hw.cqm_state & PERF_HES_STOPPED)) + return; + + event->hw.cqm_state &= ~PERF_HES_STOPPED; + + raw_spin_lock_irqsave(&state->lock, flags); + + if (state->cnt++) + WARN_ON_ONCE(state->rmid != rmid); + else + WARN_ON_ONCE(state->rmid); + + state->rmid = rmid; + wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid); + + raw_spin_unlock_irqrestore(&state->lock, flags); +} + +static void intel_cqm_event_stop(struct perf_event *event, int mode) +{ + struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); + unsigned long flags; + + if (event->hw.cqm_state & PERF_HES_STOPPED) + return; + + event->hw.cqm_state |= PERF_HES_STOPPED; + + raw_spin_lock_irqsave(&state->lock, flags); + intel_cqm_event_read(event); + + if (!--state->cnt) { + state->rmid = 0; + wrmsrl(MSR_IA32_PQR_ASSOC, 0); + } else { + WARN_ON_ONCE(!state->rmid); + } + + raw_spin_unlock_irqrestore(&state->lock, flags); +} + +static int intel_cqm_event_add(struct perf_event *event, int mode) +{ + int rmid; + + event->hw.cqm_state = PERF_HES_STOPPED; + rmid = event->hw.cqm_rmid; + WARN_ON_ONCE(!rmid); + + if (mode & PERF_EF_START) + intel_cqm_event_start(event, mode); + + return 0; +} + +static void intel_cqm_event_del(struct perf_event *event, int mode) +{ + intel_cqm_event_stop(event, mode); +} + +static void intel_cqm_event_destroy(struct perf_event *event) +{ + struct perf_event *group_other = NULL; + + mutex_lock(&cache_mutex); + + /* + * If there's another event in this group... + */ + if (!list_empty(&event->hw.cqm_group_entry)) { + group_other = list_first_entry(&event->hw.cqm_group_entry, + struct perf_event, + hw.cqm_group_entry); + list_del(&event->hw.cqm_group_entry); + } + + /* + * And we're the group leader.. + */ + if (!list_empty(&event->hw.cqm_groups_entry)) { + /* + * If there was a group_other, make that leader, otherwise + * destroy the group and return the RMID. + */ + if (group_other) { + list_replace(&event->hw.cqm_groups_entry, + &group_other->hw.cqm_groups_entry); + } else { + int rmid = event->hw.cqm_rmid; + + __put_rmid(rmid); + list_del(&event->hw.cqm_groups_entry); + } + } + + mutex_unlock(&cache_mutex); +} + +static struct pmu intel_cqm_pmu; + +/* + * XXX there's a bit of a problem in that we cannot simply do the one + * event per node as one would want, since that one event would one get + * scheduled on the one cpu. But we want to 'schedule' the RMID on all + * CPUs. + * + * This means we want events for each CPU, however, that generates a lot + * of duplicate values out to userspace -- this is not to be helped + * unless we want to change the core code in some way. Fore more info, + * see intel_cqm_event_read(). + */ +static int intel_cqm_event_init(struct perf_event *event) +{ + struct perf_event *group = NULL; + int err; + + if (event->attr.type != intel_cqm_pmu.type) + return -ENOENT; + + if (event->attr.config & ~QOS_EVENT_MASK) + return -EINVAL; + + if (event->cpu == -1) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + INIT_LIST_HEAD(&event->hw.cqm_group_entry); + INIT_LIST_HEAD(&event->hw.cqm_groups_entry); + + event->destroy = intel_cqm_event_destroy; + + mutex_lock(&cache_mutex); + + err = intel_cqm_setup_event(event, &group); /* will also set rmid */ + if (err) + goto out; + + if (group) { + list_add_tail(&event->hw.cqm_group_entry, + &group->hw.cqm_group_entry); + } else { + list_add_tail(&event->hw.cqm_groups_entry, + &cache_groups); + } + +out: + mutex_unlock(&cache_mutex); + return err; +} + +EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01"); +EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1"); +EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes"); +EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL); +EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1"); + +static struct attribute *intel_cqm_events_attr[] = { + EVENT_PTR(intel_cqm_llc), + EVENT_PTR(intel_cqm_llc_pkg), + EVENT_PTR(intel_cqm_llc_unit), + EVENT_PTR(intel_cqm_llc_scale), + EVENT_PTR(intel_cqm_llc_snapshot), + NULL, +}; + +static struct attribute_group intel_cqm_events_group = { + .name = "events", + .attrs = intel_cqm_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); +static struct attribute *intel_cqm_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group intel_cqm_format_group = { + .name = "format", + .attrs = intel_cqm_formats_attr, +}; + +static const struct attribute_group *intel_cqm_attr_groups[] = { + &intel_cqm_events_group, + &intel_cqm_format_group, + NULL, +}; + +static struct pmu intel_cqm_pmu = { + .attr_groups = intel_cqm_attr_groups, + .task_ctx_nr = perf_sw_context, + .event_init = intel_cqm_event_init, + .add = intel_cqm_event_add, + .del = intel_cqm_event_del, + .start = intel_cqm_event_start, + .stop = intel_cqm_event_stop, + .read = intel_cqm_event_read, +}; + +static inline void cqm_pick_event_reader(int cpu) +{ + int phys_id = topology_physical_package_id(cpu); + int i; + + for_each_cpu(i, &cqm_cpumask) { + if (phys_id == topology_physical_package_id(i)) + return; /* already got reader for this socket */ + } + + cpumask_set_cpu(cpu, &cqm_cpumask); +} + +static void intel_cqm_cpu_prepare(unsigned int cpu) +{ + struct intel_cqm_state *state = &per_cpu(cqm_state, cpu); + struct cpuinfo_x86 *c = &cpu_data(cpu); + + raw_spin_lock_init(&state->lock); + state->rmid = 0; + state->cnt = 0; + + WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); + WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); +} + +static void intel_cqm_cpu_exit(unsigned int cpu) +{ + int phys_id = topology_physical_package_id(cpu); + int i; + + /* + * Is @cpu a designated cqm reader? + */ + if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) + return; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + + if (phys_id == topology_physical_package_id(i)) { + cpumask_set_cpu(i, &cqm_cpumask); + break; + } + } +} + +static int intel_cqm_cpu_notifier(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + intel_cqm_cpu_prepare(cpu); + break; + case CPU_DOWN_PREPARE: + intel_cqm_cpu_exit(cpu); + break; + case CPU_STARTING: + cqm_pick_event_reader(cpu); + break; + } + + return NOTIFY_OK; +} + +static const struct x86_cpu_id intel_cqm_match[] = { + { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC }, + {} +}; + +static int __init intel_cqm_init(void) +{ + char *str, scale[20]; + int i, cpu, ret; + + if (!x86_match_cpu(intel_cqm_match)) + return -ENODEV; + + cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale; + + /* + * It's possible that not all resources support the same number + * of RMIDs. Instead of making scheduling much more complicated + * (where we have to match a task's RMID to a cpu that supports + * that many RMIDs) just find the minimum RMIDs supported across + * all cpus. + * + * Also, check that the scales match on all cpus. + */ + cpu_notifier_register_begin(); + + for_each_online_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86_cache_max_rmid < cqm_max_rmid) + cqm_max_rmid = c->x86_cache_max_rmid; + + if (c->x86_cache_occ_scale != cqm_l3_scale) { + pr_err("Multiple LLC scale values, disabling\n"); + ret = -EINVAL; + goto out; + } + } + + snprintf(scale, sizeof(scale), "%u", cqm_l3_scale); + str = kstrdup(scale, GFP_KERNEL); + if (!str) { + ret = -ENOMEM; + goto out; + } + + event_attr_intel_cqm_llc_scale.event_str = str; + + ret = intel_cqm_setup_rmid_cache(); + if (ret) + goto out; + + for_each_online_cpu(i) { + intel_cqm_cpu_prepare(i); + cqm_pick_event_reader(i); + } + + __perf_cpu_notifier(intel_cqm_cpu_notifier); + + ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1); + + if (ret) + pr_err("Intel CQM perf registration failed: %d\n", ret); + else + pr_info("Intel CQM monitoring enabled\n"); + +out: + cpu_notifier_register_done(); + + return ret; +} +device_initcall(intel_cqm_init); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9fc9b0d..ca5504c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -123,6 +123,13 @@ struct hw_perf_event { /* for tp_event->class */ struct list_head tp_list; }; + struct { /* intel_cqm */ + int cqm_state; + int cqm_rmid; + struct list_head cqm_events_entry; + struct list_head cqm_groups_entry; + struct list_head cqm_group_entry; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* -- cgit v0.10.2 From 35298e554c74b7849875e3676ba8eaf833c7b917 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:45 +0000 Subject: perf/x86/intel: Implement LRU monitoring ID allocation for CQM It's possible to run into issues with re-using unused monitoring IDs because there may be stale cachelines associated with that ID from a previous allocation. This can cause the LLC occupancy values to be inaccurate. To attempt to mitigate this problem we place the IDs on a least recently used list, essentially a FIFO. The basic idea is that the longer the time period between ID re-use the lower the probability that stale cachelines exist in the cache. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-7-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 05b4cd2..b5d9d74 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -25,7 +25,7 @@ struct intel_cqm_state { static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); /* - * Protects cache_cgroups. + * Protects cache_cgroups and cqm_rmid_lru. */ static DEFINE_MUTEX(cache_mutex); @@ -64,36 +64,120 @@ static u64 __rmid_read(unsigned long rmid) return val; } -static unsigned long *cqm_rmid_bitmap; +struct cqm_rmid_entry { + u64 rmid; + struct list_head list; +}; + +/* + * A least recently used list of RMIDs. + * + * Oldest entry at the head, newest (most recently used) entry at the + * tail. This list is never traversed, it's only used to keep track of + * the lru order. That is, we only pick entries of the head or insert + * them on the tail. + * + * All entries on the list are 'free', and their RMIDs are not currently + * in use. To mark an RMID as in use, remove its entry from the lru + * list. + * + * This list is protected by cache_mutex. + */ +static LIST_HEAD(cqm_rmid_lru); + +/* + * We use a simple array of pointers so that we can lookup a struct + * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid() + * and __put_rmid() from having to worry about dealing with struct + * cqm_rmid_entry - they just deal with rmids, i.e. integers. + * + * Once this array is initialized it is read-only. No locks are required + * to access it. + * + * All entries for all RMIDs can be looked up in the this array at all + * times. + */ +static struct cqm_rmid_entry **cqm_rmid_ptrs; + +static inline struct cqm_rmid_entry *__rmid_entry(int rmid) +{ + struct cqm_rmid_entry *entry; + + entry = cqm_rmid_ptrs[rmid]; + WARN_ON(entry->rmid != rmid); + + return entry; +} /* * Returns < 0 on fail. + * + * We expect to be called with cache_mutex held. */ static int __get_rmid(void) { - return bitmap_find_free_region(cqm_rmid_bitmap, cqm_max_rmid, 0); + struct cqm_rmid_entry *entry; + + lockdep_assert_held(&cache_mutex); + + if (list_empty(&cqm_rmid_lru)) + return -EAGAIN; + + entry = list_first_entry(&cqm_rmid_lru, struct cqm_rmid_entry, list); + list_del(&entry->list); + + return entry->rmid; } static void __put_rmid(int rmid) { - bitmap_release_region(cqm_rmid_bitmap, rmid, 0); + struct cqm_rmid_entry *entry; + + lockdep_assert_held(&cache_mutex); + + entry = __rmid_entry(rmid); + + list_add_tail(&entry->list, &cqm_rmid_lru); } static int intel_cqm_setup_rmid_cache(void) { - cqm_rmid_bitmap = kmalloc(sizeof(long) * BITS_TO_LONGS(cqm_max_rmid), GFP_KERNEL); - if (!cqm_rmid_bitmap) + struct cqm_rmid_entry *entry; + int r; + + cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) * + (cqm_max_rmid + 1), GFP_KERNEL); + if (!cqm_rmid_ptrs) return -ENOMEM; - bitmap_zero(cqm_rmid_bitmap, cqm_max_rmid); + for (r = 0; r <= cqm_max_rmid; r++) { + struct cqm_rmid_entry *entry; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto fail; + + INIT_LIST_HEAD(&entry->list); + entry->rmid = r; + cqm_rmid_ptrs[r] = entry; + + list_add_tail(&entry->list, &cqm_rmid_lru); + } /* * RMID 0 is special and is always allocated. It's used for all * tasks that are not monitored. */ - bitmap_allocate_region(cqm_rmid_bitmap, 0, 0); + entry = __rmid_entry(0); + list_del(&entry->list); return 0; +fail: + while (r--) + kfree(cqm_rmid_ptrs[r]); + + kfree(cqm_rmid_ptrs); + return -ENOMEM; } /* -- cgit v0.10.2 From bfe1fcd2688f557a6b6a88f59ea7619228728bd7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:46 +0000 Subject: perf/x86/intel: Support task events with Intel CQM Add support for task events as well as system-wide events. This change has a big impact on the way that we gather LLC occupancy values in intel_cqm_event_read(). Currently, for system-wide (per-cpu) events we defer processing to userspace which knows how to discard all but one cpu result per package. Things aren't so simple for task events because we need to do the value aggregation ourselves. To do this, we defer updating the LLC occupancy value in event->count from intel_cqm_event_read() and do an SMP cross-call to read values for all packages in intel_cqm_event_count(). We need to ensure that we only do this for one task event per cache group, otherwise we'll report duplicate values. If we're a system-wide event we want to fallback to the default perf_event_count() implementation. Refactor this into a common function so that we don't duplicate the code. Also, introduce PERF_TYPE_INTEL_CQM, since we need a way to track an event's task (if the event isn't per-cpu) inside of the Intel CQM PMU driver. This task information is only availble in the upper layers of the perf infrastructure. Other perf backends stash the target task in event->hw.*target so we need to do something similar. The task is used to determine whether events should share a cache group and an RMID. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Cc: linux-api@vger.kernel.org Link: http://lkml.kernel.org/r/1422038748-21397-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index b5d9d74..8003d87 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -182,23 +182,124 @@ fail: /* * Determine if @a and @b measure the same set of tasks. + * + * If @a and @b measure the same set of tasks then we want to share a + * single RMID. */ static bool __match_event(struct perf_event *a, struct perf_event *b) { + /* Per-cpu and task events don't mix */ if ((a->attach_state & PERF_ATTACH_TASK) != (b->attach_state & PERF_ATTACH_TASK)) return false; - /* not task */ +#ifdef CONFIG_CGROUP_PERF + if (a->cgrp != b->cgrp) + return false; +#endif + + /* If not task event, we're machine wide */ + if (!(b->attach_state & PERF_ATTACH_TASK)) + return true; + + /* + * Events that target same task are placed into the same cache group. + */ + if (a->hw.cqm_target == b->hw.cqm_target) + return true; + + /* + * Are we an inherited event? + */ + if (b->parent == a) + return true; + + return false; +} + +#ifdef CONFIG_CGROUP_PERF +static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) +{ + if (event->attach_state & PERF_ATTACH_TASK) + return perf_cgroup_from_task(event->hw.cqm_target); - return true; /* if not task, we're machine wide */ + return event->cgrp; } +#endif /* * Determine if @a's tasks intersect with @b's tasks + * + * There are combinations of events that we explicitly prohibit, + * + * PROHIBITS + * system-wide -> cgroup and task + * cgroup -> system-wide + * -> task in cgroup + * task -> system-wide + * -> task in cgroup + * + * Call this function before allocating an RMID. */ static bool __conflict_event(struct perf_event *a, struct perf_event *b) { +#ifdef CONFIG_CGROUP_PERF + /* + * We can have any number of cgroups but only one system-wide + * event at a time. + */ + if (a->cgrp && b->cgrp) { + struct perf_cgroup *ac = a->cgrp; + struct perf_cgroup *bc = b->cgrp; + + /* + * This condition should have been caught in + * __match_event() and we should be sharing an RMID. + */ + WARN_ON_ONCE(ac == bc); + + if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) || + cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup)) + return true; + + return false; + } + + if (a->cgrp || b->cgrp) { + struct perf_cgroup *ac, *bc; + + /* + * cgroup and system-wide events are mutually exclusive + */ + if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) || + (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK))) + return true; + + /* + * Ensure neither event is part of the other's cgroup + */ + ac = event_to_cgroup(a); + bc = event_to_cgroup(b); + if (ac == bc) + return true; + + /* + * Must have cgroup and non-intersecting task events. + */ + if (!ac || !bc) + return false; + + /* + * We have cgroup and task events, and the task belongs + * to a cgroup. Check for for overlap. + */ + if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) || + cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup)) + return true; + + return false; + } +#endif /* * If one of them is not a task, same story as above with cgroups. */ @@ -245,9 +346,16 @@ static int intel_cqm_setup_event(struct perf_event *event, static void intel_cqm_event_read(struct perf_event *event) { - unsigned long rmid = event->hw.cqm_rmid; + unsigned long rmid; u64 val; + /* + * Task events are handled by intel_cqm_event_count(). + */ + if (event->cpu == -1) + return; + + rmid = event->hw.cqm_rmid; val = __rmid_read(rmid); /* @@ -259,6 +367,63 @@ static void intel_cqm_event_read(struct perf_event *event) local64_set(&event->count, val); } +struct rmid_read { + unsigned int rmid; + atomic64_t value; +}; + +static void __intel_cqm_event_count(void *info) +{ + struct rmid_read *rr = info; + u64 val; + + val = __rmid_read(rr->rmid); + + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return; + + atomic64_add(val, &rr->value); +} + +static inline bool cqm_group_leader(struct perf_event *event) +{ + return !list_empty(&event->hw.cqm_groups_entry); +} + +static u64 intel_cqm_event_count(struct perf_event *event) +{ + struct rmid_read rr = { + .rmid = event->hw.cqm_rmid, + .value = ATOMIC64_INIT(0), + }; + + /* + * We only need to worry about task events. System-wide events + * are handled like usual, i.e. entirely with + * intel_cqm_event_read(). + */ + if (event->cpu != -1) + return __perf_event_count(event); + + /* + * Only the group leader gets to report values. This stops us + * reporting duplicate values to userspace, and gives us a clear + * rule for which task gets to report the values. + * + * Note that it is impossible to attribute these values to + * specific packages - we forfeit that ability when we create + * task events. + */ + if (!cqm_group_leader(event)) + return 0; + + on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); + + local64_set(&event->count, atomic64_read(&rr.value)); + + return __perf_event_count(event); +} + static void intel_cqm_event_start(struct perf_event *event, int mode) { struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); @@ -344,7 +509,7 @@ static void intel_cqm_event_destroy(struct perf_event *event) /* * And we're the group leader.. */ - if (!list_empty(&event->hw.cqm_groups_entry)) { + if (cqm_group_leader(event)) { /* * If there was a group_other, make that leader, otherwise * destroy the group and return the RMID. @@ -365,17 +530,6 @@ static void intel_cqm_event_destroy(struct perf_event *event) static struct pmu intel_cqm_pmu; -/* - * XXX there's a bit of a problem in that we cannot simply do the one - * event per node as one would want, since that one event would one get - * scheduled on the one cpu. But we want to 'schedule' the RMID on all - * CPUs. - * - * This means we want events for each CPU, however, that generates a lot - * of duplicate values out to userspace -- this is not to be helped - * unless we want to change the core code in some way. Fore more info, - * see intel_cqm_event_read(). - */ static int intel_cqm_event_init(struct perf_event *event) { struct perf_event *group = NULL; @@ -387,9 +541,6 @@ static int intel_cqm_event_init(struct perf_event *event) if (event->attr.config & ~QOS_EVENT_MASK) return -EINVAL; - if (event->cpu == -1) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -407,7 +558,8 @@ static int intel_cqm_event_init(struct perf_event *event) mutex_lock(&cache_mutex); - err = intel_cqm_setup_event(event, &group); /* will also set rmid */ + /* Will also set rmid */ + err = intel_cqm_setup_event(event, &group); if (err) goto out; @@ -470,6 +622,7 @@ static struct pmu intel_cqm_pmu = { .start = intel_cqm_event_start, .stop = intel_cqm_event_stop, .read = intel_cqm_event_read, + .count = intel_cqm_event_count, }; static inline void cqm_pick_event_reader(int cpu) @@ -599,8 +752,8 @@ static int __init intel_cqm_init(void) __perf_cpu_notifier(intel_cqm_cpu_notifier); - ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1); - + ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", + PERF_TYPE_INTEL_CQM); if (ret) pr_err("Intel CQM perf registration failed: %d\n", ret); else diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ca5504c..dac4c28 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,6 +129,7 @@ struct hw_perf_event { struct list_head cqm_events_entry; struct list_head cqm_groups_entry; struct list_head cqm_group_entry; + struct task_struct *cqm_target; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1e3cd07..3c8b45d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -32,6 +32,7 @@ enum perf_type_id { PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, PERF_TYPE_BREAKPOINT = 5, + PERF_TYPE_INTEL_CQM = 6, PERF_TYPE_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 1fc3bae..71109a0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7181,6 +7181,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, else if (attr->type == PERF_TYPE_BREAKPOINT) event->hw.bp_target = task; #endif + else if (attr->type == PERF_TYPE_INTEL_CQM) + event->hw.cqm_target = task; } if (!overflow_handler && parent_event) { -- cgit v0.10.2 From bff671dba7981195a644a5dc210d65de8ae2d251 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:47 +0000 Subject: perf/x86/intel: Perform rotation on Intel CQM RMIDs There are many use cases where people will want to monitor more tasks than there exist RMIDs in the hardware, meaning that we have to perform some kind of multiplexing. We do this by "rotating" the RMIDs in a workqueue, and assigning an RMID to a waiting event when the RMID becomes unused. This scheme reserves one RMID at all times for rotation. When we need to schedule a new event we give it the reserved RMID, pick a victim event from the front of the global CQM list and wait for the victim's RMID to drop to zero occupancy, before it becomes the new reserved RMID. We put the victim's RMID onto the limbo list, where it resides for a "minimum queue time", which is intended to save ourselves an expensive smp IPI when the RMID is unlikely to have a occupancy value below __intel_cqm_threshold. If we fail to recycle an RMID, even after waiting the minimum queue time then we need to increment __intel_cqm_threshold. There is an upper bound on this threshold, __intel_cqm_max_threshold, which is programmable from userland as /sys/devices/intel_cqm/max_recycling_threshold. The comments above __intel_cqm_rmid_rotate() have more details. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-9-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 8003d87..e31f508 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -25,9 +25,13 @@ struct intel_cqm_state { static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); /* - * Protects cache_cgroups and cqm_rmid_lru. + * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. + * Also protects event->hw.cqm_rmid + * + * Hold either for stability, both for modification of ->hw.cqm_rmid. */ static DEFINE_MUTEX(cache_mutex); +static DEFINE_RAW_SPINLOCK(cache_lock); /* * Groups of events that have the same target(s), one RMID per group. @@ -46,7 +50,34 @@ static cpumask_t cqm_cpumask; #define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID -static u64 __rmid_read(unsigned long rmid) +/* + * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). + * + * This rmid is always free and is guaranteed to have an associated + * near-zero occupancy value, i.e. no cachelines are tagged with this + * RMID, once __intel_cqm_rmid_rotate() returns. + */ +static unsigned int intel_cqm_rotation_rmid; + +#define INVALID_RMID (-1) + +/* + * Is @rmid valid for programming the hardware? + * + * rmid 0 is reserved by the hardware for all non-monitored tasks, which + * means that we should never come across an rmid with that value. + * Likewise, an rmid value of -1 is used to indicate "no rmid currently + * assigned" and is used as part of the rotation code. + */ +static inline bool __rmid_valid(unsigned int rmid) +{ + if (!rmid || rmid == INVALID_RMID) + return false; + + return true; +} + +static u64 __rmid_read(unsigned int rmid) { u64 val; @@ -64,13 +95,21 @@ static u64 __rmid_read(unsigned long rmid) return val; } +enum rmid_recycle_state { + RMID_YOUNG = 0, + RMID_AVAILABLE, + RMID_DIRTY, +}; + struct cqm_rmid_entry { - u64 rmid; + unsigned int rmid; + enum rmid_recycle_state state; struct list_head list; + unsigned long queue_time; }; /* - * A least recently used list of RMIDs. + * cqm_rmid_free_lru - A least recently used list of RMIDs. * * Oldest entry at the head, newest (most recently used) entry at the * tail. This list is never traversed, it's only used to keep track of @@ -81,9 +120,18 @@ struct cqm_rmid_entry { * in use. To mark an RMID as in use, remove its entry from the lru * list. * - * This list is protected by cache_mutex. + * + * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs. + * + * This list is contains RMIDs that no one is currently using but that + * may have a non-zero occupancy value associated with them. The + * rotation worker moves RMIDs from the limbo list to the free list once + * the occupancy value drops below __intel_cqm_threshold. + * + * Both lists are protected by cache_mutex. */ -static LIST_HEAD(cqm_rmid_lru); +static LIST_HEAD(cqm_rmid_free_lru); +static LIST_HEAD(cqm_rmid_limbo_lru); /* * We use a simple array of pointers so that we can lookup a struct @@ -120,37 +168,43 @@ static int __get_rmid(void) lockdep_assert_held(&cache_mutex); - if (list_empty(&cqm_rmid_lru)) - return -EAGAIN; + if (list_empty(&cqm_rmid_free_lru)) + return INVALID_RMID; - entry = list_first_entry(&cqm_rmid_lru, struct cqm_rmid_entry, list); + entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list); list_del(&entry->list); return entry->rmid; } -static void __put_rmid(int rmid) +static void __put_rmid(unsigned int rmid) { struct cqm_rmid_entry *entry; lockdep_assert_held(&cache_mutex); + WARN_ON(!__rmid_valid(rmid)); entry = __rmid_entry(rmid); - list_add_tail(&entry->list, &cqm_rmid_lru); + entry->queue_time = jiffies; + entry->state = RMID_YOUNG; + + list_add_tail(&entry->list, &cqm_rmid_limbo_lru); } static int intel_cqm_setup_rmid_cache(void) { struct cqm_rmid_entry *entry; - int r; + unsigned int nr_rmids; + int r = 0; + nr_rmids = cqm_max_rmid + 1; cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) * - (cqm_max_rmid + 1), GFP_KERNEL); + nr_rmids, GFP_KERNEL); if (!cqm_rmid_ptrs) return -ENOMEM; - for (r = 0; r <= cqm_max_rmid; r++) { + for (; r <= cqm_max_rmid; r++) { struct cqm_rmid_entry *entry; entry = kmalloc(sizeof(*entry), GFP_KERNEL); @@ -161,7 +215,7 @@ static int intel_cqm_setup_rmid_cache(void) entry->rmid = r; cqm_rmid_ptrs[r] = entry; - list_add_tail(&entry->list, &cqm_rmid_lru); + list_add_tail(&entry->list, &cqm_rmid_free_lru); } /* @@ -171,6 +225,10 @@ static int intel_cqm_setup_rmid_cache(void) entry = __rmid_entry(0); list_del(&entry->list); + mutex_lock(&cache_mutex); + intel_cqm_rotation_rmid = __get_rmid(); + mutex_unlock(&cache_mutex); + return 0; fail: while (r--) @@ -313,6 +371,424 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b) return false; } +struct rmid_read { + unsigned int rmid; + atomic64_t value; +}; + +static void __intel_cqm_event_count(void *info); + +/* + * Exchange the RMID of a group of events. + */ +static unsigned int +intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid) +{ + struct perf_event *event; + unsigned int old_rmid = group->hw.cqm_rmid; + struct list_head *head = &group->hw.cqm_group_entry; + + lockdep_assert_held(&cache_mutex); + + /* + * If our RMID is being deallocated, perform a read now. + */ + if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { + struct rmid_read rr = { + .value = ATOMIC64_INIT(0), + .rmid = old_rmid, + }; + + on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, + &rr, 1); + local64_set(&group->count, atomic64_read(&rr.value)); + } + + raw_spin_lock_irq(&cache_lock); + + group->hw.cqm_rmid = rmid; + list_for_each_entry(event, head, hw.cqm_group_entry) + event->hw.cqm_rmid = rmid; + + raw_spin_unlock_irq(&cache_lock); + + return old_rmid; +} + +/* + * If we fail to assign a new RMID for intel_cqm_rotation_rmid because + * cachelines are still tagged with RMIDs in limbo, we progressively + * increment the threshold until we find an RMID in limbo with <= + * __intel_cqm_threshold lines tagged. This is designed to mitigate the + * problem where cachelines tagged with an RMID are not steadily being + * evicted. + * + * On successful rotations we decrease the threshold back towards zero. + * + * __intel_cqm_max_threshold provides an upper bound on the threshold, + * and is measured in bytes because it's exposed to userland. + */ +static unsigned int __intel_cqm_threshold; +static unsigned int __intel_cqm_max_threshold; + +/* + * Test whether an RMID has a zero occupancy value on this cpu. + */ +static void intel_cqm_stable(void *arg) +{ + struct cqm_rmid_entry *entry; + + list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) { + if (entry->state != RMID_AVAILABLE) + break; + + if (__rmid_read(entry->rmid) > __intel_cqm_threshold) + entry->state = RMID_DIRTY; + } +} + +/* + * If we have group events waiting for an RMID that don't conflict with + * events already running, assign @rmid. + */ +static bool intel_cqm_sched_in_event(unsigned int rmid) +{ + struct perf_event *leader, *event; + + lockdep_assert_held(&cache_mutex); + + leader = list_first_entry(&cache_groups, struct perf_event, + hw.cqm_groups_entry); + event = leader; + + list_for_each_entry_continue(event, &cache_groups, + hw.cqm_groups_entry) { + if (__rmid_valid(event->hw.cqm_rmid)) + continue; + + if (__conflict_event(event, leader)) + continue; + + intel_cqm_xchg_rmid(event, rmid); + return true; + } + + return false; +} + +/* + * Initially use this constant for both the limbo queue time and the + * rotation timer interval, pmu::hrtimer_interval_ms. + * + * They don't need to be the same, but the two are related since if you + * rotate faster than you recycle RMIDs, you may run out of available + * RMIDs. + */ +#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */ + +static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME; + +/* + * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list + * @nr_available: number of freeable RMIDs on the limbo list + * + * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no + * cachelines are tagged with those RMIDs. After this we can reuse them + * and know that the current set of active RMIDs is stable. + * + * Return %true or %false depending on whether stabilization needs to be + * reattempted. + * + * If we return %true then @nr_available is updated to indicate the + * number of RMIDs on the limbo list that have been queued for the + * minimum queue time (RMID_AVAILABLE), but whose data occupancy values + * are above __intel_cqm_threshold. + */ +static bool intel_cqm_rmid_stabilize(unsigned int *available) +{ + struct cqm_rmid_entry *entry, *tmp; + struct perf_event *event; + + lockdep_assert_held(&cache_mutex); + + *available = 0; + list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) { + unsigned long min_queue_time; + unsigned long now = jiffies; + + /* + * We hold RMIDs placed into limbo for a minimum queue + * time. Before the minimum queue time has elapsed we do + * not recycle RMIDs. + * + * The reasoning is that until a sufficient time has + * passed since we stopped using an RMID, any RMID + * placed onto the limbo list will likely still have + * data tagged in the cache, which means we'll probably + * fail to recycle it anyway. + * + * We can save ourselves an expensive IPI by skipping + * any RMIDs that have not been queued for the minimum + * time. + */ + min_queue_time = entry->queue_time + + msecs_to_jiffies(__rmid_queue_time_ms); + + if (time_after(min_queue_time, now)) + break; + + entry->state = RMID_AVAILABLE; + (*available)++; + } + + /* + * Fast return if none of the RMIDs on the limbo list have been + * sitting on the queue for the minimum queue time. + */ + if (!*available) + return false; + + /* + * Test whether an RMID is free for each package. + */ + on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true); + + list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) { + /* + * Exhausted all RMIDs that have waited min queue time. + */ + if (entry->state == RMID_YOUNG) + break; + + if (entry->state == RMID_DIRTY) + continue; + + list_del(&entry->list); /* remove from limbo */ + + /* + * The rotation RMID gets priority if it's + * currently invalid. In which case, skip adding + * the RMID to the the free lru. + */ + if (!__rmid_valid(intel_cqm_rotation_rmid)) { + intel_cqm_rotation_rmid = entry->rmid; + continue; + } + + /* + * If we have groups waiting for RMIDs, hand + * them one now. + */ + list_for_each_entry(event, &cache_groups, + hw.cqm_groups_entry) { + if (__rmid_valid(event->hw.cqm_rmid)) + continue; + + intel_cqm_xchg_rmid(event, entry->rmid); + entry = NULL; + break; + } + + if (!entry) + continue; + + /* + * Otherwise place it onto the free list. + */ + list_add_tail(&entry->list, &cqm_rmid_free_lru); + } + + + return __rmid_valid(intel_cqm_rotation_rmid); +} + +/* + * Pick a victim group and move it to the tail of the group list. + */ +static struct perf_event * +__intel_cqm_pick_and_rotate(void) +{ + struct perf_event *rotor; + + lockdep_assert_held(&cache_mutex); + lockdep_assert_held(&cache_lock); + + rotor = list_first_entry(&cache_groups, struct perf_event, + hw.cqm_groups_entry); + list_rotate_left(&cache_groups); + + return rotor; +} + +/* + * Attempt to rotate the groups and assign new RMIDs. + * + * Rotating RMIDs is complicated because the hardware doesn't give us + * any clues. + * + * There's problems with the hardware interface; when you change the + * task:RMID map cachelines retain their 'old' tags, giving a skewed + * picture. In order to work around this, we must always keep one free + * RMID - intel_cqm_rotation_rmid. + * + * Rotation works by taking away an RMID from a group (the old RMID), + * and assigning the free RMID to another group (the new RMID). We must + * then wait for the old RMID to not be used (no cachelines tagged). + * This ensure that all cachelines are tagged with 'active' RMIDs. At + * this point we can start reading values for the new RMID and treat the + * old RMID as the free RMID for the next rotation. + * + * Return %true or %false depending on whether we did any rotating. + */ +static bool __intel_cqm_rmid_rotate(void) +{ + struct perf_event *group, *rotor, *start = NULL; + unsigned int threshold_limit; + unsigned int nr_needed = 0; + unsigned int nr_available; + unsigned int rmid; + bool rotated = false; + + mutex_lock(&cache_mutex); + +again: + /* + * Fast path through this function if there are no groups and no + * RMIDs that need cleaning. + */ + if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru)) + goto out; + + list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) { + if (!__rmid_valid(group->hw.cqm_rmid)) { + if (!start) + start = group; + nr_needed++; + } + } + + /* + * We have some event groups, but they all have RMIDs assigned + * and no RMIDs need cleaning. + */ + if (!nr_needed && list_empty(&cqm_rmid_limbo_lru)) + goto out; + + if (!nr_needed) + goto stabilize; + + /* + * We have more event groups without RMIDs than available RMIDs. + * + * We force deallocate the rmid of the group at the head of + * cache_groups. The first event group without an RMID then gets + * assigned intel_cqm_rotation_rmid. This ensures we always make + * forward progress. + * + * Rotate the cache_groups list so the previous head is now the + * tail. + */ + rotor = __intel_cqm_pick_and_rotate(); + rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID); + + /* + * The group at the front of the list should always have a valid + * RMID. If it doesn't then no groups have RMIDs assigned. + */ + if (!__rmid_valid(rmid)) + goto stabilize; + + /* + * If the rotation is going to succeed, reduce the threshold so + * that we don't needlessly reuse dirty RMIDs. + */ + if (__rmid_valid(intel_cqm_rotation_rmid)) { + intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid); + intel_cqm_rotation_rmid = INVALID_RMID; + + if (__intel_cqm_threshold) + __intel_cqm_threshold--; + } + + __put_rmid(rmid); + + rotated = true; + +stabilize: + /* + * We now need to stablize the RMID we freed above (if any) to + * ensure that the next time we rotate we have an RMID with zero + * occupancy value. + * + * Alternatively, if we didn't need to perform any rotation, + * we'll have a bunch of RMIDs in limbo that need stabilizing. + */ + threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale; + + while (intel_cqm_rmid_stabilize(&nr_available) && + __intel_cqm_threshold < threshold_limit) { + unsigned int steal_limit; + + /* + * Don't spin if nobody is actively waiting for an RMID, + * the rotation worker will be kicked as soon as an + * event needs an RMID anyway. + */ + if (!nr_needed) + break; + + /* Allow max 25% of RMIDs to be in limbo. */ + steal_limit = (cqm_max_rmid + 1) / 4; + + /* + * We failed to stabilize any RMIDs so our rotation + * logic is now stuck. In order to make forward progress + * we have a few options: + * + * 1. rotate ("steal") another RMID + * 2. increase the threshold + * 3. do nothing + * + * We do both of 1. and 2. until we hit the steal limit. + * + * The steal limit prevents all RMIDs ending up on the + * limbo list. This can happen if every RMID has a + * non-zero occupancy above threshold_limit, and the + * occupancy values aren't dropping fast enough. + * + * Note that there is prioritisation at work here - we'd + * rather increase the number of RMIDs on the limbo list + * than increase the threshold, because increasing the + * threshold skews the event data (because we reuse + * dirty RMIDs) - threshold bumps are a last resort. + */ + if (nr_available < steal_limit) + goto again; + + __intel_cqm_threshold++; + } + +out: + mutex_unlock(&cache_mutex); + return rotated; +} + +static void intel_cqm_rmid_rotate(struct work_struct *work); + +static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate); + +static struct pmu intel_cqm_pmu; + +static void intel_cqm_rmid_rotate(struct work_struct *work) +{ + unsigned long delay; + + __intel_cqm_rmid_rotate(); + + delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms); + schedule_delayed_work(&intel_cqm_rmid_work, delay); +} + /* * Find a group and setup RMID. * @@ -322,7 +798,6 @@ static int intel_cqm_setup_event(struct perf_event *event, struct perf_event **group) { struct perf_event *iter; - int rmid; list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { if (__match_event(iter, event)) { @@ -336,17 +811,14 @@ static int intel_cqm_setup_event(struct perf_event *event, return -EBUSY; } - rmid = __get_rmid(); - if (rmid < 0) - return rmid; - - event->hw.cqm_rmid = rmid; + event->hw.cqm_rmid = __get_rmid(); return 0; } static void intel_cqm_event_read(struct perf_event *event) { - unsigned long rmid; + unsigned long flags; + unsigned int rmid; u64 val; /* @@ -355,23 +827,25 @@ static void intel_cqm_event_read(struct perf_event *event) if (event->cpu == -1) return; + raw_spin_lock_irqsave(&cache_lock, flags); rmid = event->hw.cqm_rmid; + + if (!__rmid_valid(rmid)) + goto out; + val = __rmid_read(rmid); /* * Ignore this reading on error states and do not update the value. */ if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) - return; + goto out; local64_set(&event->count, val); +out: + raw_spin_unlock_irqrestore(&cache_lock, flags); } -struct rmid_read { - unsigned int rmid; - atomic64_t value; -}; - static void __intel_cqm_event_count(void *info) { struct rmid_read *rr = info; @@ -392,8 +866,8 @@ static inline bool cqm_group_leader(struct perf_event *event) static u64 intel_cqm_event_count(struct perf_event *event) { + unsigned long flags; struct rmid_read rr = { - .rmid = event->hw.cqm_rmid, .value = ATOMIC64_INIT(0), }; @@ -417,17 +891,36 @@ static u64 intel_cqm_event_count(struct perf_event *event) if (!cqm_group_leader(event)) return 0; - on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); + /* + * Notice that we don't perform the reading of an RMID + * atomically, because we can't hold a spin lock across the + * IPIs. + * + * Speculatively perform the read, since @event might be + * assigned a different (possibly invalid) RMID while we're + * busying performing the IPI calls. It's therefore necessary to + * check @event's RMID afterwards, and if it has changed, + * discard the result of the read. + */ + rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid); - local64_set(&event->count, atomic64_read(&rr.value)); + if (!__rmid_valid(rr.rmid)) + goto out; + + on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); + raw_spin_lock_irqsave(&cache_lock, flags); + if (event->hw.cqm_rmid == rr.rmid) + local64_set(&event->count, atomic64_read(&rr.value)); + raw_spin_unlock_irqrestore(&cache_lock, flags); +out: return __perf_event_count(event); } static void intel_cqm_event_start(struct perf_event *event, int mode) { struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); - unsigned long rmid = event->hw.cqm_rmid; + unsigned int rmid = event->hw.cqm_rmid; unsigned long flags; if (!(event->hw.cqm_state & PERF_HES_STOPPED)) @@ -473,15 +966,19 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode) static int intel_cqm_event_add(struct perf_event *event, int mode) { - int rmid; + unsigned long flags; + unsigned int rmid; + + raw_spin_lock_irqsave(&cache_lock, flags); event->hw.cqm_state = PERF_HES_STOPPED; rmid = event->hw.cqm_rmid; - WARN_ON_ONCE(!rmid); - if (mode & PERF_EF_START) + if (__rmid_valid(rmid) && (mode & PERF_EF_START)) intel_cqm_event_start(event, mode); + raw_spin_unlock_irqrestore(&cache_lock, flags); + return 0; } @@ -518,9 +1015,10 @@ static void intel_cqm_event_destroy(struct perf_event *event) list_replace(&event->hw.cqm_groups_entry, &group_other->hw.cqm_groups_entry); } else { - int rmid = event->hw.cqm_rmid; + unsigned int rmid = event->hw.cqm_rmid; - __put_rmid(rmid); + if (__rmid_valid(rmid)) + __put_rmid(rmid); list_del(&event->hw.cqm_groups_entry); } } @@ -528,11 +1026,10 @@ static void intel_cqm_event_destroy(struct perf_event *event) mutex_unlock(&cache_mutex); } -static struct pmu intel_cqm_pmu; - static int intel_cqm_event_init(struct perf_event *event) { struct perf_event *group = NULL; + bool rotate = false; int err; if (event->attr.type != intel_cqm_pmu.type) @@ -569,10 +1066,24 @@ static int intel_cqm_event_init(struct perf_event *event) } else { list_add_tail(&event->hw.cqm_groups_entry, &cache_groups); + + /* + * All RMIDs are either in use or have recently been + * used. Kick the rotation worker to clean/free some. + * + * We only do this for the group leader, rather than for + * every event in a group to save on needless work. + */ + if (!__rmid_valid(event->hw.cqm_rmid)) + rotate = true; } out: mutex_unlock(&cache_mutex); + + if (rotate) + schedule_delayed_work(&intel_cqm_rmid_work, 0); + return err; } @@ -607,22 +1118,76 @@ static struct attribute_group intel_cqm_format_group = { .attrs = intel_cqm_formats_attr, }; +static ssize_t +max_recycle_threshold_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + ssize_t rv; + + mutex_lock(&cache_mutex); + rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold); + mutex_unlock(&cache_mutex); + + return rv; +} + +static ssize_t +max_recycle_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int bytes, cachelines; + int ret; + + ret = kstrtouint(buf, 0, &bytes); + if (ret) + return ret; + + mutex_lock(&cache_mutex); + + __intel_cqm_max_threshold = bytes; + cachelines = bytes / cqm_l3_scale; + + /* + * The new maximum takes effect immediately. + */ + if (__intel_cqm_threshold > cachelines) + __intel_cqm_threshold = cachelines; + + mutex_unlock(&cache_mutex); + + return count; +} + +static DEVICE_ATTR_RW(max_recycle_threshold); + +static struct attribute *intel_cqm_attrs[] = { + &dev_attr_max_recycle_threshold.attr, + NULL, +}; + +static const struct attribute_group intel_cqm_group = { + .attrs = intel_cqm_attrs, +}; + static const struct attribute_group *intel_cqm_attr_groups[] = { &intel_cqm_events_group, &intel_cqm_format_group, + &intel_cqm_group, NULL, }; static struct pmu intel_cqm_pmu = { - .attr_groups = intel_cqm_attr_groups, - .task_ctx_nr = perf_sw_context, - .event_init = intel_cqm_event_init, - .add = intel_cqm_event_add, - .del = intel_cqm_event_del, - .start = intel_cqm_event_start, - .stop = intel_cqm_event_stop, - .read = intel_cqm_event_read, - .count = intel_cqm_event_count, + .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME, + .attr_groups = intel_cqm_attr_groups, + .task_ctx_nr = perf_sw_context, + .event_init = intel_cqm_event_init, + .add = intel_cqm_event_add, + .del = intel_cqm_event_del, + .start = intel_cqm_event_start, + .stop = intel_cqm_event_stop, + .read = intel_cqm_event_read, + .count = intel_cqm_event_count, }; static inline void cqm_pick_event_reader(int cpu) @@ -732,6 +1297,16 @@ static int __init intel_cqm_init(void) } } + /* + * A reasonable upper limit on the max threshold is the number + * of lines tagged per RMID if all RMIDs have the same number of + * lines tagged in the LLC. + * + * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. + */ + __intel_cqm_max_threshold = + boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1); + snprintf(scale, sizeof(scale), "%u", cqm_l3_scale); str = kstrdup(scale, GFP_KERNEL); if (!str) { -- cgit v0.10.2 From 59bf7fd45c90a8fde22a7717b5413e4ed9666c32 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 23 Jan 2015 18:45:48 +0000 Subject: perf/x86/intel: Enable conflicting event scheduling for CQM We can leverage the workqueue that we use for RMID rotation to support scheduling of conflicting monitoring events. Allowing events that monitor conflicting things is done at various other places in the perf subsystem, so there's precedent there. An example of two conflicting events would be monitoring a cgroup and simultaneously monitoring a task within that cgroup. This uses the cache_groups list as a queuing mechanism, where every event that reaches the front of the list gets the chance to be scheduled in, possibly descheduling any conflicting events that are running. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Linus Torvalds Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1422038748-21397-10-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index e31f508..9a8ef83 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -507,7 +507,6 @@ static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME; static bool intel_cqm_rmid_stabilize(unsigned int *available) { struct cqm_rmid_entry *entry, *tmp; - struct perf_event *event; lockdep_assert_held(&cache_mutex); @@ -577,19 +576,9 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available) /* * If we have groups waiting for RMIDs, hand - * them one now. + * them one now provided they don't conflict. */ - list_for_each_entry(event, &cache_groups, - hw.cqm_groups_entry) { - if (__rmid_valid(event->hw.cqm_rmid)) - continue; - - intel_cqm_xchg_rmid(event, entry->rmid); - entry = NULL; - break; - } - - if (!entry) + if (intel_cqm_sched_in_event(entry->rmid)) continue; /* @@ -604,25 +593,73 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available) /* * Pick a victim group and move it to the tail of the group list. + * @next: The first group without an RMID */ -static struct perf_event * -__intel_cqm_pick_and_rotate(void) +static void __intel_cqm_pick_and_rotate(struct perf_event *next) { struct perf_event *rotor; + unsigned int rmid; lockdep_assert_held(&cache_mutex); - lockdep_assert_held(&cache_lock); rotor = list_first_entry(&cache_groups, struct perf_event, hw.cqm_groups_entry); + + /* + * The group at the front of the list should always have a valid + * RMID. If it doesn't then no groups have RMIDs assigned and we + * don't need to rotate the list. + */ + if (next == rotor) + return; + + rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID); + __put_rmid(rmid); + list_rotate_left(&cache_groups); +} + +/* + * Deallocate the RMIDs from any events that conflict with @event, and + * place them on the back of the group list. + */ +static void intel_cqm_sched_out_conflicting_events(struct perf_event *event) +{ + struct perf_event *group, *g; + unsigned int rmid; + + lockdep_assert_held(&cache_mutex); + + list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) { + if (group == event) + continue; + + rmid = group->hw.cqm_rmid; + + /* + * Skip events that don't have a valid RMID. + */ + if (!__rmid_valid(rmid)) + continue; + + /* + * No conflict? No problem! Leave the event alone. + */ + if (!__conflict_event(group, event)) + continue; - return rotor; + intel_cqm_xchg_rmid(group, INVALID_RMID); + __put_rmid(rmid); + } } /* * Attempt to rotate the groups and assign new RMIDs. * + * We rotate for two reasons, + * 1. To handle the scheduling of conflicting events + * 2. To recycle RMIDs + * * Rotating RMIDs is complicated because the hardware doesn't give us * any clues. * @@ -642,11 +679,10 @@ __intel_cqm_pick_and_rotate(void) */ static bool __intel_cqm_rmid_rotate(void) { - struct perf_event *group, *rotor, *start = NULL; + struct perf_event *group, *start = NULL; unsigned int threshold_limit; unsigned int nr_needed = 0; unsigned int nr_available; - unsigned int rmid; bool rotated = false; mutex_lock(&cache_mutex); @@ -678,7 +714,9 @@ again: goto stabilize; /* - * We have more event groups without RMIDs than available RMIDs. + * We have more event groups without RMIDs than available RMIDs, + * or we have event groups that conflict with the ones currently + * scheduled. * * We force deallocate the rmid of the group at the head of * cache_groups. The first event group without an RMID then gets @@ -688,15 +726,7 @@ again: * Rotate the cache_groups list so the previous head is now the * tail. */ - rotor = __intel_cqm_pick_and_rotate(); - rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID); - - /* - * The group at the front of the list should always have a valid - * RMID. If it doesn't then no groups have RMIDs assigned. - */ - if (!__rmid_valid(rmid)) - goto stabilize; + __intel_cqm_pick_and_rotate(start); /* * If the rotation is going to succeed, reduce the threshold so @@ -704,14 +734,14 @@ again: */ if (__rmid_valid(intel_cqm_rotation_rmid)) { intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid); - intel_cqm_rotation_rmid = INVALID_RMID; + intel_cqm_rotation_rmid = __get_rmid(); + + intel_cqm_sched_out_conflicting_events(start); if (__intel_cqm_threshold) __intel_cqm_threshold--; } - __put_rmid(rmid); - rotated = true; stabilize: @@ -794,25 +824,37 @@ static void intel_cqm_rmid_rotate(struct work_struct *work) * * If we're part of a group, we use the group's RMID. */ -static int intel_cqm_setup_event(struct perf_event *event, - struct perf_event **group) +static void intel_cqm_setup_event(struct perf_event *event, + struct perf_event **group) { struct perf_event *iter; + unsigned int rmid; + bool conflict = false; list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { + rmid = iter->hw.cqm_rmid; + if (__match_event(iter, event)) { /* All tasks in a group share an RMID */ - event->hw.cqm_rmid = iter->hw.cqm_rmid; + event->hw.cqm_rmid = rmid; *group = iter; - return 0; + return; } - if (__conflict_event(iter, event)) - return -EBUSY; + /* + * We only care about conflicts for events that are + * actually scheduled in (and hence have a valid RMID). + */ + if (__conflict_event(iter, event) && __rmid_valid(rmid)) + conflict = true; } - event->hw.cqm_rmid = __get_rmid(); - return 0; + if (conflict) + rmid = INVALID_RMID; + else + rmid = __get_rmid(); + + event->hw.cqm_rmid = rmid; } static void intel_cqm_event_read(struct perf_event *event) @@ -1030,7 +1072,6 @@ static int intel_cqm_event_init(struct perf_event *event) { struct perf_event *group = NULL; bool rotate = false; - int err; if (event->attr.type != intel_cqm_pmu.type) return -ENOENT; @@ -1056,9 +1097,7 @@ static int intel_cqm_event_init(struct perf_event *event) mutex_lock(&cache_mutex); /* Will also set rmid */ - err = intel_cqm_setup_event(event, &group); - if (err) - goto out; + intel_cqm_setup_event(event, &group); if (group) { list_add_tail(&event->hw.cqm_group_entry, @@ -1078,13 +1117,12 @@ static int intel_cqm_event_init(struct perf_event *event) rotate = true; } -out: mutex_unlock(&cache_mutex); if (rotate) schedule_delayed_work(&intel_cqm_rmid_work, 0); - return err; + return 0; } EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01"); -- cgit v0.10.2 From 85c273d2b6569706762cf400079ca0699e007d81 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 24 Feb 2015 15:13:40 -0800 Subject: perf record: Support recording running/enabled time Add an option to perf record to record running/enabled time for read events, similar to what stat does. This is useful to understand multiplexing problems. Right now the report support is not great, but at least report -D already supports it. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1424819620-16043-1-git-send-email-andi@firstfloor.org [ Fixed the Documentation entry to match the OPT_BOOLEAN one ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1c7e50f..cae75c1 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -241,6 +241,9 @@ Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option is off by default. +--running-time:: +Record running and enabled time for read events (:S) + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d0d02a8..4fdad06 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -839,6 +839,8 @@ struct option __record_options[] = { "use per-thread mmaps"), OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, "Sample machine registers on interrupt"), + OPT_BOOLEAN(0, "running-time", &record.opts.running_time, + "Record running/enabled time of read (:S) events"), OPT_END() }; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1dabb85..1caa70a 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -53,6 +53,7 @@ struct record_opts { bool sample_time; bool period; bool sample_intr_regs; + bool running_time; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f93e520..bb4eff2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -734,6 +734,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); + if (opts->running_time) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + /* * XXX see the function comment above * -- cgit v0.10.2 From 53d0a57343949b2af9b27229db534b98e5a0c4d0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 20 Feb 2015 23:16:58 +0100 Subject: perf tools: Add feature check for libbabeltrace Adding feature check for babeltrace library [1], which will be used for perf data file CTF [2] conversion in following patches. The babeltrace library is now automatically detected as standard feature. It's possible to specify LIBBABELTRACE_DIR make variable to specify location of installed libbabeltrace, like: $ make LIBBABELTRACE_DIR=/opt/libbabeltrace/ BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ on ] ... libbabeltrace: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... DWARF post unwind library: libunwind NOTE The installation of the [1] to to used by above make: $ git clone git://git.efficios.com/babeltrace.git $ cd babeltrace $ vim README $ ./bootstrap $ ./configure --prefix=/opt/libbabeltrace $ make prefix=/opt/libbabeltrace $ sudo make install prefix=/opt/libbabeltrace Please make sure that the /opt/libbabeltrace/lib directory is in your LD_LIBRARY_PATH: $ export LD_LIBRARY_PATH=/opt/libbabeltrace/lib [1] babeltrace - http://www.efficios.com/babeltrace [2] Common Trace Format - http://www.efficios.com/ctf Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Reviewed-by: David Ahern Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/r/1424470628-5969-2-git-send-email-jolsa@kernel.org Signed-off-by: Sebastian Andrzej Siewior [ Added missing babeltrace build instructions ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index efc5158..ec4c063 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -68,7 +68,9 @@ include config/utilities.mak # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode # # Define NO_ZLIB if you do not want to support compressed kernel modules - +# +# Define NO_LIBBABELTRACE if you do not want libbabeltrace support +# for CTF data format. ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index b97a7b9..6f129b0 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -96,6 +96,17 @@ ifndef NO_LIBELF FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw endif +ifndef NO_LIBBABELTRACE + # for linking with debug library, run like: + # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ + ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib + endif + FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) + FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +endif + # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -216,6 +227,7 @@ CORE_FEATURE_TESTS = \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ + libbabeltrace \ zlib LIB_FEATURE_TESTS = \ @@ -231,6 +243,7 @@ LIB_FEATURE_TESTS = \ libslang \ libunwind \ libdw-dwarf-unwind \ + libbabeltrace \ zlib VF_FEATURE_TESTS = \ @@ -692,6 +705,17 @@ else NO_PERF_READ_VDSOX32 := 1 endif +ifndef NO_LIBBABELTRACE + ifeq ($(feature-libbabeltrace), 0) + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-devel/libbabeltrace-ctf-dev); + NO_LIBBABELTRACE := 1 + else + CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) + LDFLAGS += $(LIBBABELTRACE_LDFLAGS) + EXTLIBS += -lbabeltrace-ctf + endif +endif + # Among the variables below, these: # perfexecdir # template_dir diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index b32ff33..70c9aeb 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -29,6 +29,7 @@ FILES= \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ + test-libbabeltrace.bin \ test-compile-32.bin \ test-compile-x32.bin \ test-zlib.bin @@ -43,7 +44,7 @@ BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz + $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace test-hello.bin: $(BUILD) @@ -133,7 +134,10 @@ test-timerfd.bin: $(BUILD) test-libdw-dwarf-unwind.bin: - $(BUILD) + $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) + +test-libbabeltrace.bin: + $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) test-sync-compare-and-swap.bin: $(BUILD) -Werror diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 6d4d093..1ffc3da 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -101,6 +101,10 @@ # include "test-pthread_attr_setaffinity_np.c" #undef main +#define main main_test_libbabeltrace +# include "test-libbabeltrace.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -126,6 +130,7 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); + main_test_libbabeltrace(); return 0; } diff --git a/tools/perf/config/feature-checks/test-libbabeltrace.c b/tools/perf/config/feature-checks/test-libbabeltrace.c new file mode 100644 index 0000000..3b7dd68 --- /dev/null +++ b/tools/perf/config/feature-checks/test-libbabeltrace.c @@ -0,0 +1,8 @@ + +#include + +int main(void) +{ + bt_ctf_stream_class_get_packet_context_type((void *) 0); + return 0; +} -- cgit v0.10.2 From 2245bf1410d2d719f3bfce729b07ab83fe6142f7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 20 Feb 2015 23:16:59 +0100 Subject: perf tools: Add new 'perf data' command Adding new 'perf data' command to provide operations over data files. The 'perf data convert' sub command is coming in following patch, but there's possibility for other useful commands like 'perf data ls' (to display perf data file in directory in ls style). Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Reviewed-by: David Ahern Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/r/1424470628-5969-3-git-send-email-jolsa@kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Build b/tools/perf/Build index 976e038..b77370e 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -18,6 +18,7 @@ perf-y += builtin-lock.o perf-y += builtin-kvm.o perf-y += builtin-inject.o perf-y += builtin-mem.o +perf-y += builtin-data.o perf-$(CONFIG_AUDIT) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt new file mode 100644 index 0000000..b8c8394 --- /dev/null +++ b/tools/perf/Documentation/perf-data.txt @@ -0,0 +1,15 @@ +perf-data(1) +============== + +NAME +---- +perf-data - Data file related processing + +SYNOPSIS +-------- +[verse] +'perf data' [] []", + +DESCRIPTION +----------- +Data file related processing. diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c new file mode 100644 index 0000000..1eee97d --- /dev/null +++ b/tools/perf/builtin-data.c @@ -0,0 +1,75 @@ +#include +#include "builtin.h" +#include "perf.h" +#include "debug.h" +#include "parse-options.h" + +typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); + +struct data_cmd { + const char *name; + const char *summary; + data_cmd_fn_t fn; +}; + +static struct data_cmd data_cmds[]; + +#define for_each_cmd(cmd) \ + for (cmd = data_cmds; cmd && cmd->name; cmd++) + +static const struct option data_options[] = { + OPT_END() +}; + +static const char * const data_usage[] = { + "perf data [] []", + NULL +}; + +static void print_usage(void) +{ + struct data_cmd *cmd; + + printf("Usage:\n"); + printf("\t%s\n\n", data_usage[0]); + printf("\tAvailable commands:\n"); + + for_each_cmd(cmd) { + printf("\t %s\t- %s\n", cmd->name, cmd->summary); + } + + printf("\n"); +} + +static struct data_cmd data_cmds[] = { + { NULL }, +}; + +int cmd_data(int argc, const char **argv, const char *prefix) +{ + struct data_cmd *cmd; + const char *cmdstr; + + /* No command specified. */ + if (argc < 2) + goto usage; + + argc = parse_options(argc, argv, data_options, data_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc < 1) + goto usage; + + cmdstr = argv[0]; + + for_each_cmd(cmd) { + if (strcmp(cmd->name, cmdstr)) + continue; + + return cmd->fn(argc, argv, prefix); + } + + pr_err("Unknown command: %s\n", cmdstr); +usage: + print_usage(); + return -1; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b210d62..3688ad2 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -37,6 +37,7 @@ extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int cmd_mem(int argc, const char **argv, const char *prefix); +extern int cmd_data(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0906fc4..00fcaf8 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,6 +7,7 @@ perf-archive mainporcelain common perf-bench mainporcelain common perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common +perf-data mainporcelain common perf-diff mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3700a7f..f3c66b8 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -62,6 +62,7 @@ static struct cmd_struct commands[] = { #endif { "inject", cmd_inject, 0 }, { "mem", cmd_mem, 0 }, + { "data", cmd_data, 0 }, }; struct pager_config { -- cgit v0.10.2 From edbe9817aeb540aa1494aa20276a2bfc7f4ab816 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 20 Feb 2015 23:17:00 +0100 Subject: perf data: Add perf data to CTF conversion support Adding 'perf data convert' to convert perf data file into different format. This patch adds support for CTF format conversion. To convert perf.data into CTF run: $ perf data convert --to-ctf=./ctf-data/ [ perf data convert: Converted 'perf.data' into CTF data './ctf-data/' ] [ perf data convert: Converted and wrote 11.268 MB (100230 samples) ] The command will create CTF metadata out of perf.data file (or one specified via -i option) and then convert all sample events into single CTF stream. Each sample_type bit is translated into separated CTF event field apart from following exceptions: PERF_SAMPLE_RAW - added in next patch PERF_SAMPLE_READ - TODO PERF_SAMPLE_CALLCHAIN - TODO PERF_SAMPLE_BRANCH_STACK - TODO PERF_SAMPLE_REGS_USER - TODO PERF_SAMPLE_STACK_USER - TODO $ perf --debug=data-convert=2 data convert ... The converted CTF data could be analyzed by CTF tools, like babletrace or tracecompass [1]. $ babeltrace ./ctf-data/ [03:19:13.962125533] (+?.?????????) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 1 } [03:19:13.962130001] (+0.000004468) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 1 } [03:19:13.962131936] (+0.000001935) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 8 } [03:19:13.962133732] (+0.000001796) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 114 } [03:19:13.962135557] (+0.000001825) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 2087 } [03:19:13.962137627] (+0.000002070) cycles: { }, { ip = 0xFFFFFFFF81361938, tid = 20714, pid = 20714, period = 37582 } [03:19:13.962161091] (+0.000023464) cycles: { }, { ip = 0xFFFFFFFF8124218F, tid = 20714, pid = 20714, period = 600246 } [03:19:13.962517569] (+0.000356478) cycles: { }, { ip = 0xFFFFFFFF811A75DB, tid = 20714, pid = 20714, period = 1325731 } [03:19:13.969518008] (+0.007000439) cycles: { }, { ip = 0x34080917B2, tid = 20714, pid = 20714, period = 1144298 } The following members to the ctf-environment were decided to be added to distinguish and specify perf CTF data: - domain It says "kernel" because it contains a kernel trace (not to be confused with a user space like lttng-ust does) - tracer_name It says perf. This can be used to distinguish between lttng and perf CTF based trace. - version The kernel version from stream. In addition to release, this is what it looks like on a Debian kernel: release = "3.14-1-amd64"; version = "3.14.0"; [1] http://projects.eclipse.org/projects/tools.tracecompass Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Reviewed-by: David Ahern Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/r/1424470628-5969-4-git-send-email-jolsa@kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index b8c8394..be8fa1a 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -13,3 +13,28 @@ SYNOPSIS DESCRIPTION ----------- Data file related processing. + +COMMANDS +-------- +convert:: + Converts perf data file into another format (only CTF [1] format is support by now). + It's possible to set data-convert debug variable to get debug messages from conversion, + like: + perf --debug data-convert data convert ... + +OPTIONS for 'convert' +--------------------- +--to-ctf:: + Triggers the CTF conversion, specify the path of CTF data directory. + +-i:: + Specify input perf data file path. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +SEE ALSO +-------- +linkperf:perf[1] +[1] Common Trace Format - http://www.efficios.com/ctf diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 1e8e400..2b13177 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -13,11 +13,16 @@ SYNOPSIS OPTIONS ------- --debug:: - Setup debug variable (just verbose for now) in value + Setup debug variable (see list below) in value range (0, 10). Use like: --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + --buildid-dir:: Setup buildid cache directory. It has higher priority than buildid.dir config file option. diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 1eee97d..9705ba7 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -3,6 +3,7 @@ #include "perf.h" #include "debug.h" #include "parse-options.h" +#include "data-convert-bt.h" typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); @@ -41,7 +42,50 @@ static void print_usage(void) printf("\n"); } +static const char * const data_convert_usage[] = { + "perf data convert []", + NULL +}; + +static int cmd_data_convert(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + const char *to_ctf = NULL; + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_STRING('i', "input", &input_name, "file", "input file name"), +#ifdef HAVE_LIBBABELTRACE_SUPPORT + OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), +#endif + OPT_END() + }; + +#ifndef HAVE_LIBBABELTRACE_SUPPORT + pr_err("No conversion support compiled in.\n"); + return -1; +#endif + + argc = parse_options(argc, argv, options, + data_convert_usage, 0); + if (argc) { + usage_with_options(data_convert_usage, options); + return -1; + } + + if (to_ctf) { +#ifdef HAVE_LIBBABELTRACE_SUPPORT + return bt_convert__perf2ctf(input_name, to_ctf); +#else + pr_err("The libbabeltrace support is not compiled in.\n"); + return -1; +#endif + } + + return 0; +} + static struct data_cmd data_cmds[] = { + { "convert", "converts data file between formats", cmd_data_convert }, { NULL }, }; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 6f129b0..c3570b5 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -713,6 +713,7 @@ ifndef NO_LIBBABELTRACE CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) LDFLAGS += $(LIBBABELTRACE_LDFLAGS) EXTLIBS += -lbabeltrace-ctf + $(call detected,CONFIG_LIBBABELTRACE) endif endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 32f9327..a2c8047 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -88,6 +88,8 @@ libperf-$(CONFIG_DWARF) += dwarf-aux.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o + libperf-y += scripting-engines/ libperf-$(CONFIG_PERF_REGS) += perf_regs.o diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c new file mode 100644 index 0000000..ff4826c --- /dev/null +++ b/tools/perf/util/data-convert-bt.c @@ -0,0 +1,612 @@ +/* + * CTF writing support via babeltrace. + * + * Copyright (C) 2014, Jiri Olsa + * Copyright (C) 2014, Sebastian Andrzej Siewior + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "asm/bug.h" +#include "data-convert-bt.h" +#include "session.h" +#include "util.h" +#include "debug.h" +#include "tool.h" +#include "evlist.h" +#include "evsel.h" +#include "machine.h" + +#define pr_N(n, fmt, ...) \ + eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) + +#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__) + +struct evsel_priv { + struct bt_ctf_event_class *event_class; +}; + +struct ctf_writer { + /* writer primitives */ + struct bt_ctf_writer *writer; + struct bt_ctf_stream *stream; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_clock *clock; + + /* data types */ + union { + struct { + struct bt_ctf_field_type *s64; + struct bt_ctf_field_type *u64; + struct bt_ctf_field_type *s32; + struct bt_ctf_field_type *u32; + struct bt_ctf_field_type *string; + struct bt_ctf_field_type *u64_hex; + }; + struct bt_ctf_field_type *array[6]; + } data; +}; + +struct convert { + struct perf_tool tool; + struct ctf_writer writer; + + u64 events_size; + u64 events_count; +}; + +static int value_set(struct bt_ctf_field_type *type, + struct bt_ctf_event *event, + const char *name, u64 val) +{ + struct bt_ctf_field *field; + bool sign = bt_ctf_field_type_integer_get_signed(type); + int ret; + + field = bt_ctf_field_create(type); + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (sign) { + ret = bt_ctf_field_signed_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } else { + ret = bt_ctf_field_unsigned_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } + + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err; + } + + pr2(" SET [%s = %" PRIu64 "]\n", name, val); + +err: + bt_ctf_field_put(field); + return ret; +} + +#define __FUNC_VALUE_SET(_name, _val_type) \ +static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \ + struct bt_ctf_event *event, \ + const char *name, \ + _val_type val) \ +{ \ + struct bt_ctf_field_type *type = cw->data._name; \ + return value_set(type, event, name, (u64) val); \ +} + +#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name) + +FUNC_VALUE_SET(s32) +FUNC_VALUE_SET(u32) +FUNC_VALUE_SET(s64) +FUNC_VALUE_SET(u64) +__FUNC_VALUE_SET(u64_hex, u64) + +static int add_generic_values(struct ctf_writer *cw, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 type = evsel->attr.sample_type; + int ret; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + + if (type & PERF_SAMPLE_IP) { + ret = value_set_u64_hex(cw, event, "ip", sample->ip); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TID) { + ret = value_set_s32(cw, event, "tid", sample->tid); + if (ret) + return -1; + + ret = value_set_s32(cw, event, "pid", sample->pid); + if (ret) + return -1; + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) { + ret = value_set_u64(cw, event, "id", sample->id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + ret = value_set_u64(cw, event, "stream_id", sample->stream_id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_CPU) { + ret = value_set_u32(cw, event, "cpu", sample->cpu); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_PERIOD) { + ret = value_set_u64(cw, event, "period", sample->period); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_WEIGHT) { + ret = value_set_u64(cw, event, "weight", sample->weight); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + ret = value_set_u64(cw, event, "data_src", sample->data_src); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + ret = value_set_u64(cw, event, "transaction", sample->transaction); + if (ret) + return -1; + } + + return 0; +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *_event __maybe_unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct convert *c = container_of(tool, struct convert, tool); + struct evsel_priv *priv = evsel->priv; + struct ctf_writer *cw = &c->writer; + struct bt_ctf_event_class *event_class; + struct bt_ctf_event *event; + int ret; + + if (WARN_ONCE(!priv, "Failed to setup all events.\n")) + return 0; + + event_class = priv->event_class; + + /* update stats */ + c->events_count++; + c->events_size += _event->header.size; + + pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count); + + event = bt_ctf_event_create(event_class); + if (!event) { + pr_err("Failed to create an CTF event\n"); + return -1; + } + + bt_ctf_clock_set_time(cw->clock, sample->time); + + ret = add_generic_values(cw, event, evsel, sample); + if (ret) + return -1; + + bt_ctf_stream_append_event(cw->stream, event); + bt_ctf_event_put(event); + return 0; +} + +static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, + struct bt_ctf_event_class *event_class) +{ + u64 type = evsel->attr.sample_type; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + +#define ADD_FIELD(cl, t, n) \ + do { \ + pr2(" field '%s'\n", n); \ + if (bt_ctf_event_class_add_field(cl, t, n)) { \ + pr_err("Failed to add field '%s;\n", n); \ + return -1; \ + } \ + } while (0) + + if (type & PERF_SAMPLE_IP) + ADD_FIELD(event_class, cw->data.u64_hex, "ip"); + + if (type & PERF_SAMPLE_TID) { + ADD_FIELD(event_class, cw->data.s32, "tid"); + ADD_FIELD(event_class, cw->data.s32, "pid"); + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) + ADD_FIELD(event_class, cw->data.u64, "id"); + + if (type & PERF_SAMPLE_STREAM_ID) + ADD_FIELD(event_class, cw->data.u64, "stream_id"); + + if (type & PERF_SAMPLE_CPU) + ADD_FIELD(event_class, cw->data.u32, "cpu"); + + if (type & PERF_SAMPLE_PERIOD) + ADD_FIELD(event_class, cw->data.u64, "period"); + + if (type & PERF_SAMPLE_WEIGHT) + ADD_FIELD(event_class, cw->data.u64, "weight"); + + if (type & PERF_SAMPLE_DATA_SRC) + ADD_FIELD(event_class, cw->data.u64, "data_src"); + + if (type & PERF_SAMPLE_TRANSACTION) + ADD_FIELD(event_class, cw->data.u64, "transaction"); + +#undef ADD_FIELD + return 0; +} + +static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) +{ + struct bt_ctf_event_class *event_class; + struct evsel_priv *priv; + const char *name = perf_evsel__name(evsel); + int ret; + + pr("Adding event '%s' (type %d)\n", name, evsel->attr.type); + + event_class = bt_ctf_event_class_create(name); + if (!event_class) + return -1; + + ret = add_generic_types(cw, evsel, event_class); + if (ret) + goto err; + + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); + if (ret) { + pr("Failed to add event class into stream.\n"); + goto err; + } + + priv = malloc(sizeof(*priv)); + if (!priv) + goto err; + + priv->event_class = event_class; + evsel->priv = priv; + return 0; + +err: + bt_ctf_event_class_put(event_class); + pr_err("Failed to add event '%s'.\n", name); + return -1; +} + +static int setup_events(struct ctf_writer *cw, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + int ret; + + evlist__for_each(evlist, evsel) { + ret = add_event(cw, evsel); + if (ret) + return ret; + } + return 0; +} + +static int ctf_writer__setup_env(struct ctf_writer *cw, + struct perf_session *session) +{ + struct perf_header *header = &session->header; + struct bt_ctf_writer *writer = cw->writer; + +#define ADD(__n, __v) \ +do { \ + if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \ + return -1; \ +} while (0) + + ADD("host", header->env.hostname); + ADD("sysname", "Linux"); + ADD("release", header->env.os_release); + ADD("version", header->env.version); + ADD("machine", header->env.arch); + ADD("domain", "kernel"); + ADD("tracer_name", "perf"); + +#undef ADD + return 0; +} + +static int ctf_writer__setup_clock(struct ctf_writer *cw) +{ + struct bt_ctf_clock *clock = cw->clock; + + bt_ctf_clock_set_description(clock, "perf clock"); + +#define SET(__n, __v) \ +do { \ + if (bt_ctf_clock_set_##__n(clock, __v)) \ + return -1; \ +} while (0) + + SET(frequency, 1000000000); + SET(offset_s, 0); + SET(offset, 0); + SET(precision, 10); + SET(is_absolute, 0); + +#undef SET + return 0; +} + +static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) +{ + struct bt_ctf_field_type *type; + + type = bt_ctf_field_type_integer_create(size); + if (!type) + return NULL; + + if (sign && + bt_ctf_field_type_integer_set_signed(type, 1)) + goto err; + + if (hex && + bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) + goto err; + + pr2("Created type: INTEGER %d-bit %ssigned %s\n", + size, sign ? "un" : "", hex ? "hex" : ""); + return type; + +err: + bt_ctf_field_type_put(type); + return NULL; +} + +static void ctf_writer__cleanup_data(struct ctf_writer *cw) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cw->data.array); i++) + bt_ctf_field_type_put(cw->data.array[i]); +} + +static int ctf_writer__init_data(struct ctf_writer *cw) +{ +#define CREATE_INT_TYPE(type, size, sign, hex) \ +do { \ + (type) = create_int_type(size, sign, hex); \ + if (!(type)) \ + goto err; \ +} while (0) + + CREATE_INT_TYPE(cw->data.s64, 64, true, false); + CREATE_INT_TYPE(cw->data.u64, 64, false, false); + CREATE_INT_TYPE(cw->data.s32, 32, true, false); + CREATE_INT_TYPE(cw->data.u32, 32, false, false); + CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true); + + cw->data.string = bt_ctf_field_type_string_create(); + if (cw->data.string) + return 0; + +err: + ctf_writer__cleanup_data(cw); + pr_err("Failed to create data types.\n"); + return -1; +} + +static void ctf_writer__cleanup(struct ctf_writer *cw) +{ + ctf_writer__cleanup_data(cw); + + bt_ctf_clock_put(cw->clock); + bt_ctf_stream_put(cw->stream); + bt_ctf_stream_class_put(cw->stream_class); + bt_ctf_writer_put(cw->writer); + + /* and NULL all the pointers */ + memset(cw, 0, sizeof(*cw)); +} + +static int ctf_writer__init(struct ctf_writer *cw, const char *path) +{ + struct bt_ctf_writer *writer; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_stream *stream; + struct bt_ctf_clock *clock; + + /* CTF writer */ + writer = bt_ctf_writer_create(path); + if (!writer) + goto err; + + cw->writer = writer; + + /* CTF clock */ + clock = bt_ctf_clock_create("perf_clock"); + if (!clock) { + pr("Failed to create CTF clock.\n"); + goto err_cleanup; + } + + cw->clock = clock; + + if (ctf_writer__setup_clock(cw)) { + pr("Failed to setup CTF clock.\n"); + goto err_cleanup; + } + + /* CTF stream class */ + stream_class = bt_ctf_stream_class_create("perf_stream"); + if (!stream_class) { + pr("Failed to create CTF stream class.\n"); + goto err_cleanup; + } + + cw->stream_class = stream_class; + + /* CTF clock stream setup */ + if (bt_ctf_stream_class_set_clock(stream_class, clock)) { + pr("Failed to assign CTF clock to stream class.\n"); + goto err_cleanup; + } + + if (ctf_writer__init_data(cw)) + goto err_cleanup; + + /* CTF stream instance */ + stream = bt_ctf_writer_create_stream(writer, stream_class); + if (!stream) { + pr("Failed to create CTF stream.\n"); + goto err_cleanup; + } + + cw->stream = stream; + + /* CTF clock writer setup */ + if (bt_ctf_writer_add_clock(writer, clock)) { + pr("Failed to assign CTF clock to writer.\n"); + goto err_cleanup; + } + + return 0; + +err_cleanup: + ctf_writer__cleanup(cw); +err: + pr_err("Failed to setup CTF writer.\n"); + return -1; +} + +int bt_convert__perf2ctf(const char *input, const char *path) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = input, + .mode = PERF_DATA_MODE_READ, + }; + struct convert c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + }; + struct ctf_writer *cw = &c.writer; + int err = -1; + + /* CTF writer */ + if (ctf_writer__init(cw, path)) + return -1; + + /* perf.data session */ + session = perf_session__new(&file, 0, NULL); + if (!session) + goto free_writer; + + /* CTF writer env/clock setup */ + if (ctf_writer__setup_env(cw, session)) + goto free_session; + + /* CTF events setup */ + if (setup_events(cw, session)) + goto free_session; + + err = perf_session__process_events(session, &c.tool); + if (!err) + err = bt_ctf_stream_flush(cw->stream); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", + file.path, path); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (double) c.events_size / 1024.0 / 1024.0, + c.events_count); + + /* its all good */ +free_session: + perf_session__delete(session); + +free_writer: + ctf_writer__cleanup(cw); + return err; +} diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h new file mode 100644 index 0000000..dda30c5 --- /dev/null +++ b/tools/perf/util/data-convert-bt.h @@ -0,0 +1,8 @@ +#ifndef __DATA_CONVERT_BT_H +#define __DATA_CONVERT_BT_H +#ifdef HAVE_LIBBABELTRACE_SUPPORT + +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf); + +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ +#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ad60b2f..2da5581 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; +int debug_data_convert; static int _eprintf(int level, int var, const char *fmt, va_list args) { @@ -147,6 +148,7 @@ static struct debug_variable { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, { .name = "stderr", .ptr = &redirect_to_stderr}, + { .name = "data-convert", .ptr = &debug_data_convert }, { .name = NULL, } }; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index be264d6..caac2fd 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,6 +12,7 @@ extern int verbose; extern bool quiet, dump_trace; extern int debug_ordered_events; +extern int debug_data_convert; #ifndef pr_fmt #define pr_fmt(fmt) fmt -- cgit v0.10.2 From 54cf776a9c5c2e6a91de31954bba4d3bad6c657c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 20 Feb 2015 23:17:01 +0100 Subject: perf data: Add a 'perf' prefix to the generic fields Some of the tracers bring their own id or pid fields and we can end up having two of them. This patch adds a "perf_" prefix to the 'generic' fields so we avoid a clash of the member names. The change is visible in the babeltrace output: Before: $ babeltrace ./ctf-data/ [03:19:13.962131936] (+0.000001935) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 8 } [03:19:13.962133732] (+0.000001796) cycles: { }, { ip = 0xFFFFFFFF8105443A, tid = 20714, pid = 20714, period = 114 } ... Now: $ babeltrace ./ctf-data/ [03:19:13.962131936] (+0.000001935) cycles: { }, { perf_ip = 0xFFFFFFFF8105443A, perf_tid = 20714, perf_pid = 20714, perf_period = 8 } [03:19:13.962133732] (+0.000001796) cycles: { }, { perf_ip = 0xFFFFFFFF8105443A, perf_tid = 20714, perf_pid = 20714, perf_period = 114 } ... Signed-off-by: Sebastian Andrzej Siewior Acked-by: Namhyung Kim Reviewed-by: David Ahern Tested-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/r/1424470628-5969-5-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index ff4826c..e372e03 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -147,60 +147,62 @@ static int add_generic_values(struct ctf_writer *cw, */ if (type & PERF_SAMPLE_IP) { - ret = value_set_u64_hex(cw, event, "ip", sample->ip); + ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip); if (ret) return -1; } if (type & PERF_SAMPLE_TID) { - ret = value_set_s32(cw, event, "tid", sample->tid); + ret = value_set_s32(cw, event, "perf_tid", sample->tid); if (ret) return -1; - ret = value_set_s32(cw, event, "pid", sample->pid); + ret = value_set_s32(cw, event, "perf_pid", sample->pid); if (ret) return -1; } if ((type & PERF_SAMPLE_ID) || (type & PERF_SAMPLE_IDENTIFIER)) { - ret = value_set_u64(cw, event, "id", sample->id); + ret = value_set_u64(cw, event, "perf_id", sample->id); if (ret) return -1; } if (type & PERF_SAMPLE_STREAM_ID) { - ret = value_set_u64(cw, event, "stream_id", sample->stream_id); + ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id); if (ret) return -1; } if (type & PERF_SAMPLE_CPU) { - ret = value_set_u32(cw, event, "cpu", sample->cpu); + ret = value_set_u32(cw, event, "perf_cpu", sample->cpu); if (ret) return -1; } if (type & PERF_SAMPLE_PERIOD) { - ret = value_set_u64(cw, event, "period", sample->period); + ret = value_set_u64(cw, event, "perf_period", sample->period); if (ret) return -1; } if (type & PERF_SAMPLE_WEIGHT) { - ret = value_set_u64(cw, event, "weight", sample->weight); + ret = value_set_u64(cw, event, "perf_weight", sample->weight); if (ret) return -1; } if (type & PERF_SAMPLE_DATA_SRC) { - ret = value_set_u64(cw, event, "data_src", sample->data_src); + ret = value_set_u64(cw, event, "perf_data_src", + sample->data_src); if (ret) return -1; } if (type & PERF_SAMPLE_TRANSACTION) { - ret = value_set_u64(cw, event, "transaction", sample->transaction); + ret = value_set_u64(cw, event, "perf_transaction", + sample->transaction); if (ret) return -1; } @@ -276,34 +278,34 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, } while (0) if (type & PERF_SAMPLE_IP) - ADD_FIELD(event_class, cw->data.u64_hex, "ip"); + ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip"); if (type & PERF_SAMPLE_TID) { - ADD_FIELD(event_class, cw->data.s32, "tid"); - ADD_FIELD(event_class, cw->data.s32, "pid"); + ADD_FIELD(event_class, cw->data.s32, "perf_tid"); + ADD_FIELD(event_class, cw->data.s32, "perf_pid"); } if ((type & PERF_SAMPLE_ID) || (type & PERF_SAMPLE_IDENTIFIER)) - ADD_FIELD(event_class, cw->data.u64, "id"); + ADD_FIELD(event_class, cw->data.u64, "perf_id"); if (type & PERF_SAMPLE_STREAM_ID) - ADD_FIELD(event_class, cw->data.u64, "stream_id"); + ADD_FIELD(event_class, cw->data.u64, "perf_stream_id"); if (type & PERF_SAMPLE_CPU) - ADD_FIELD(event_class, cw->data.u32, "cpu"); + ADD_FIELD(event_class, cw->data.u32, "perf_cpu"); if (type & PERF_SAMPLE_PERIOD) - ADD_FIELD(event_class, cw->data.u64, "period"); + ADD_FIELD(event_class, cw->data.u64, "perf_period"); if (type & PERF_SAMPLE_WEIGHT) - ADD_FIELD(event_class, cw->data.u64, "weight"); + ADD_FIELD(event_class, cw->data.u64, "perf_weight"); if (type & PERF_SAMPLE_DATA_SRC) - ADD_FIELD(event_class, cw->data.u64, "data_src"); + ADD_FIELD(event_class, cw->data.u64, "perf_data_src"); if (type & PERF_SAMPLE_TRANSACTION) - ADD_FIELD(event_class, cw->data.u64, "transaction"); + ADD_FIELD(event_class, cw->data.u64, "perf_transaction"); #undef ADD_FIELD return 0; -- cgit v0.10.2 From 55d43bcafe78b6da33f8a49be68ef168f3cbfec9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Feb 2015 15:00:22 -0500 Subject: perf trace: Fix SIGBUS failures due to misaligned accesses On Sparc64 perf-trace is failing in many spots due to extended load instructions being used on misaligned accesses. (gdb) run trace ls Starting program: /tmp/perf/perf trace ls [Thread debugging using libthread_db enabled] Detaching after fork from child process 169460. Program received signal SIGBUS, Bus error. 0x000000000014f4dc in tp_field__u64 (field=0x4cc700, sample=0x7feffffa098) at builtin-trace.c:61 warning: Source file is more recent than executable. 61 TP_UINT_FIELD(64); (gdb) bt 0 0x000000000014f4dc in tp_field__u64 (field=0x4cc700, sample=0x7feffffa098) at builtin-trace.c:61 1 0x0000000000156ad4 in trace__sys_exit (trace=0x7feffffc268, evsel=0x4cc580, event=0xfffffc0104912000, sample=0x7feffffa098) at builtin-trace.c:1701 2 0x0000000000158c14 in trace__run (trace=0x7feffffc268, argc=1, argv=0x7fefffff360) at builtin-trace.c:2160 3 0x000000000015b78c in cmd_trace (argc=1, argv=0x7fefffff360, prefix=0x0) at builtin-trace.c:2609 4 0x0000000000107d94 in run_builtin (p=0x4549c8, argc=2, argv=0x7fefffff360) at perf.c:341 5 0x0000000000108140 in handle_internal_command (argc=2, argv=0x7fefffff360) at perf.c:400 6 0x0000000000108308 in run_argv (argcp=0x7feffffef2c, argv=0x7feffffef20) at perf.c:444 7 0x0000000000108728 in main (argc=2, argv=0x7fefffff360) at perf.c:559 (gdb) p *sample $1 = {ip = 4391276, pid = 169472, tid = 169472, time = 6303014583281250, addr = 0, id = 72082, stream_id = 18446744073709551615, period = 1, weight = 0, transaction = 0, cpu = 73, raw_size = 36, data_src = 84410401, flags = 0, insn_len = 0, raw_data = 0xfffffc010491203c, callchain = 0x0, branch_stack = 0x0, user_regs = {abi = 0, mask = 0, regs = 0x0, cache_regs = 0x7feffffa098, cache_mask = 0}, intr_regs = {abi = 0, mask = 0, regs = 0x0, cache_regs = 0x7feffffa098, cache_mask = 0}, user_stack = { offset = 0, size = 0, data = 0x0}, read = {time_enabled = 0, time_running = 0, {group = {nr = 0, values = 0x0}, one = {value = 0, id = 0}}}} (gdb) p *field $2 = {offset = 16, {integer = 0x14f4a8 , pointer = 0x14f4a8 }} sample->raw_data is guaranteed to not be 8-byte aligned because it is preceded by the size as a u3. So accessing raw data with an extended load instruction causes the SIGBUS. Resolve by using memcpy to a temporary variable of appropriate size. Signed-off-by: David Ahern Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1424376022-140608-1-git-send-email-david.ahern@oracle.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5cd84974..d95a8f4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -52,7 +52,9 @@ struct tp_field { #define TP_UINT_FIELD(bits) \ static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - return *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ + return value; \ } TP_UINT_FIELD(8); @@ -63,7 +65,8 @@ TP_UINT_FIELD(64); #define TP_UINT_FIELD__SWAPPED(bits) \ static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ return bswap_##bits(value);\ } @@ -1517,11 +1520,22 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +/* + * args is to be interpreted as a series of longs but we need to handle + * 8-byte unaligned accesses. args points to raw_data within the event + * and raw_data is guaranteed to be 8-byte unaligned because it is + * preceded by raw_size which is a u32. So we need to copy args to a temp + * variable to read it. Most notably this avoids extended load instructions + * on unaligned addresses + */ + static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned long *args, struct trace *trace, + unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; + unsigned char *p; + unsigned long val; if (sc->tp_format != NULL) { struct format_field *field; @@ -1537,12 +1551,17 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, field = field->next, ++arg.idx, bit <<= 1) { if (arg.mask & bit) continue; + + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * arg.idx; + memcpy(&val, p, sizeof(val)); + /* * Suppress this argument if its value is zero and * and we don't have a string associated in an * strarray for it. */ - if (args[arg.idx] == 0 && + if (val == 0 && !(sc->arg_scnprintf && sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && sc->arg_parm[arg.idx])) @@ -1551,23 +1570,26 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { - arg.val = args[arg.idx]; + arg.val = val; if (sc->arg_parm) arg.parm = sc->arg_parm[arg.idx]; printed += sc->arg_scnprintf[arg.idx](bf + printed, size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, - "%ld", args[arg.idx]); + "%ld", val); } } } else { int i = 0; while (i < 6) { + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * i; + memcpy(&val, p, sizeof(val)); printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", - printed ? ", " : "", i, args[i]); + printed ? ", " : "", i, val); ++i; } } -- cgit v0.10.2 From 9aaf5a5f479bd68699f2e6f6e5e5f1253377b6da Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 19 Feb 2015 23:31:13 +0900 Subject: perf probe: Check kprobes blacklist when adding new events Recent linux kernel provides a blacklist of the functions which can not be probed. perf probe can now check this blacklist before setting new events and indicate better error message for users. Without this patch, ---- # perf probe --add vmalloc_fault Added new event: Failed to write event: Invalid argument Error: Failed to add events. ---- With this patch ---- # perf probe --add vmalloc_fault Added new event: Warning: Skipped probing on blacklisted function: vmalloc_fault ---- Reported-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150219143113.14434.5387.stgit@localhost.localdomain Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 9dfbed9..662d454 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1903,6 +1903,95 @@ static struct strlist *get_probe_trace_command_rawlist(int fd) return sl; } +struct kprobe_blacklist_node { + struct list_head list; + unsigned long start; + unsigned long end; + char *symbol; +}; + +static void kprobe_blacklist__delete(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + + while (!list_empty(blacklist)) { + node = list_first_entry(blacklist, + struct kprobe_blacklist_node, list); + list_del(&node->list); + free(node->symbol); + free(node); + } +} + +static int kprobe_blacklist__load(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + const char *__debugfs = debugfs_find_mountpoint(); + char buf[PATH_MAX], *p; + FILE *fp; + int ret; + + if (__debugfs == NULL) + return -ENOTSUP; + + ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs); + if (ret < 0) + return ret; + + fp = fopen(buf, "r"); + if (!fp) + return -errno; + + ret = 0; + while (fgets(buf, PATH_MAX, fp)) { + node = zalloc(sizeof(*node)); + if (!node) { + ret = -ENOMEM; + break; + } + INIT_LIST_HEAD(&node->list); + list_add_tail(&node->list, blacklist); + if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + ret = -EINVAL; + break; + } + p = strchr(buf, '\t'); + if (p) { + p++; + if (p[strlen(p) - 1] == '\n') + p[strlen(p) - 1] = '\0'; + } else + p = (char *)"unknown"; + node->symbol = strdup(p); + if (!node->symbol) { + ret = -ENOMEM; + break; + } + pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + node->start, node->end, node->symbol); + ret++; + } + if (ret < 0) + kprobe_blacklist__delete(blacklist); + fclose(fp); + + return ret; +} + +static struct kprobe_blacklist_node * +kprobe_blacklist__find_by_address(struct list_head *blacklist, + unsigned long address) +{ + struct kprobe_blacklist_node *node; + + list_for_each_entry(node, blacklist, list) { + if (node->start <= address && address <= node->end) + return node; + } + + return NULL; +} + /* Show an event */ static int show_perf_probe_event(struct perf_probe_event *pev, const char *module) @@ -2117,6 +2206,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, char buf[64]; const char *event, *group; struct strlist *namelist; + LIST_HEAD(blacklist); + struct kprobe_blacklist_node *node; if (pev->uprobes) fd = open_uprobe_events(true); @@ -2134,11 +2225,25 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, pr_debug("Failed to get current event list.\n"); return -EIO; } + /* Get kprobe blacklist if exists */ + if (!pev->uprobes) { + ret = kprobe_blacklist__load(&blacklist); + if (ret < 0) + pr_debug("No kprobe blacklist support, ignored\n"); + } ret = 0; pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; + /* Ensure that the address is NOT blacklisted */ + node = kprobe_blacklist__find_by_address(&blacklist, + tev->point.address); + if (node) { + pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol); + continue; + } + if (pev->event) event = pev->event; else @@ -2189,13 +2294,15 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, allow_suffix = true; } - if (ret >= 0) { + /* Note that it is possible to skip all events because of blacklist */ + if (ret >= 0 && tev->event) { /* Show how to use the event. */ pr_info("\nYou can now use it in all perf tools, such as:\n\n"); pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } + kprobe_blacklist__delete(&blacklist); strlist__delete(namelist); close(fd); return ret; -- cgit v0.10.2 From eb47cb2eb22dfacac9689708f5bd3cb0e975e290 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Feb 2015 17:25:04 +0900 Subject: perf probe: Fix get_real_path to free allocated memory in error path Fix get_real_path to free allocated memory when comp_dir is used for complementing path and getting an error. Signed-off-by: Masami Hiramatsu Cc: Namhyung Kim Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150226082504.28125.74506.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 662d454..4a93bf4 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -549,9 +549,11 @@ static int get_real_path(const char *raw_path, const char *comp_dir, if (access(*new_path, R_OK) == 0) return 0; - if (!symbol_conf.source_prefix) + if (!symbol_conf.source_prefix) { /* In case of searching comp_dir, don't retry */ + zfree(new_path); return -errno; + } switch (errno) { case ENAMETOOLONG: -- cgit v0.10.2 From 38ae502b1df196f712f6f5d3609afc36337b330b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 Feb 2015 11:47:18 -0300 Subject: perf probe: Handle strdup() failure We could end up returning 0 (Ok) with a NULL raw_path. Fix it. Acked-by: Masami Hiramatsu Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naohiro Aota Link: http://lkml.kernel.org/n/tip-l0kcbcg5f4nnzqt01cv42vec@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4a93bf4..9526cf3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -533,7 +533,7 @@ static int get_real_path(const char *raw_path, const char *comp_dir, else { if (access(raw_path, R_OK) == 0) { *new_path = strdup(raw_path); - return 0; + return *new_path ? 0 : -ENOMEM; } else return -errno; } -- cgit v0.10.2 From a50d11a10c2db86d7383c281d4e249d5393661e9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 26 Feb 2015 15:54:40 +0900 Subject: perf buildid-cache: Add new buildid cache if update target is not cached Add new buildid cache if the update target file is not cached. This can happen when an old binary is replaced by new one after caching the old one. In this case, user sees his operation just failed. But it does not look straight, since user just pass the binary "path", not "build-id". ---- # ./perf buildid-cache --add ./perf (update ./perf to new binary) # ./perf buildid-cache --update ./perf ./perf wasn't in the cache # ---- This patch adds given new binary to cache if the new binary is not cached. So we'll not see the above error. ---- # ./perf buildid-cache --add ./perf (update ./perf to new binary) # ./perf buildid-cache --update ./perf # ---- Signed-off-by: Masami Hiramatsu Cc: Adrian Hunter Cc: Borislav Petkov Cc: Hemant Kumar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150226065440.23912.1494.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 0294c57..cec6b57 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -41,9 +41,14 @@ OPTIONS --missing=:: List missing build ids in the cache for the specified file. -u:: ---update:: - Update specified file of the cache. It can be used to update kallsyms - kernel dso to vmlinux in order to support annotation. +--update=:: + Update specified file of the cache. Note that this doesn't remove + older entires since those may be still needed for annotating old + (or remote) perf.data. Only if there is already a cache which has + exactly same build-id, that is replaced by new one. It can be used + to update kallsyms and kernel dso to vmlinux in order to support + annotation. + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d929d95..e7568f5 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -255,7 +255,7 @@ static int build_id_cache__update_file(const char *filename) u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - int err; + int err = 0; if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -263,7 +263,9 @@ static int build_id_cache__update_file(const char *filename) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id); + if (build_id_cache__cached(sbuild_id)) + err = build_id_cache__remove_s(sbuild_id); + if (!err) err = build_id_cache__add_s(sbuild_id, filename, false, false); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index adbc360..0bc33be 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -352,6 +352,18 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); } +bool build_id_cache__cached(const char *sbuild_id) +{ + bool ret = false; + char *filename = build_id__filename(sbuild_id, NULL, 0); + + if (filename && !access(filename, F_OK)) + ret = true; + free(filename); + + return ret; +} + int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 31b3c63..2a09498 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -22,6 +22,7 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); +bool build_id_cache__cached(const char *sbuild_id); int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); int build_id_cache__remove_s(const char *sbuild_id); -- cgit v0.10.2 From 94ba462d69efeba2f97111321a9ba1aa8141da57 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 9 Feb 2015 05:39:44 +0000 Subject: perf diff: Support for different binaries Currently, the perf diff only works with same binaries. That's because it compares the symbol start address. It doesn't work if the perf.data comes from different binaries. This patch matches the symbol names. Actually, perf diff once intended to compare the symbol names. The commit as below can look for a pair by name. 604c5c92972d (perf diff: Change the default sort order to "dso,symbol") However, at that time, perf diff used a global list of dsos. That means the binaries which has same name can only be loaded once. That's a problem for comparing different binaries. For example, we have an old binary and an updated binary. They very likely have same name and most of the functions, so only dsos from old binary will be loaded. When processing the data from updated binary, perf still use the symbol information from old binary. That's wrong. Then the commit as below used IP to replace symbol name. 9c443dfdd31e ("perf diff: Fix support for all --sort combinations") >From that time, perf diff starts to compare the symbol address. The global dsos is discarded from a patch in 2010. a1645ce12adb ("perf: 'perf kvm' tool for monitoring guest performance from host") However, at that time, perf diff already compared by address. So perf diff cannot work for different binaries as well. This patch actually rolls back the perf diff to original design. The document is also changed, so everybody knows the original design is to compare the symbol names. Here are some examples: The only difference between example_v1.c and example_v2.c is the location of f2 and f3. There is no change in behavior, but the previous perf diff display the wrong differential profile. example_v1.c noinline void f3(void) { volatile int i; for (i = 0; i < 10000;) { if(i%2) i++; else i++; } } noinline void f2(void) { volatile int a = 100, b, c; for (b = 0; b < 10000; b++) c = a * b; } noinline void f1(void) { f2(); f3(); } int main() { int i; for (i = 0; i < 100000; i++) f1(); } example_v2.c noinline void f2(void) { volatile int a = 100, b, c; for (b = 0; b < 10000; b++) c = a * b; } noinline void f3(void) { volatile int i; for (i = 0; i < 10000;) { if(i%2) i++; else i++; } } noinline void f1(void) { f2(); f3(); } int main() { int i; for (i = 0; i < 100000; i++) f1(); } [lk@localhost perf_diff]$ gcc example_v1.c -o example [lk@localhost perf_diff]$ perf record -o example_v1.data ./example [ perf record: Woken up 4 times to write data ] [ perf record: Captured and wrote 0.813 MB example_v1.data (~35522 samples) ] [lk@localhost perf_diff]$ gcc example_v2.c -o example [lk@localhost perf_diff]$ perf record -o example_v2.data ./example [ perf record: Woken up 4 times to write data ] [ perf record: Captured and wrote 0.824 MB example_v2.data (~36015 samples) ] Old perf diff result: [lk@localhost perf_diff]$ perf diff example_v1.data example_v2.data Event 'cycles' Baseline Delta Shared Object Symbol ........ ....... ................ ............................... [kernel.vmlinux] [k] __perf_event_task_sched_out 0.00% [kernel.vmlinux] [k] apic_timer_interrupt [kernel.vmlinux] [k] idle_cpu [kernel.vmlinux] [k] intel_pstate_timer_func [kernel.vmlinux] [k] native_read_msr_safe 0.00% [kernel.vmlinux] [k] native_read_tsc 0.00% [kernel.vmlinux] [k] native_write_msr_safe [kernel.vmlinux] [k] ntp_tick_length 0.00% [kernel.vmlinux] [k] rb_erase 0.00% [kernel.vmlinux] [k] tick_sched_timer 0.00% [kernel.vmlinux] [k] unmap_single_vma 0.00% [kernel.vmlinux] [k] update_wall_time 0.00% example [.] f1 46.24% example [.] f2 53.71% -7.55% example [.] f3 +53.81% example [.] f3 0.02% example [.] main New perf diff result: [lk@localhost perf_diff]$ perf diff example_v1.data example_v2.data [kernel.vmlinux] [k] __perf_event_task_sched_out 0.00% [kernel.vmlinux] [k] apic_timer_interrupt [kernel.vmlinux] [k] idle_cpu [kernel.vmlinux] [k] intel_pstate_timer_func [kernel.vmlinux] [k] native_read_msr_safe 0.00% [kernel.vmlinux] [k] native_read_tsc 0.00% [kernel.vmlinux] [k] native_write_msr_safe [kernel.vmlinux] [k] ntp_tick_length 0.00% [kernel.vmlinux] [k] rb_erase 0.00% [kernel.vmlinux] [k] tick_sched_timer 0.00% [kernel.vmlinux] [k] unmap_single_vma 0.00% [kernel.vmlinux] [k] update_wall_time 0.00% example [.] f1 46.24% -0.08% example [.] f2 53.71% +0.11% example [.] f3 0.02% example [.] main Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Link: http://lkml.kernel.org/r/1423460384-11645-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index e463caa..5182661 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -20,6 +20,11 @@ If no parameters are passed it will assume perf.data.old and perf.data. The differential profile is displayed only for events matching both specified perf.data files. +If no parameters are passed the samples will be sorted by dso and symbol. +As the perf.data files could come from different binaries, the symbols addresses +could vary. So perf diff is based on the comparison of the files and +symbols name. + OPTIONS ------- -D:: diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7a39c1e..4593f36 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1463,6 +1463,15 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } else if (sd->entry == &sort_sym) { sort__has_sym = 1; + /* + * perf diff displays the performance difference amongst + * two or more perf.data files. Those files could come + * from different binaries. So we should not compare + * their ips, but the name of symbol. + */ + if (sort__mode == SORT_MODE__DIFF) + sd->entry->se_collapse = sort__sym_sort; + } else if (sd->entry == &sort_dso) { sort__has_dso = 1; } -- cgit v0.10.2 From f56847c2e99810781f6941d01baff9ae223eeac3 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Fri, 27 Feb 2015 18:52:53 +0800 Subject: perf probe: Fix a precedence bug The minus operator has higher precedence than ?: Add parentheses around ?: fix this. Before this patch: $ echo 'p:myprobe do_sys_open' > /sys/kernel/debug/tracing/kprobe_events $ perf probe -l -k ../vmlinux kprobes:myprobe (on do_sys_open) After this patch: $ echo 'p:myprobe do_sys_open' > /sys/kernel/debug/tracing/kprobe_events $ perf probe -l -k ../vmlinux kprobes:myprobe (on do_sys_open@linux.git/fs/open.c) Signed-off-by: He Kuang Acked-by: Masami Hiramatsu Cc: Wang Nan Link: http://lkml.kernel.org/r/1425034373-14511-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 9526cf3..7c0e765 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -151,7 +151,7 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) sym = __find_kernel_function_by_name(name, &map); if (sym) return map->unmap_ip(map, sym->start) - - (reloc) ? 0 : map->reloc; + ((reloc) ? 0 : map->reloc); } return 0; } -- cgit v0.10.2 From 1f924c29b5ab2257be88a2a4075d0800573d8479 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 19:53:46 +0800 Subject: perf data: Fix sentinel setting for data_cmds array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent new patch "perf tools: Add new 'perf data' command" (commit 2245bf14 in acme's git repo perf/core) has caused a building error when compiling the source code of perf: cc1: warnings being treated as errors builtin-data.c:89: error: missing initializer builtin-data.c:89: error: (near initialization for ‘data_cmds[1].summary’) make[2]: *** [builtin-data.o] Error 1 make[2]: *** Waiting for unfinished jobs.... LD bench/perf-in.o LD tests/perf-in.o make[1]: *** [perf-in.o] Error 2 make: *** [all] Error 2 This patch fixes the building error above. Signed-off-by: Yunlong Song Cc: Peter Zijlstra Cc: Jiri Olsa Cc: Paul Mackerras Cc: Wang Nan Link: http://lkml.kernel.org/r/1425038026-27604-1-git-send-email-yunlong.song@huawei.com [ .name == NULL ends the loop, use it instead of seting all fields to NULL ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 9705ba7..155cf75 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -86,7 +86,7 @@ static int cmd_data_convert(int argc, const char **argv, static struct data_cmd data_cmds[] = { { "convert", "converts data file between formats", cmd_data_convert }, - { NULL }, + { .name = NULL, }, }; int cmd_data(int argc, const char **argv, const char *prefix) -- cgit v0.10.2 From ab0e48002db818c1937f105cd18001dfdd3ce056 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:25 +0800 Subject: perf list: Sort the output of 'perf list' to view more clearly Sort the output according to ASCII character list (using strcmp), which supports both number sequence and alphabet sequence. Example: Before this patch: $ perf list List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] ... ... jbd2:jbd2_start_commit [Tracepoint event] jbd2:jbd2_commit_locking [Tracepoint event] jbd2:jbd2_run_stats [Tracepoint event] block:block_rq_issue [Tracepoint event] block:block_bio_complete [Tracepoint event] block:block_bio_backmerge [Tracepoint event] block:block_getrq [Tracepoint event] ... ... After this patch: $ perf list List of pre-defined events (to be used in -e): branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cache-misses [Hardware event] cache-references [Hardware event] cpu-cycles OR cycles [Hardware event] instructions [Hardware event] ... ... block:block_bio_backmerge [Tracepoint event] block:block_bio_complete [Tracepoint event] block:block_getrq [Tracepoint event] block:block_rq_issue [Tracepoint event] jbd2:jbd2_commit_locking [Tracepoint event] jbd2:jbd2_run_stats [Tracepoint event] jbd2:jbd2_start_commit [Tracepoint event] ... ... Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-2-git-send-email-yunlong.song@huawei.com [ Don't forget closedir({sys,evt}_dir) when handling errors ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 109ba5c..f6822d9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1089,6 +1089,14 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; +static int cmp_string(const void *a, const void *b) +{ + const char * const *as = a; + const char * const *bs = b; + + return strcmp(*as, *bs); +} + /* * Print the events from /tracing/events */ @@ -1100,11 +1108,21 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; + char **evt_list = NULL; + unsigned int evt_i = 0, evt_num = 0; + bool evt_num_known = false; +restart: sys_dir = opendir(tracing_events_path); if (!sys_dir) return; + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_close_sys_dir; + } + for_each_subsystem(sys_dir, sys_dirent, sys_next) { if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) @@ -1121,19 +1139,56 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, !strglobmatch(evt_dirent.d_name, event_glob)) continue; - if (name_only) { - printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + if (!evt_num_known) { + evt_num++; continue; } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); + + evt_list[evt_i] = strdup(evt_path); + if (evt_list[evt_i] == NULL) + goto out_close_evt_dir; + evt_i++; } closedir(evt_dir); } closedir(sys_dir); + + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + } + if (evt_num) + printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_close_evt_dir: + closedir(evt_dir); +out_close_sys_dir: + closedir(sys_dir); + + printf("FATAL: not enough memory to print %s\n", + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + if (evt_list) + goto out_free; } /* @@ -1218,20 +1273,61 @@ static void __print_events_type(u8 type, struct event_symbol *syms, unsigned max) { char name[64]; - unsigned i; + unsigned int i, evt_i = 0, evt_num = 0; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + syms -= max; + } for (i = 0; i < max ; i++, syms++) { if (!is_event_supported(type, i)) continue; + if (!evt_num_known) { + evt_num++; + continue; + } + if (strlen(syms->alias)) snprintf(name, sizeof(name), "%s OR %s", syms->symbol, syms->alias); else snprintf(name, sizeof(name), "%s", syms->symbol); - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; + } + + if (!evt_num_known) { + evt_num_known = true; + goto restart; } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) + printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + if (evt_num) + printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); + if (evt_list) + goto out_free; } void print_events_type(u8 type) @@ -1244,8 +1340,17 @@ void print_events_type(u8 type) int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, printed = 0; + unsigned int type, op, i, evt_i = 0, evt_num = 0; char name[64]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + } for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { @@ -1263,27 +1368,66 @@ int print_hwcache_events(const char *event_glob, bool name_only) type | (op << 8) | (i << 16))) continue; - if (name_only) - printf("%s ", name); - else - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); - ++printed; + if (!evt_num_known) { + evt_num++; + continue; + } + + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } } } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_HW_CACHE]); + } + if (evt_num) printf("\n"); - return printed; + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return evt_num; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (evt_list) + goto out_free; + return evt_num; } static void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) { - unsigned i, printed = 0; + unsigned int i, evt_i = 0, evt_num = 0; char name[MAX_NAME_LEN]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + syms -= max; + } for (i = 0; i < max; i++, syms++) { @@ -1295,23 +1439,49 @@ static void print_symbol_events(const char *event_glob, unsigned type, if (!is_event_supported(type, i)) continue; - if (name_only) { - printf("%s ", syms->symbol); + if (!evt_num_known) { + evt_num++; continue; } - if (strlen(syms->alias)) + if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - - printed++; + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + } + if (evt_num) printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); + if (evt_list) + goto out_free; } /* -- cgit v0.10.2 From 161149513b3570ebd7fe14fc2ddc42cb46557e37 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:26 +0800 Subject: perf list: Allow listing events with 'tracepoint' prefix If somebody happens to name an event with the beginning of 'tracepoint' (e.g. tracepoint_foo), then it will never be showed with perf list event_glob, thus we parse the argument 'tracepoint' more carefully for accuracy. Example: Before this patch: $ perf list tracepoint_foo:* jbd2:jbd2_start_commit [Tracepoint event] jbd2:jbd2_commit_locking [Tracepoint event] jbd2:jbd2_run_stats [Tracepoint event] block:block_rq_issue [Tracepoint event] block:block_bio_complete [Tracepoint event] block:block_bio_backmerge [Tracepoint event] block:block_getrq [Tracepoint event] ... ... As shown above, all of the tracepoint events are printed. In fact, the command's real intention is to print the events of tracepoint_foo. After this patch: $ perf list tracepoint_foo:* tracepoint_foo:tp_foo_enter [Tracepoint event] tracepoint_foo:tp_foo_exit [Tracepoint event] As shown above, only the events of tracepoint_foo are printed. Signed-off-by: Yunlong Song Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-3-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index ad8018e..2acbcf0 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -50,9 +50,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) } for (i = 0; i < argc; ++i) { - if (i) - putchar('\n'); - if (strncmp(argv[i], "tracepoint", 10) == 0) + if (strcmp(argv[i], "tracepoint") == 0) print_tracepoint_events(NULL, NULL, false); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) -- cgit v0.10.2 From ed45752061be11a40f57df4304296147dbda2da9 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:29 +0800 Subject: perf list: Avoid confusion of perf output and the next command prompt Distinguish the output of 'perf list --list-opts' or 'perf --list-cmds' with the next command prompt, which also happens in other cases (e.g. record, report ...). Example: Before this patch: $perf list --list-opts --raw-dump $ <-- the output and the next command prompt are at the same line After this patch: $perf list --list-opts --raw-dump $ <-- the new line Signed-off-by: Yunlong Song Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-6-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf.c b/tools/perf/perf.c index f3c66b8..3df2665 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -223,6 +223,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) struct cmd_struct *p = commands+i; printf("%s ", p->cmd); } + putchar('\n'); exit(0); } else if (!strcmp(cmd, "--debug")) { if (*argc < 2) { diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 4ee9a86..b0ef2d8 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -508,12 +508,14 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o printf("--%s ", options->long_name); options++; } + putchar('\n'); exit(130); case PARSE_OPT_LIST_SUBCMDS: if (subcommands) { for (int i = 0; subcommands[i]; i++) printf("%s ", subcommands[i]); } + putchar('\n'); exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { -- cgit v0.10.2 From 3ef1e65c829c86ffaa94a4ed59fed5da37f9610a Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:30 +0800 Subject: perf tools: Remove the '--(null)' long_name for --list-opts If the long_name of a 'struct option' is defined as NULL, --list-opts will incorrectly print '--(null)' in its output. As a result, '--(null)' will finally appear in the case of bash completion, e.g. 'perf record --'. Example: Before this patch: $ perf record --list-opts --event --filter --pid --tid --realtime --no-buffering --raw-samples --all-cpus --cpu --count --output --no-inherit --freq --mmap-pages --group --(null) --call-graph --verbose --quiet --stat --data --timestamp --period --no-samples --no-buildid-cache --no-buildid --cgroup --delay --uid --branch-any --branch-filter --weight --transaction --per-thread --intr-regs After this patch: $ perf record --list-opts --event --filter --pid --tid --realtime --no-buffering --raw-samples --all-cpus --cpu --count --output --no-inherit --freq --mmap-pages --group --call-graph --verbose --quiet --stat --data --timestamp --period --no-samples --no-buildid-cache --no-buildid --cgroup --delay --uid --branch-any --branch-filter --weight --transaction --per-thread --intr-regs Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-7-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b0ef2d8..1457d66 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -505,7 +505,8 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o break; case PARSE_OPT_LIST_OPTS: while (options->type != OPTION_END) { - printf("--%s ", options->long_name); + if (options->long_name) + printf("--%s ", options->long_name); options++; } putchar('\n'); -- cgit v0.10.2 From 705750f2d6e283ba2856ba8eda60dce2d405b387 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:27 +0800 Subject: perf list: Clean up the printing functions of hardware/software events Do not need print_events_type or __print_events_type for listing hw/sw events, let print_symbol_events do its job instead. Moreover, print_symbol_events can also handle event_glob and name_only. Signed-off-by: Yunlong Song Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-4-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 2acbcf0..8b323e0 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -54,10 +54,12 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_tracepoint_events(NULL, NULL, false); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_events_type(PERF_TYPE_HARDWARE); + print_symbol_events(NULL, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, false); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) - print_events_type(PERF_TYPE_SOFTWARE); + print_symbol_events(NULL, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, false); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(NULL, false); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6822d9..fe07573 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -20,11 +20,6 @@ #define MAX_NAME_LEN 100 -struct event_symbol { - const char *symbol; - const char *alias; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif @@ -39,7 +34,7 @@ static struct perf_pmu_event_symbol *perf_pmu_events_list; */ static int perf_pmu_events_list_num; -static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { +struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { .symbol = "cpu-cycles", .alias = "cycles", @@ -82,7 +77,7 @@ static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { }, }; -static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { +struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { [PERF_COUNT_SW_CPU_CLOCK] = { .symbol = "cpu-clock", .alias = "", @@ -1269,75 +1264,6 @@ static bool is_event_supported(u8 type, unsigned config) return ret; } -static void __print_events_type(u8 type, struct event_symbol *syms, - unsigned max) -{ - char name[64]; - unsigned int i, evt_i = 0, evt_num = 0; - char **evt_list = NULL; - bool evt_num_known = false; - -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; - syms -= max; - } - - for (i = 0; i < max ; i++, syms++) { - if (!is_event_supported(type, i)) - continue; - - if (!evt_num_known) { - evt_num++; - continue; - } - - if (strlen(syms->alias)) - snprintf(name, sizeof(name), "%s OR %s", - syms->symbol, syms->alias); - else - snprintf(name, sizeof(name), "%s", syms->symbol); - - evt_list[evt_i] = strdup(name); - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; - } - - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) - printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); - if (evt_num) - printf("\n"); - -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); - if (evt_list) - goto out_free; -} - -void print_events_type(u8 type) -{ - if (type == PERF_TYPE_SOFTWARE) - __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); - else - __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); -} - int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, evt_i = 0, evt_num = 0; @@ -1412,7 +1338,7 @@ out_enomem: return evt_num; } -static void print_symbol_events(const char *event_glob, unsigned type, +void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) { diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 39c3b57..52a2dda 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -116,7 +116,16 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_error(void *data, void *scanner, char const *msg); void print_events(const char *event_glob, bool name_only); -void print_events_type(u8 type); + +struct event_symbol { + const char *symbol; + const char *alias; +}; +extern struct event_symbol event_symbols_hw[]; +extern struct event_symbol event_symbols_sw[]; +void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max, + bool name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); -- cgit v0.10.2 From 5ef803ee02d67ad0b49f357cb7feb7d5e6b0015d Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:28 +0800 Subject: perf list: Extend raw-dump to certain kind of events Extend 'perf list --raw-dump' to 'perf list --raw-dump [hw|sw|cache |tracepoint|pmu|event_glob]' in order to show the raw-dump of a certain kind of events rather than all of the events. Example: Before this patch: $ perf list --raw-dump hw branch-instructions branch-misses bus-cycles cache-misses cache-references cpu-cycles instructions stalled-cycles-backend stalled-cycles-frontend alignment-faults context-switches cpu-clock cpu-migrations emulation-faults major-faults minor-faults page-faults task-clock ... ... writeback:writeback_thread_start writeback:writeback_thread_stop writeback:writeback_wait_iff_congested writeback:writeback_wake_background writeback:writeback_wake_thread As shown above, all of the events are printed. After this patch: $ perf list --raw-dump hw branch-instructions branch-misses bus-cycles cache-misses cache-references cpu-cycles instructions stalled-cycles-backend stalled-cycles-frontend As shown above, only the hw events are printed. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-5-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 3e2aec9..4692d27 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -127,6 +127,12 @@ To limit the list use: One or more types can be used at the same time, listing the events for the types specified. +Support raw format: + +. '--raw-dump', shows the raw-dump of all the events. +. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of + a certain kind of events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 8b323e0..af5bd05 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -36,41 +36,36 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) setup_pager(); - if (raw_dump) { - print_events(NULL, true); - return 0; - } - if (!raw_dump) printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, false); + print_events(NULL, raw_dump); return 0; } for (i = 0; i < argc; ++i) { if (strcmp(argv[i], "tracepoint") == 0) - print_tracepoint_events(NULL, NULL, false); + print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) print_symbol_events(NULL, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX, false); + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) print_symbol_events(NULL, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX, false); + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, false); + print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, false); + print_pmu_events(NULL, raw_dump); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], false); + print_events(argv[i], raw_dump); continue; } sep_idx = sep - argv[i]; @@ -79,7 +74,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, false); + print_tracepoint_events(s, s + sep_idx + 1, raw_dump); free(s); } } -- cgit v0.10.2 From 7335399a6a4bead9ef8b59ce7d811fc4e99ca98c Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Feb 2015 18:21:31 +0800 Subject: perf tools: Fix the bash completion problem of 'perf --*' The perf-completion.sh uses a predefined string '--help --version --exec-path --html-path --paginate --no-pager --perf-dir --work-tree --debugfs-dir' for the bash completion of 'perf --*', which has two problems: Problem 1: If the options of perf are changed (see handle_options() in perf.c), the perf-completion.sh has to be changed at the same time. If not, the bash completion of 'perf --*' and the options which perf really supports will be inconsistent. Problem 2: When typing another single character after 'perf --', e.g. 'h', and hit TAB key to get the bash completion of 'perf --h', the character 'h' disappears at once. This is not what we want, we wish the bash completion can return '--help --html-path' and then we can continue to choose one. To solve this problem, we add '--list-opts' to perf, which now supports 'perf --list-opts' directly, and its result can be used in bash completion now. Example: Before this patch: $ perf --h <-- hit TAB key after character 'h' $ perf -- <-- 'h' disappears and no required result After this patch: $ perf --h <-- hit TAB key after character 'h' --help --html-path <-- the required result Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425032491-20224-8-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 3356984..c2595e9 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -110,13 +110,11 @@ __perf_main () # List perf subcommands or long options if [ $cword -eq 1 ]; then if [[ $cur == --* ]]; then - __perfcomp '--help --version \ - --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" + cmds=$($cmd --list-opts) else cmds=$($cmd --list-cmds) - __perfcomp "$cmds" "$cur" fi + __perfcomp "$cmds" "$cur" # List possible events for -e option elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3df2665..b857fcb 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -13,6 +13,7 @@ #include "util/quote.h" #include "util/run-command.h" #include "util/parse-events.h" +#include "util/parse-options.h" #include "util/debug.h" #include #include @@ -125,6 +126,23 @@ static void commit_pager_choice(void) } } +struct option options[] = { + OPT_ARGUMENT("help", "help"), + OPT_ARGUMENT("version", "version"), + OPT_ARGUMENT("exec-path", "exec-path"), + OPT_ARGUMENT("html-path", "html-path"), + OPT_ARGUMENT("paginate", "paginate"), + OPT_ARGUMENT("no-pager", "no-pager"), + OPT_ARGUMENT("perf-dir", "perf-dir"), + OPT_ARGUMENT("work-tree", "work-tree"), + OPT_ARGUMENT("debugfs-dir", "debugfs-dir"), + OPT_ARGUMENT("buildid-dir", "buildid-dir"), + OPT_ARGUMENT("list-cmds", "list-cmds"), + OPT_ARGUMENT("list-opts", "list-opts"), + OPT_ARGUMENT("debug", "debug"), + OPT_END() +}; + static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -225,6 +243,15 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) } putchar('\n'); exit(0); + } else if (!strcmp(cmd, "--list-opts")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(options)-1; i++) { + struct option *p = options+i; + printf("--%s ", p->long_name); + } + putchar('\n'); + exit(0); } else if (!strcmp(cmd, "--debug")) { if (*argc < 2) { fprintf(stderr, "No variable specified for --debug.\n"); -- cgit v0.10.2 From 8d8c8e4cb3014fcc51f0e127b4316043306f5bb0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Feb 2015 13:50:26 +0900 Subject: perf buildid-cache: Add --purge FILE to remove all caches of FILE Add --purge FILE to remove all caches of FILE. Since the current --remove FILE removes a cache which has same build-id of given FILE. Since the command takes a FILE path, it can confuse user who tries to remove cache about FILE path. ----- # ./perf buildid-cache -v --add ./perf Adding 133b7b5486d987a5ab5c3ebf4ea14941f45d4d4f ./perf: Ok # (update the ./perf binary) # ./perf buildid-cache -v --remove ./perf Removing 305bbd1be68f66eca7e2d78db294653031edfa79 ./perf: FAIL ./perf wasn't in the cache ----- Actually, the --remove's FAIL is not shown, it just silently fails. So, this patch adds --purge FILE action for such usecase. perf buildid-cache --purge FILE removes all caches which has same FILE path. In other words, it removes all caches including old binaries. ----- # ./perf buildid-cache -v --add ./perf Adding 133b7b5486d987a5ab5c3ebf4ea14941f45d4d4f ./perf: Ok # (update the ./perf binary) # ./perf buildid-cache -v --purge ./perf Removing 133b7b5486d987a5ab5c3ebf4ea14941f45d4d4f ./perf: Ok ----- BTW, if you want to purge all the caches, remove ~/.debug/* . Signed-off-by: Masami Hiramatsu Cc: Adrian Hunter Cc: Borislav Petkov Cc: Hemant Kumar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150227045026.1999.64084.stgit@localhost.localdomain [ s/dirname/dir_name/g to fix build on fedora14, where dirname is a global ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index cec6b57..dd07b55 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -12,9 +12,9 @@ SYNOPSIS DESCRIPTION ----------- -This command manages the build-id cache. It can add and remove files to/from -the cache. In the future it should as well purge older entries, set upper -limits for the space used by the cache, etc. +This command manages the build-id cache. It can add, remove, update and purge +files to/from the cache. In the future it should as well set upper limits for +the space used by the cache, etc. OPTIONS ------- @@ -36,7 +36,12 @@ OPTIONS actually made. -r:: --remove=:: - Remove specified file from the cache. + Remove a cached binary which has same build-id of specified file + from the cache. +-p:: +--purge=:: + Purge all cached binaries including older caches which have specified + path from the cache. -M:: --missing=:: List missing build ids in the cache for the specified file. diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index e7568f5..86f9d78 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -223,6 +223,33 @@ static int build_id_cache__remove_file(const char *filename) return err; } +static int build_id_cache__purge_path(const char *pathname) +{ + struct strlist *list; + struct str_node *pos; + int err; + + err = build_id_cache__list_build_ids(pathname, &list); + if (err) + goto out; + + strlist__for_each(pos, list) { + err = build_id_cache__remove_s(pos->s); + if (verbose) + pr_info("Removing %s %s: %s\n", pos->s, pathname, + err ? "FAIL" : "Ok"); + if (err) + break; + } + strlist__delete(list); + +out: + if (verbose) + pr_info("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); + + return err; +} + static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; @@ -285,6 +312,7 @@ int cmd_buildid_cache(int argc, const char **argv, bool force = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, + *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, *kcore_filename = NULL; @@ -302,6 +330,8 @@ int cmd_buildid_cache(int argc, const char **argv, "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), + OPT_STRING('p', "purge", &purge_name_list_str, "path list", + "path(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -368,6 +398,24 @@ int cmd_buildid_cache(int argc, const char **argv, } } + if (purge_name_list_str) { + list = strlist__new(true, purge_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__purge_path(pos->s)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't remove %s: %s\n", + pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + } + + strlist__delete(list); + } + } + if (missing_filename) ret = build_id_cache__fprintf_missing(session, stdout); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0bc33be..ffdc338 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -281,35 +281,93 @@ void disable_buildid_cache(void) no_buildid_cache = true; } +static char *build_id_cache__dirname_from_path(const char *name, + bool is_kallsyms, bool is_vdso) +{ + char *realname = (char *)name, *filename; + bool slash = is_kallsyms || is_vdso; + + if (!slash) { + realname = realpath(name, NULL); + if (!realname) + return NULL; + } + + if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname) < 0) + filename = NULL; + + if (!slash) + free(realname); + + return filename; +} + +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result) +{ + struct strlist *list; + char *dir_name; + DIR *dir; + struct dirent *d; + int ret = 0; + + list = strlist__new(true, NULL); + dir_name = build_id_cache__dirname_from_path(pathname, false, false); + if (!list || !dir_name) { + ret = -ENOMEM; + goto out; + } + + /* List up all dirents */ + dir = opendir(dir_name); + if (!dir) { + ret = -errno; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + strlist__add(list, d->d_name); + } + closedir(dir); + +out: + free(dir_name); + if (ret) + strlist__delete(list); + else + *result = list; + + return ret; +} + int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso) { const size_t size = PATH_MAX; - char *realname, *filename = zalloc(size), + char *realname = NULL, *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *targetname, *tmp; - int len, err = -1; - bool slash = is_kallsyms || is_vdso; + int err = -1; - if (is_kallsyms) { - if (symbol_conf.kptr_restrict) { - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - err = 0; - goto out_free; - } - realname = (char *) name; - } else + if (!is_kallsyms) { realname = realpath(name, NULL); + if (!realname) + goto out_free; + } - if (realname == NULL || filename == NULL || linkname == NULL) + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + if (!dir_name) goto out_free; - len = scnprintf(filename, size, "%s%s%s", - buildid_dir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname); - if (mkdir_p(filename, 0755)) + if (mkdir_p(dir_name, 0755)) goto out_free; - snprintf(filename + len, size - len, "/%s", sbuild_id); + if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + filename = NULL; + goto out_free; + } if (access(filename, F_OK)) { if (is_kallsyms) { @@ -337,6 +395,7 @@ out_free: if (!is_kallsyms) free(realname); free(filename); + free(dir_name); free(linkname); return err; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 2a09498..8501122 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -4,6 +4,7 @@ #define BUILD_ID_SIZE 20 #include "tool.h" +#include "strlist.h" #include extern struct perf_tool build_id__mark_dso_hit_ops; @@ -22,6 +23,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result); bool build_id_cache__cached(const char *sbuild_id); int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); -- cgit v0.10.2 From cc169c7c31253e80e0d504f0cd5dbb9f1e3d3ac5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Feb 2015 13:50:28 +0900 Subject: perf buildid-cache: Use pr_debug instead of verbose && pr_info Use pr_debug instead of the combination of verbose and pr_info. "if (verbose) pr_info(...)" is same as "pr_debug(...)", replace it. Signed-off-by: Masami Hiramatsu Suggested-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: Hemant Kumar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150227045028.1999.93137.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 86f9d78..04466c4 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -196,9 +196,8 @@ static int build_id_cache__add_file(const char *filename) build_id__sprintf(build_id, sizeof(build_id), sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, false, false); - if (verbose) - pr_info("Adding %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Adding %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } @@ -216,9 +215,8 @@ static int build_id_cache__remove_file(const char *filename) build_id__sprintf(build_id, sizeof(build_id), sbuild_id); err = build_id_cache__remove_s(sbuild_id); - if (verbose) - pr_info("Removing %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Removing %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } @@ -235,17 +233,15 @@ static int build_id_cache__purge_path(const char *pathname) strlist__for_each(pos, list) { err = build_id_cache__remove_s(pos->s); - if (verbose) - pr_info("Removing %s %s: %s\n", pos->s, pathname, - err ? "FAIL" : "Ok"); + pr_debug("Removing %s %s: %s\n", pos->s, pathname, + err ? "FAIL" : "Ok"); if (err) break; } strlist__delete(list); out: - if (verbose) - pr_info("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); + pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); return err; } @@ -296,9 +292,8 @@ static int build_id_cache__update_file(const char *filename) if (!err) err = build_id_cache__add_s(sbuild_id, filename, false, false); - if (verbose) - pr_info("Updating %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Updating %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } -- cgit v0.10.2 From 0497d0a8201a38f0c95edc8a1fc0325f2f879ddb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Feb 2015 13:50:31 +0900 Subject: perf buildid-cache: Show usage with incorrect params Show usage if no action is specified or unexpected parameter is given. In other words, be more user friendly. Signed-off-by: Masami Hiramatsu Cc: Adrian Hunter Cc: Borislav Petkov Cc: Hemant Kumar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150227045030.1999.44006.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 04466c4..d47a0cd 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -343,6 +343,11 @@ int cmd_buildid_cache(int argc, const char **argv, argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); + if (argc || (!add_name_list_str && !kcore_filename && + !remove_name_list_str && !purge_name_list_str && + !missing_filename && !update_name_list_str)) + usage_with_options(buildid_cache_usage, buildid_cache_options); + if (missing_filename) { file.path = missing_filename; file.force = force; -- cgit v0.10.2 From fefd2d9619de3bf0bf02a8622e9f445c3d19cc3f Mon Sep 17 00:00:00 2001 From: He Kuang Date: Sun, 15 Feb 2015 10:33:37 +0800 Subject: perf report: Fix branch stack mode cannot be set When perf.data file is obtained using 'perf record -b', perf report should use branch stack mode to generate output. But this function is broken by improper comparison between boolean and constant -1. before this patch: $ perf report -b -i perf.data Samples: 16 of event 'cycles', Event count (approx.): 3171896 Overhead Command Shared Object Symbol 13.59% ls [kernel.kallsyms] [k] prio_tree_remove 13.16% ls [kernel.kallsyms] [k] change_pte_range 12.09% ls [kernel.kallsyms] [k] page_fault 12.02% ls [kernel.kallsyms] [k] zap_pte_range ... after this patch: $ perf report -b -i perf.data Samples: 256 of event 'cycles', Event count (approx.): 256 Overhead Command Source Shared Object Source Symbol Target Shared Object Target Symbol 9.38% ls [unknown] [k] 0000000000000000 [unknown] [k] 0000000000000000 6.25% ls libc-2.19.so [.] _dl_addr libc-2.19.so [.] _dl_addr 6.25% ls [kernel.kallsyms] [k] zap_pte_range [kernel.kallsyms] [k] zap_pte_range 6.25% ls [kernel.kallsyms] [k] change_pte_range [kernel.kallsyms] [k] change_pte_range 0.39% ls [kernel.kallsyms] [k] prio_tree_remove [kernel.kallsyms] [k] prio_tree_remove ... Signed-off-by: He Kuang Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1423967617-28879-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0ba5f07..fb35034 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -768,7 +768,7 @@ repeat: * 0/1 means the user chose a mode. */ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && - branch_call_mode == -1) { + !branch_call_mode) { sort__mode = SORT_MODE__BRANCH; symbol_conf.cumulate_callchain = false; } -- cgit v0.10.2 From ecefde629fadd3fcca2ea4c6a799d6e6aab8781f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Feb 2015 13:22:33 -0500 Subject: perf tools: Only include tsc file for x86 The perf_time_to_tsc and tsc_to_perf_time functions are only used for x86. Make inclusion of tsc.c dependent on x86 as well. Signed-off-by: David Ahern Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1424370153-128274-1-git-send-email-david.ahern@oracle.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a2c8047..972a6e0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -71,7 +71,7 @@ libperf-y += stat.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o -libperf-y += tsc.o +libperf-$(CONFIG_X86) += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o -- cgit v0.10.2 From c65568c5456e5216e5467e81d1e04c1f5bdd453f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Feb 2015 18:59:31 -0500 Subject: perf tools: Compare JOBS to 0 after grep If JOBS is not by user perf tries to autodetect the number by grepping the number of CPUs from /proc/cpuinfo. 'grep -c' will always return an integer so after this command JOBS should be compared to 0, not "". Signed-off-by: David Ahern Link: http://lkml.kernel.org/r/1424303971-91904-1-git-send-email-david.ahern@oracle.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index cb2e586..d5020ae 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -25,7 +25,7 @@ unexport MAKEFLAGS # ifeq ($(JOBS),) JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) - ifeq ($(JOBS),) + ifeq ($(JOBS),0) JOBS := 1 endif endif -- cgit v0.10.2 From 3b4331d9a4f2d99603c38bfcac79943b7c6c5439 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Fri, 13 Feb 2015 18:40:58 +0000 Subject: perf stat: Report unsupported events properly Commit 1971f59 (perf stat: Use read_counter in read_counter_aggr ) broke the perf stat output for unsupported counters. $ perf stat -v -a -C 0 -e CCI_400/config=24/ sleep 1 Warning: CCI_400/config=24/ event is not supported by the kernel. Performance counter stats for 'system wide': 0 CCI_400/config=24/ 1.080265400 seconds time elapsed Where it used to be : $ perf stat -v -a -C 0 -e CCI_400/config=24/ sleep 1 Warning: CCI_400/config=24/ event is not supported by the kernel. Performance counter stats for 'system wide': CCI_400/config=24/ 1.083840675 seconds time elapsed This patch fixes the issues by checking if the counter is supported, before reading and logging the counter value. Signed-off-by: Suzuki K. Poulose Acked-by: David Ahern Tested-by: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1423852858-8455-1-git-send-email-suzuki.poulose@arm.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e598e4e..d28949d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -510,6 +510,9 @@ static int read_counter(struct perf_evsel *counter) int ncpus = perf_evsel__nr_cpus(counter); int cpu, thread; + if (!counter->supported) + return -ENOENT; + if (counter->system_wide) nthreads = 1; @@ -1285,7 +1288,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1) { + if (scaled == -1 || !counter->supported) { fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, -- cgit v0.10.2 From 4aa5f4f7bb8bc41cba15bcd0d80c4fb085027d6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Feb 2015 19:52:10 -0300 Subject: perf tools: Fix FORK after COMM when synthesizing records for pre-existing threads In this commit: commit 363b785f3805a2632eb09a8b430842461c21a640 Author: Don Zickus Date: Fri Mar 14 10:43:44 2014 -0400 perf tools: Speed up thread map generation We ended up emitting PERF_RECORD_FORK events after their corresponding PERF_RECORD_COMM, so the code below will remove the "existing thread" and then recreates it, unnecessarily: [root@ssdandy ~]# perf probe -x ~/bin/perf -L machine__process_fork_event 0 int machine__process_fork_event(struct machine *machine, union perf_event *event, struct perf_sample *sample) 2 { 3 struct thread *thread = machine__find_thread(machine, event->fork.pid, event->fork.tid); 6 struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); /* if a thread currently exists for the thread id remove it */ if (thread != NULL) 12 machine__remove_thread(machine, thread); 14 thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); 16 if (dump_trace) 17 perf_event__fprintf_task(event, stdout); 19 if (thread == NULL || parent == NULL || 20 thread__fork(thread, parent, sample->time) < 0) { 21 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); 22 return -1; } 25 return 0; 26 } [root@ssdandy ~]# perf probe -x ~/bin/perf fork_after_comm=machine__process_fork_event:12 Added new event: probe_perf:fork_after_comm (on machine__process_fork_event:12 in /home/acme/bin/perf) You can now use it in all perf tools, such as: perf record -e probe_perf:fork_after_comm -aR sleep 1 [root@ssdandy ~]# [root@ssdandy ~]# perf record -g -e probe_perf:* trace -o /tmp/bla ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.021 MB perf.data (30 samples) ] Terminated [root@ssdandy ~]# [root@ssdandy ~]# perf report --no-children --show-total-period --stdio # To display the perf.data header info, please use --header/--header-only options. # # Samples: 30 of event 'probe_perf:fork_after_comm' # Event count (approx.): 30 # # Overhead Period Command Shared Object Symbol # ........ ............ ....... ............. ............................... # 100.00% 30 trace trace [.] machine__process_fork_event | ---machine__process_fork_event __event__synthesize_thread.part.2 perf_event__synthesize_threads cmd_trace main __libc_start_main [root@ssdandy ~]# And Looking at 'perf report -D' output we see it: 0 0 0x8698 [0x30]: PERF_RECORD_COMM: auditd:703/707 0 0 0x86c8 [0x38]: PERF_RECORD_FORK(703:707):(703:703) Fix it by more closely mimicking how the kernel generates those records when a new fork happens, i.e. first a PERF_RECORD_FORK, then a PERF_RECORD_COMM. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-h0emvymi2t3mw8dlqd6d6z73@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9e806d8..d5efa50 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -95,9 +95,7 @@ static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) return tgid; } -static pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, +static pid_t perf_event__prepare_comm(union perf_event *event, pid_t pid, struct machine *machine) { size_t size; @@ -124,6 +122,19 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, (sizeof(event->comm.comm) - size) + machine->id_hdr_size); event->comm.tid = pid; +out: + return tgid; +} + +static pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine) +{ + pid_t tgid = perf_event__prepare_comm(event, pid, machine); + + if (tgid == -1) + goto out; if (process(tool, event, &synth_sample, machine) != 0) return -1; @@ -139,7 +150,6 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, { memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); - /* this is really a clone event but we use fork to synthesize it */ event->fork.ppid = tgid; event->fork.ptid = tgid; event->fork.pid = tgid; @@ -368,19 +378,23 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (*end) continue; - tgid = perf_event__synthesize_comm(tool, comm_event, _pid, - process, machine); + tgid = perf_event__prepare_comm(comm_event, _pid, machine); if (tgid == -1) return -1; + if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + process, machine) < 0) + return -1; + /* + * Send the prepared comm event + */ + if (process(tool, comm_event, &synth_sample, machine) != 0) + return -1; + if (_pid == pid) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); - } else { - /* only fork the tid's map, to save time */ - rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, - process, machine); } if (rc) -- cgit v0.10.2 From b11db6581beaccef8ae9a388ae96074aa5cc144f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Mar 2015 13:31:03 +0900 Subject: perf tools: Fix build error on ARCH=i386/x86_64/sparc64 He Kuang reported that current perf tools failed to build when ARCH variable was given like above. It was because the name is different that internal directory name. I can see that David's sparc64 build has same problem. So fix it by applying the sed conversion script to the command line ARCH variable also, and fixing the converted name there (i.e. i386/x86_64 -> x86, sparc64 -> sparc). Reported-by: He Kuang Signed-off-by: Namhyung Kim Tested-by: He Kuang Acked: Jiri Olsa Cc: David Ahern Cc: He Kuang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1425270663-10215-1-git-send-email-namhyung@kernel.org [ Resolved conflict with 4861f87cd3d1 "Make sparc64 arch point to sparc" ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index ac8721f..e972057 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -1,32 +1,15 @@ +ifndef ARCH +ARCH := $(shell uname -m 2>/dev/null || echo not) +endif -uname_M := $(shell uname -m 2>/dev/null || echo not) - -RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ +ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ + -e s/sun4u/sparc/ -e s/sparc64/sparc/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/tile.*/tile/ ) -# Additional ARCH settings for x86 -ifeq ($(RAW_ARCH),i386) - ARCH ?= x86 -endif - -ifeq ($(RAW_ARCH),x86_64) - ARCH ?= x86 - - ifneq (, $(findstring m32,$(CFLAGS))) - RAW_ARCH := x86_32 - endif -endif - -ifeq ($(RAW_ARCH),sparc64) - ARCH ?= sparc -endif - -ARCH ?= $(RAW_ARCH) - LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) ifeq ($(LP64), 1) IS_64_BIT := 1 -- cgit v0.10.2 From 08b23f4e635fa42a1d3ebdf31b8bb720f17d6c14 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Mar 2015 13:53:58 +0900 Subject: perf record: Get rid of -l option from Documentation The perf record does not support -l option anymore, so nuke it. Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1425272038-10406-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index cae75c1..4d66894 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -62,9 +62,6 @@ OPTIONS --all-cpus:: System-wide collection from all CPUs. --l:: - Scale counter values. - -p:: --pid=:: Record events on existing process ID (comma separated list). -- cgit v0.10.2 From 9a75606ca06d94aab1ed0dbe96935e3f89dfb81c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Mar 2015 12:13:33 +0900 Subject: perf record: Document --group option The 'perf record --group' option lacks documentation and confuses users. As -e/--event option already supports group spec, it should not be used anymore. Also add a short description of event group itself. Reported-by: Stephane Eranian Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1425266013-5034-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 4d66894..355c4f5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -55,6 +55,11 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. + - a group of events surrounded by a pair of brace ("{event1,event2,...}"). + Each event is separated by commas and the group should be quoted to + prevent the shell interpretation. You also need to use --group on + "perf report" to view group events together. + --filter=:: Event filter. @@ -104,6 +109,10 @@ OPTIONS specification with appended unit character - B/K/M/G. The size is rounded up to have nearest pages power of two value. +--group:: + Put all events in a single event group. This precedes the --event + option and remains only for backward compatibility. See --event. + -g:: Enables call-graph (stack chain/backtrace) recording. -- cgit v0.10.2 From 97fe9253592241572711d3c1818c0b586d2f34b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 09:12:48 +0100 Subject: perf tools: Add PERF-FEATURES to the .gitignore file It's an auto-generated file. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228081248.GA31856@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 40399c3..68328f5 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -1,6 +1,7 @@ PERF-CFLAGS PERF-GUI-VARS PERF-VERSION-FILE +PERF-FEATURES perf perf-read-vdso32 perf-read-vdsox32 -- cgit v0.10.2 From a6a76ba9ea03fe22eb28a6a19482d547b8773001 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 09:17:50 +0100 Subject: perf tools: Remove annoying extra message from the features build This message: Makefile:153: The path 'python-config' is not executable. Appears on every perf build that does not have a sufficient python environment installed. It's really just an internal detail of python configuration pass and users should not see it - and it's pretty meaningless to them in any case because the message is not very helpful. (So it's not executable. Why does that matter? What can the user do about it?) Remove the warning, the missing python feature warning is sufficient: config/Makefile:566: No python-config tool was found config/Makefile:566: Python support will not be built although even that one isn't very helpful to users: so no Python support will be built, what can the user do to fix that? Most other such warnings give package install suggestions. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228081750.GA31887@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 7076a62..c16ce83 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -175,6 +175,5 @@ _ge-abspath = $(if $(is-executable),$(1)) define get-executable-or-default $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef -_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) -_gea_warn = $(warning The path '$(1)' is not executable.) +_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2))) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) -- cgit v0.10.2 From 6c5aa23704e2786eb1a2a733165eef95c4375f41 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 09:33:45 +0100 Subject: perf tools: Improve Python feature detection messages Change the Python detection message from: config/Makefile:566: No python-config tool was found config/Makefile:566: Python support will not be built config/Makefile:565: No 'python-config' tool was found: disables Python support - please install python-devel/python-dev It's now a standard one-line message with a package install suggestion, and it also uses the standard language used by other feature detection messages. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228083345.GB31887@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c3570b5..d3efeef 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -548,22 +548,21 @@ endif disable-python = $(eval $(disable-python_code)) define disable-python_code CFLAGS += -DNO_LIBPYTHON - $(if $(1),$(warning No $(1) was found)) - $(warning Python support will not be built) + $(warning $1) NO_LIBPYTHON := 1 endef ifdef NO_LIBPYTHON - $(call disable-python) + $(call disable-python,Python support disabled by user) else ifndef PYTHON - $(call disable-python,python interpreter) + $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev) else PYTHON_WORD := $(call shell-wordify,$(PYTHON)) ifndef PYTHON_CONFIG - $(call disable-python,python-config tool) + $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev) else PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) @@ -575,7 +574,7 @@ else FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(feature-libpython), 1) - $(call disable-python,Python.h (for Python 2.x)) + $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else ifneq ($(feature-libpython-version), 1) -- cgit v0.10.2 From a954e68402f9cac000ad7ea57df6040fe5ef455a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 09:39:09 +0100 Subject: perf tools: Improve libperl detection message Before: Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed After: Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev Change the message to the standard 'please install' language and adds Debian-ish package suggestion. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228083909.GC31887@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index d3efeef..aa2f0aa 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -531,7 +531,7 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 - msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); + msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) -- cgit v0.10.2 From 0189d7c45acd9fc9a7e6876dc55bc44ae8dc9a37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 09:46:42 +0100 Subject: perf tools: Improve libbfd detection message Before: No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling After: No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling Change the message to the standard 'please install' language and also add libiberty-dev suggestion for Ubuntu systems. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228084610.GE31887@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index aa2f0aa..e2350ad 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -635,7 +635,7 @@ else EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling) CFLAGS += -DNO_DEMANGLE endif endif -- cgit v0.10.2 From b49f1a4be701c2386ccc7496dc8442cf26424d5c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 10:16:27 +0100 Subject: perf tools: Improve feature test debuggability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain feature tests fail with link errors: triton:~/tip/tools/perf/config/feature-checks> make test-libbabeltrace.bin gcc -MD -o test-libbabeltrace.bin test-libbabeltrace.c # -lbabeltrace provided by /tmp/cc6dRSqd.o: In function `main': test-libbabeltrace.c:(.text+0xf): undefined reference to `bt_ctf_stream_class_get_packet_context_type' although they should already fail with a build error due to lack of a proper prototype for the function. Due to this I first tried to find which library was missing - while it was the whole feature that was missing from the .h file already. To solve this, propagate -Wall -Werror to all testcases and remove them from testcase Makefile rules that used them explicitly. A missing feature now outputs: triton:~/tip/tools/perf/config/feature-checks> make test-libbabeltrace.bin gcc -MD -Wall -Werror -o test-libbabeltrace.bin test-libbabeltrace.c # -lbabeltrace provided by test-libbabeltrace.c: In function ‘main’: test-libbabeltrace.c:6:2: error: implicit declaration of function ‘bt_ctf_stream_class_get_packet_context_type’ [-Werror=implicit-function-declaration] Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228091627.GF31887@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 70c9aeb..8fe0678 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -39,24 +39,24 @@ PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) -BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) +BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace test-hello.bin: $(BUILD) test-pthread-attr-setaffinity-np.bin: - $(BUILD) -D_GNU_SOURCE -Werror -lpthread + $(BUILD) -D_GNU_SOURCE -lpthread test-stackprotector-all.bin: - $(BUILD) -Werror -fstack-protector-all + $(BUILD) -fstack-protector-all test-fortify-source.bin: - $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 + $(BUILD) -O2 -D_FORTIFY_SOURCE=2 test-bionic.bin: $(BUILD) @@ -119,10 +119,10 @@ test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl test-liberty.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty test-liberty-z.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz test-cplus-demangle.bin: $(BUILD) -liberty @@ -140,7 +140,7 @@ test-libbabeltrace.bin: $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) test-sync-compare-and-swap.bin: - $(BUILD) -Werror + $(BUILD) test-compile-32.bin: $(CC) -m32 -o $(OUTPUT)$@ test-compile.c -- cgit v0.10.2 From de5349fa439dd32d432cd401eb2decfae20b9f74 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Feb 2015 10:18:49 +0100 Subject: perf tools: Improve 'libbabel' feature check failure message On Debian-ish systems libbabeltrace-dev should be suggested as a package install as well. Signed-off-by: Ingo Molnar Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150228091849.GA28959@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index e2350ad..d44c64d 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -706,7 +706,7 @@ endif ifndef NO_LIBBABELTRACE ifeq ($(feature-libbabeltrace), 0) - msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-devel/libbabeltrace-ctf-dev); + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev); NO_LIBBABELTRACE := 1 else CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) -- cgit v0.10.2 From 79702f614187f652a814061e8f5875ddcc9e732d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 28 Feb 2015 11:53:29 +0900 Subject: perf probe: Warn if given uprobe event accesses memory on older kernel Warn if given uprobe event accesses memory on older kernel. Until 3.14, uprobe event only supports accessing registers so this warns to upgrade kernel if uprobe-event returns -EINVAL and an argument of the event accesses memory ($stack, @+offset, and +|-offs() symtax). With this patch (on 3.10.0-123.13.2.el7.x86_64); ----- # ./perf probe -x ./perf warn_uprobe_event_compat stack=-0\(%sp\) Added new event: Failed to write event: Invalid argument Please upgrade your kernel to at least 3.14 to have access to feature -0(%sp) Error: Failed to add events. ----- Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20150228025329.32106.70581.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7c0e765..1c570c2 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2199,6 +2199,27 @@ static int get_new_event_name(char *buf, size_t len, const char *base, return ret; } +/* Warn if the current kernel's uprobe implementation is old */ +static void warn_uprobe_event_compat(struct probe_trace_event *tev) +{ + int i; + char *buf = synthesize_probe_trace_command(tev); + + /* Old uprobe event doesn't support memory dereference */ + if (!tev->uprobes || tev->nargs == 0 || !buf) + goto out; + + for (i = 0; i < tev->nargs; i++) + if (strglobmatch(tev->args[i].value, "[$@+-]*")) { + pr_warning("Please upgrade your kernel to at least " + "3.14 to have access to feature %s\n", + tev->args[i].value); + break; + } +out: + free(buf); +} + static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, bool allow_suffix) @@ -2295,6 +2316,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, */ allow_suffix = true; } + if (ret == -EINVAL && pev->uprobes) + warn_uprobe_event_compat(tev); /* Note that it is possible to skip all events because of blacklist */ if (ret >= 0 && tev->event) { -- cgit v0.10.2 From 0104fe69e0287cf3635657b4c6b26a18e0091697 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 2 Mar 2015 21:49:46 +0900 Subject: perf probe: Remove bias offset to find probe point by address Remove bias offset to find probe point by address. Without this patch, probe points on kernel and executables are shown correctly, but do not work with libraries: # ./perf probe -l probe:do_fork (on do_fork@kernel/fork.c) probe_libc:malloc (on malloc in /usr/lib64/libc-2.17.so) probe_perf:strlist__new (on strlist__new@util/strlist.c in /home/mhiramat/ksrc/linux-3/tools/perf/perf) Removing bias allows it to show it as real place: # ./perf probe -l probe:do_fork (on do_fork@kernel/fork.c) probe_libc:malloc (on __libc_malloc@malloc/malloc.c in /usr/lib64/libc-2.17.so) probe_perf:strlist__new (on strlist__new@util/strlist.c in /home/mhiramat/ksrc/linux-3/tools/perf/perf) Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150302124946.9191.64085.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index d141935..46f009a 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1345,11 +1345,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; - /* Adjust address with bias */ - addr += dbg->bias; - /* Find cu die */ - if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) { + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; -- cgit v0.10.2 From 543d976fa2ebf5543bd07b5d487bf3a6144c0886 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 2 Mar 2015 09:59:05 +0200 Subject: perf tools: Initialize cpu set in pthread_attr_setaffinity_np feature test Feature tests are compiled but not executed, however it might avoid a future uninitialized variable warning, so initialize the cpu set. Reported-by: Ingo Molnar Signed-off-by: Adrian Hunter Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Stephane Eranian Cc: Thomas Gleixner Cc: linux-tip-commits@vger.kernel.org Link: http://lkml.kernel.org/r/54F41849.1010906@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c b/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c index 2b81b72..fdada5e 100644 --- a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c +++ b/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c @@ -1,5 +1,6 @@ #include #include +#include int main(void) { @@ -8,7 +9,8 @@ int main(void) cpu_set_t cs; pthread_attr_init(&thread_attr); - /* don't care abt exact args, just the API itself in libpthread */ + CPU_ZERO(&cs); + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs); return ret; -- cgit v0.10.2 From 2ed11312eb19506c027e7cac039994ad42a9cb2c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 2 Mar 2015 02:14:26 -0500 Subject: Revert "perf: Remove the extra validity check on nr_pages" This reverts commit 74390aa55678 ("perf: Remove the extra validity check on nr_pages") nr_pages equals to number of pages - 1 in perf_mmap. So nr_pages = 0 is valid. So the nr_pages != 0 && !is_power_of_2(nr_pages) are all needed for checking. Otherwise, for example, perf test 6 failed. # perf test 6 6: x86 rdpmc test :Error: mmap() syscall returned with (Invalid argument) FAILED! Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Kaixu Xia Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1425280466-7830-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/kernel/events/core.c b/kernel/events/core.c index af924bc..8bb20cc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4446,7 +4446,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) * If we have rb pages ensure they're a power-of-two number, so we * can do bitmasks instead of modulo. */ - if (!is_power_of_2(nr_pages)) + if (nr_pages != 0 && !is_power_of_2(nr_pages)) return -EINVAL; if (vma_size != PAGE_SIZE * (1 + nr_pages)) -- cgit v0.10.2 From f3b623b8490af7a9b819cbcf2d99ab4597ece94b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2015 22:21:35 -0300 Subject: perf tools: Reference count struct thread We need to do that to stop accumulating entries in the dead_threads linked list, i.e. we were keeping references to threads in struct hists that continue to exist even after a thread exited and was removed from the machine threads rbtree. We still keep the dead_threads list, but just for debugging, allowing us to iterate at any given point over the threads that still are referenced by things like struct hist_entry. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3ejvfyed0r7ue61dkurzjux4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7ce2966..e00e2ea 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -831,7 +831,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) return -1; } - atoms->thread = thread; + atoms->thread = thread__get(thread); INIT_LIST_HEAD(&atoms->work_list); __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); return 0; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d95a8f4..211614f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1741,7 +1741,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; - trace->current = thread; + if (trace->current != thread) { + thread__put(trace->current); + trace->current = thread__get(thread); + } return 0; } @@ -2274,6 +2277,8 @@ next_event: } out_disable: + thread__zput(trace->current); + perf_evlist__disable(evlist); if (!err) { diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 788506e..ad312d9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1467,7 +1467,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, perf_hpp__set_user_width(symbol_conf.col_width_list_str); while (1) { - const struct thread *thread = NULL; + struct thread *thread = NULL; const struct dso *dso = NULL; int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, @@ -1754,13 +1754,13 @@ zoom_thread: pstack__remove(fstack, &browser->hists->thread_filter); zoom_out_thread: ui_helpline__pop(); - browser->hists->thread_filter = NULL; + thread__zput(browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); - browser->hists->thread_filter = thread; + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(fstack, &browser->hists->thread_filter); } diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index ffdc338..a196746 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -61,8 +61,9 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, if (thread) { rb_erase(&thread->rb_node, &machine->threads); - machine->last_match = NULL; - thread__delete(thread); + if (machine->last_match == thread) + thread__zput(machine->last_match); + thread__put(thread); } return 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 70b48a6..95f5ab7 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -355,6 +355,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, callchain_init(he->callchain); INIT_LIST_HEAD(&he->pairs.node); + thread__get(he->thread); } return he; @@ -941,6 +942,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) void hist_entry__delete(struct hist_entry *he) { + thread__zput(he->thread); zfree(&he->branch_info); zfree(&he->mem_info); zfree(&he->stat_acc); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2b690d0..e988c9f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -60,7 +60,7 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; - const struct thread *thread_filter; + struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 9e0f60a..24f8c97 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -14,6 +14,8 @@ #include "unwind.h" #include "linux/hash.h" +static void machine__remove_thread(struct machine *machine, struct thread *th); + static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); @@ -89,16 +91,6 @@ static void dsos__delete(struct dsos *dsos) } } -void machine__delete_dead_threads(struct machine *machine) -{ - struct thread *n, *t; - - list_for_each_entry_safe(t, n, &machine->dead_threads, node) { - list_del(&t->node); - thread__delete(t); - } -} - void machine__delete_threads(struct machine *machine) { struct rb_node *nd = rb_first(&machine->threads); @@ -106,9 +98,8 @@ void machine__delete_threads(struct machine *machine) while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); - rb_erase(&t->rb_node, &machine->threads); nd = rb_next(nd); - thread__delete(t); + machine__remove_thread(machine, t); } } @@ -361,9 +352,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * the full rbtree: */ th = machine->last_match; - if (th && th->tid == tid) { - machine__update_thread_pid(machine, th, pid); - return th; + if (th != NULL) { + if (th->tid == tid) { + machine__update_thread_pid(machine, th, pid); + return th; + } + + thread__zput(machine->last_match); } while (*p != NULL) { @@ -371,7 +366,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + machine->last_match = thread__get(th); machine__update_thread_pid(machine, th, pid); return th; } @@ -403,8 +398,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine, thread__delete(th); return NULL; } - - machine->last_match = th; + /* + * It is now in the rbtree, get a ref + */ + thread__get(th); + machine->last_match = thread__get(th); } return th; @@ -1238,13 +1236,17 @@ out_problem: static void machine__remove_thread(struct machine *machine, struct thread *th) { - machine->last_match = NULL; + if (machine->last_match == th) + thread__zput(machine->last_match); + rb_erase(&th->rb_node, &machine->threads); /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. + * Move it first to the dead_threads list, then drop the reference, + * if this is the last reference, then the thread__delete destructor + * will be called and we will remove it from the dead_threads list. */ list_add_tail(&th->node, &machine->dead_threads); + thread__put(th); } int machine__process_fork_event(struct machine *machine, union perf_event *event, diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e8b7779..e2faf3b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -118,7 +118,6 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); -void machine__delete_dead_threads(struct machine *machine); void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e4f1669..ed4e5cf 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -138,11 +138,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return NULL; } -static void perf_session__delete_dead_threads(struct perf_session *session) -{ - machine__delete_dead_threads(&session->machines.host); -} - static void perf_session__delete_threads(struct perf_session *session) { machine__delete_threads(&session->machines.host); @@ -167,7 +162,6 @@ static void perf_session_env__delete(struct perf_session_env *env) void perf_session__delete(struct perf_session *session) { perf_session__destroy_kernel_maps(session); - perf_session__delete_dead_threads(session); perf_session__delete_threads(session); perf_session_env__delete(&session->header.env); machines__exit(&session->machines); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9ebc8b1..a5dbba9 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -82,6 +82,20 @@ void thread__delete(struct thread *thread) free(thread); } +struct thread *thread__get(struct thread *thread) +{ + ++thread->refcnt; + return thread; +} + +void thread__put(struct thread *thread) +{ + if (thread && --thread->refcnt == 0) { + list_del_init(&thread->node); + thread__delete(thread); + } +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 160fd06..783b668 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -20,6 +20,7 @@ struct thread { pid_t tid; pid_t ppid; int cpu; + int refcnt; char shortname[3]; bool comm_set; bool dead; /* if set thread has exited */ @@ -37,6 +38,18 @@ struct comm; struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_map_groups(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); + +struct thread *thread__get(struct thread *thread); +void thread__put(struct thread *thread); + +static inline void __thread__zput(struct thread **thread) +{ + thread__put(*thread); + *thread = NULL; +} + +#define thread__zput(thread) __thread__zput(&thread) + static inline void thread__exited(struct thread *thread) { thread->dead = true; -- cgit v0.10.2 From ae536acfacb65a4a9858c32b12361e09f84f4157 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2015 22:28:41 -0300 Subject: perf sched: No need to keep the session around We were keeping the session around just because we kept pointers to struct thread instances, but now we reference count them, so no need for deferring the perf_session__delete call to after we traverse the work_list entries. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-9agtck6jdr3rebdp39z1lo0e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e00e2ea..a3ebf1d 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1439,8 +1439,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ return err; } -static int perf_sched__read_events(struct perf_sched *sched, - struct perf_session **psession) +static int perf_sched__read_events(struct perf_sched *sched) { const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, @@ -1454,6 +1453,7 @@ static int perf_sched__read_events(struct perf_sched *sched, .path = input_name, .mode = PERF_DATA_MODE_READ, }; + int rc = -1; session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { @@ -1478,16 +1478,10 @@ static int perf_sched__read_events(struct perf_sched *sched, sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } - if (psession) - *psession = session; - else - perf_session__delete(session); - - return 0; - + rc = 0; out_delete: perf_session__delete(session); - return -1; + return rc; } static void print_bad_events(struct perf_sched *sched) @@ -1515,12 +1509,10 @@ static void print_bad_events(struct perf_sched *sched) static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; - struct perf_session *session; setup_pager(); - /* save session -- references to threads are held in work_list */ - if (perf_sched__read_events(sched, &session)) + if (perf_sched__read_events(sched)) return -1; perf_sched__sort_lat(sched); @@ -1537,6 +1529,7 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); + thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -1548,7 +1541,6 @@ static int perf_sched__lat(struct perf_sched *sched) print_bad_events(sched); printf("\n"); - perf_session__delete(session); return 0; } @@ -1557,7 +1549,7 @@ static int perf_sched__map(struct perf_sched *sched) sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; print_bad_events(sched); return 0; @@ -1572,7 +1564,7 @@ static int perf_sched__replay(struct perf_sched *sched) test_calibrations(sched); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; printf("nr_run_events: %ld\n", sched->nr_run_events); -- cgit v0.10.2 From fa713a4eb9cebe5dec71b1bd11429603e17d841d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Mar 2015 11:48:12 -0300 Subject: perf ordered_events: Untangle from perf_session For use by tools that are not perf.data based, as maybe 'perf trace' in live mode. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-nedqe7cmii5w82etfi36urfz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 077ddd2..e6ab630 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -153,10 +153,11 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } -static int __ordered_events__flush(struct perf_session *s, +static int __ordered_events__flush(struct ordered_events *oe, + struct machines *machines, + struct perf_evlist *evlist, struct perf_tool *tool) { - struct ordered_events *oe = &s->ordered_events; struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; struct perf_sample sample; @@ -179,12 +180,12 @@ static int __ordered_events__flush(struct perf_session *s, if (iter->timestamp > limit) break; - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); + ret = perf_evlist__parse_sample(evlist, iter->event, &sample); if (ret) pr_err("Can't parse sample, err = %d\n", ret); else { - ret = perf_session__deliver_event(s, iter->event, &sample, tool, - iter->file_offset); + ret = machines__deliver_event(machines, evlist, iter->event, + &sample, tool, iter->file_offset); if (ret) return ret; } @@ -204,10 +205,10 @@ static int __ordered_events__flush(struct perf_session *s, return 0; } -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, +int ordered_events__flush(struct ordered_events *oe, struct machines *machines, + struct perf_evlist *evlist, struct perf_tool *tool, enum oe_flush how) { - struct ordered_events *oe = &s->ordered_events; static const char * const str[] = { "NONE", "FINAL", @@ -251,7 +252,7 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(s, tool); + err = __ordered_events__flush(oe, machines, evlist, tool); if (!err) { if (how == OE_FLUSH__ROUND) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 7b8f9b0..e09f243 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -2,9 +2,10 @@ #define __ORDERED_EVENTS_H #include -#include "tool.h" -struct perf_session; +struct perf_tool; +struct perf_evlist; +struct machines; struct ordered_event { u64 timestamp; @@ -40,7 +41,8 @@ struct ordered_events { struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, union perf_event *event); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, +int ordered_events__flush(struct ordered_events *oe, struct machines *machines, + struct perf_evlist *evlist, struct perf_tool *tool, enum oe_flush how); void ordered_events__init(struct ordered_events *oe); void ordered_events__free(struct ordered_events *oe); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ed4e5cf..23be146 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -512,7 +512,11 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_session *session) { - return ordered_events__flush(session, tool, OE_FLUSH__ROUND); + struct ordered_events *oe = &session->ordered_events; + struct perf_evlist *evlist = session->evlist; + struct machines *machines = &session->machines; + + return ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__ROUND); } int perf_session_queue_event(struct perf_session *s, union perf_event *event, @@ -520,6 +524,9 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, u64 file_offset) { struct ordered_events *oe = &s->ordered_events; + struct perf_evlist *evlist = s->evlist; + struct machines *machines = &s->machines; + u64 timestamp = sample->time; struct ordered_event *new; @@ -536,7 +543,7 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, new = ordered_events__new(oe, timestamp, event); if (!new) { - ordered_events__flush(s, tool, OE_FLUSH__HALF); + ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__HALF); new = ordered_events__new(oe, timestamp, event); } @@ -886,12 +893,12 @@ static int &sample->read.one, machine); } -int perf_session__deliver_event(struct perf_session *session, +int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { - struct perf_evlist *evlist = session->evlist; struct perf_evsel *evsel; struct machine *machine; @@ -899,7 +906,7 @@ int perf_session__deliver_event(struct perf_session *session, evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = machines__find_for_cpumode(&session->machines, event, sample); + machine = machines__find_for_cpumode(machines, event, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: @@ -984,12 +991,14 @@ int perf_session__deliver_synth_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - events_stats__inc(&session->evlist->stats, event->header.type); + struct perf_evlist *evlist = session->evlist; + + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, 0); - return perf_session__deliver_event(session, event, sample, tool, 0); + return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0); } static void event_swap(union perf_event *event, bool sample_id_all) @@ -1090,8 +1099,8 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return machines__deliver_event(&session->machines, evlist, event, + &sample, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) @@ -1167,6 +1176,9 @@ volatile int session_done; static int __perf_session__process_pipe_events(struct perf_session *session, struct perf_tool *tool) { + struct ordered_events *oe = &session->ordered_events; + struct perf_evlist *evlist = session->evlist; + struct machines *machines = &session->machines; int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; @@ -1246,7 +1258,7 @@ more: goto more; done: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__FINAL); out_err: free(buf); perf_tool__warn_about_errors(tool, &session->evlist->stats); @@ -1298,6 +1310,9 @@ static int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, u64 file_size, struct perf_tool *tool) { + struct ordered_events *oe = &session->ordered_events; + struct perf_evlist *evlist = session->evlist; + struct machines *machines = &session->machines; int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1391,7 +1406,7 @@ more: out: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__FINAL); out_err: ui_progress__finish(); perf_tool__warn_about_errors(tool, &session->evlist->stats); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index fe859f3..c08fa6b 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -57,7 +57,8 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, void perf_tool__fill_defaults(struct perf_tool *tool); -int perf_session__deliver_event(struct perf_session *session, +int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); -- cgit v0.10.2 From b7b61cbebd789a3dbca522e3fdb727fe5c95593f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Mar 2015 11:58:45 -0300 Subject: perf ordered_events: Shorten function signatures By keeping pointers to machines, evlist and tool in ordered_events. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-0c6huyaf59mqtm2ek9pmposl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 747f861..71bf745 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -208,7 +208,7 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - ret = perf_session__process_events(session, &ann->tool); + ret = perf_session__process_events(session); if (ret) goto out; diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index ed3873b..feb420f 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -74,7 +74,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ if (with_hits || perf_data_file__is_pipe(&file)) - perf_session__process_events(session, &build_id__mark_dso_hit_ops); + perf_session__process_events(session); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); perf_session__delete(session); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 74aada5..f800fc9 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -747,7 +747,7 @@ static int __cmd_diff(void) goto out_delete; } - ret = perf_session__process_events(d->session, &tool); + ret = perf_session__process_events(d->session); if (ret) { pr_err("Failed to process %s\n", d->file.path); goto out_delete; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a13641e..2563f07 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -359,8 +359,6 @@ static int __cmd_inject(struct perf_inject *inject) } else if (inject->sched_stat) { struct perf_evsel *evsel; - inject->tool.ordered_events = true; - evlist__for_each(session->evlist, evsel) { const char *name = perf_evsel__name(evsel); @@ -379,7 +377,7 @@ static int __cmd_inject(struct perf_inject *inject) if (!file_out->is_pipe) lseek(fd, session->header.data_offset, SEEK_SET); - ret = perf_session__process_events(session, &inject->tool); + ret = perf_session__process_events(session); if (!file_out->is_pipe) { if (inject->build_ids) @@ -458,6 +456,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + inject.tool.ordered_events = inject.sched_stat; + file.path = inject.input_name; inject.session = perf_session__new(&file, true, &inject.tool); if (inject.session == NULL) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f295141..62f165a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -426,7 +426,7 @@ static int __cmd_kmem(struct perf_session *session) } setup_pager(); - err = perf_session__process_events(session, &perf_kmem); + err = perf_session__process_events(session); if (err != 0) goto out; sort_result(); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0894a81..802b8f5 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -730,9 +730,9 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session_queue_event(kvm->session, event, &kvm->tool, &sample, 0); + err = perf_session__queue_event(kvm->session, event, &sample, 0); /* - * FIXME: Here we can't consume the event, as perf_session_queue_event will + * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ perf_evlist__mmap_consume(kvm->evlist, idx); @@ -1066,7 +1066,7 @@ static int read_events(struct perf_kvm_stat *kvm) if (ret < 0) return ret; - return perf_session__process_events(kvm->session, &kvm->tool); + return perf_session__process_events(kvm->session); } static int parse_target_str(struct perf_kvm_stat *kvm) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e7ec715..7893a9b 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -878,7 +878,7 @@ static int __cmd_report(bool display_info) if (select_key()) goto out_delete; - err = perf_session__process_events(session, &eops); + err = perf_session__process_events(session); if (err) goto out_delete; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 9b56639..46c6931 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -141,7 +141,7 @@ static int report_raw_events(struct perf_mem *mem) printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session, &mem->tool); + err = perf_session__process_events(session); if (err) return err; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4fdad06..5a2ff51 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -225,7 +225,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return perf_session__process_events(session, &rec->tool); + return perf_session__process_events(session); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -343,7 +343,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - session = perf_session__new(file, false, NULL); + session = perf_session__new(file, false, tool); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fb35034..52f74e1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -482,7 +482,7 @@ static int __cmd_report(struct report *rep) if (ret) return ret; - ret = perf_session__process_events(session, &rep->tool); + ret = perf_session__process_events(session); if (ret) return ret; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index a3ebf1d..3b3a5bb 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1467,7 +1467,7 @@ static int perf_sched__read_events(struct perf_sched *sched) goto out_delete; if (perf_session__has_traces(session, "record -R")) { - int err = perf_session__process_events(session, &sched->tool); + int err = perf_session__process_events(session); if (err) { pr_err("Failed to process events, error %d", err); goto out_delete; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ce304df..c7e6750 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -800,7 +800,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap2 = process_mmap2_event; } - ret = perf_session__process_events(script->session, &script->tool); + ret = perf_session__process_events(script->session); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index f3bb1a4..51440d1 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1623,7 +1623,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) goto out_delete; } - ret = perf_session__process_events(session, &tchart->tool); + ret = perf_session__process_events(session); if (ret) goto out_delete; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 211614f..6969ba9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2408,7 +2408,7 @@ static int trace__replay(struct trace *trace) setup_pager(); - err = perf_session__process_events(session, &trace->tool); + err = perf_session__process_events(session); if (err) pr_err("Failed to process events, error %d", err); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index e372e03..1afd381 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -579,7 +579,7 @@ int bt_convert__perf2ctf(const char *input, const char *path) return -1; /* perf.data session */ - session = perf_session__new(&file, 0, NULL); + session = perf_session__new(&file, 0, &c.tool); if (!session) goto free_writer; @@ -591,7 +591,7 @@ int bt_convert__perf2ctf(const char *input, const char *path) if (setup_events(cw, session)) goto free_session; - err = perf_session__process_events(session, &c.tool); + err = perf_session__process_events(session); if (!err) err = bt_ctf_stream_flush(cw->stream); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index e6ab630..bad46ce 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -153,10 +153,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } -static int __ordered_events__flush(struct ordered_events *oe, - struct machines *machines, - struct perf_evlist *evlist, - struct perf_tool *tool) +static int __ordered_events__flush(struct ordered_events *oe) { struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; @@ -180,12 +177,12 @@ static int __ordered_events__flush(struct ordered_events *oe, if (iter->timestamp > limit) break; - ret = perf_evlist__parse_sample(evlist, iter->event, &sample); + ret = perf_evlist__parse_sample(oe->evlist, iter->event, &sample); if (ret) pr_err("Can't parse sample, err = %d\n", ret); else { - ret = machines__deliver_event(machines, evlist, iter->event, - &sample, tool, iter->file_offset); + ret = machines__deliver_event(oe->machines, oe->evlist, iter->event, + &sample, oe->tool, iter->file_offset); if (ret) return ret; } @@ -205,9 +202,7 @@ static int __ordered_events__flush(struct ordered_events *oe, return 0; } -int ordered_events__flush(struct ordered_events *oe, struct machines *machines, - struct perf_evlist *evlist, struct perf_tool *tool, - enum oe_flush how) +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) { static const char * const str[] = { "NONE", @@ -252,7 +247,7 @@ int ordered_events__flush(struct ordered_events *oe, struct machines *machines, str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(oe, machines, evlist, tool); + err = __ordered_events__flush(oe); if (!err) { if (how == OE_FLUSH__ROUND) @@ -268,13 +263,17 @@ int ordered_events__flush(struct ordered_events *oe, struct machines *machines, return err; } -void ordered_events__init(struct ordered_events *oe) +void ordered_events__init(struct ordered_events *oe, struct machines *machines, + struct perf_evlist *evlist, struct perf_tool *tool) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); INIT_LIST_HEAD(&oe->to_free); oe->max_alloc_size = (u64) -1; oe->cur_alloc_size = 0; + oe->evlist = evlist; + oe->machines = machines; + oe->tool = tool; } void ordered_events__free(struct ordered_events *oe) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index e09f243..ef7d73e 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -32,6 +32,9 @@ struct ordered_events { struct list_head to_free; struct ordered_event *buffer; struct ordered_event *last; + struct machines *machines; + struct perf_evlist *evlist; + struct perf_tool *tool; int buffer_idx; unsigned int nr_events; enum oe_flush last_flush_type; @@ -41,10 +44,9 @@ struct ordered_events { struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, union perf_event *event); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); -int ordered_events__flush(struct ordered_events *oe, struct machines *machines, - struct perf_evlist *evlist, struct perf_tool *tool, - enum oe_flush how); -void ordered_events__init(struct ordered_events *oe); +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); +void ordered_events__init(struct ordered_events *oe, struct machines *machines, + struct perf_evlist *evlsit, struct perf_tool *tool); void ordered_events__free(struct ordered_events *oe); static inline diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 23be146..c6dd89f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -95,7 +95,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, goto out; session->repipe = repipe; - ordered_events__init(&session->ordered_events); machines__init(&session->machines); if (file) { @@ -126,7 +125,8 @@ struct perf_session *perf_session__new(struct perf_data_file *file, tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; - } + } else + ordered_events__init(&session->ordered_events, &session->machines, session->evlist, tool); return session; @@ -508,24 +508,19 @@ static perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... */ -static int process_finished_round(struct perf_tool *tool, +static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; - struct perf_evlist *evlist = session->evlist; - struct machines *machines = &session->machines; - return ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__ROUND); + return ordered_events__flush(oe, OE_FLUSH__ROUND); } -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset) +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset) { struct ordered_events *oe = &s->ordered_events; - struct perf_evlist *evlist = s->evlist; - struct machines *machines = &s->machines; u64 timestamp = sample->time; struct ordered_event *new; @@ -543,7 +538,7 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, new = ordered_events__new(oe, timestamp, event); if (!new) { - ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__HALF); + ordered_events__flush(oe, OE_FLUSH__HALF); new = ordered_events__new(oe, timestamp, event); } @@ -948,9 +943,9 @@ int machines__deliver_event(struct machines *machines, static s64 perf_session__process_user_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, u64 file_offset) { + struct perf_tool *tool = session->ordered_events.tool; int fd = perf_data_file__fd(session->file); int err; @@ -988,15 +983,15 @@ static s64 perf_session__process_user_event(struct perf_session *session, int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool) + struct perf_sample *sample) { struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->ordered_events.tool; events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, 0); + return perf_session__process_user_event(session, event, 0); return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0); } @@ -1066,11 +1061,10 @@ out_parse_sample: } static s64 perf_session__process_event(struct perf_session *session, - union perf_event *event, - struct perf_tool *tool, - u64 file_offset) + union perf_event *event, u64 file_offset) { struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->ordered_events.tool; struct perf_sample sample; int ret; @@ -1083,7 +1077,7 @@ static s64 perf_session__process_event(struct perf_session *session, events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, file_offset); + return perf_session__process_user_event(session, event, file_offset); /* * For all kernel events we get the sample data @@ -1093,8 +1087,7 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; if (tool->ordered_events) { - ret = perf_session_queue_event(session, event, tool, &sample, - file_offset); + ret = perf_session__queue_event(session, event, &sample, file_offset); if (ret != -ETIME) return ret; } @@ -1173,12 +1166,10 @@ static void perf_tool__warn_about_errors(const struct perf_tool *tool, volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *session, - struct perf_tool *tool) +static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; - struct perf_evlist *evlist = session->evlist; - struct machines *machines = &session->machines; + struct perf_tool *tool = oe->tool; int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; @@ -1242,7 +1233,7 @@ more: } } - if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { + if ((skip = perf_session__process_event(session, event, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; @@ -1258,7 +1249,7 @@ more: goto more; done: /* do the final flush for ordered samples */ - err = ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: free(buf); perf_tool__warn_about_errors(tool, &session->evlist->stats); @@ -1308,11 +1299,10 @@ fetch_mmaped_event(struct perf_session *session, static int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) + u64 file_size) { struct ordered_events *oe = &session->ordered_events; - struct perf_evlist *evlist = session->evlist; - struct machines *machines = &session->machines; + struct perf_tool *tool = oe->tool; int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1381,8 +1371,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, tool, file_pos)) - < 0) { + (skip = perf_session__process_event(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1406,7 +1395,7 @@ more: out: /* do the final flush for ordered samples */ - err = ordered_events__flush(oe, machines, evlist, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: ui_progress__finish(); perf_tool__warn_about_errors(tool, &session->evlist->stats); @@ -1415,8 +1404,7 @@ out_err: return err; } -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool) +int perf_session__process_events(struct perf_session *session) { u64 size = perf_data_file__size(session->file); int err; @@ -1427,10 +1415,9 @@ int perf_session__process_events(struct perf_session *session, if (!perf_data_file__is_pipe(session->file)) err = __perf_session__process_events(session, session->header.data_offset, - session->header.data_size, - size, tool); + session->header.data_size, size); else - err = __perf_session__process_pipe_events(session, tool); + err = __perf_session__process_pipe_events(session); return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c08fa6b..06e0777 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -48,12 +48,10 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool); +int perf_session__process_events(struct perf_session *session); -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset); +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); @@ -126,8 +124,7 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool); + struct perf_sample *sample); int perf_event__process_id_index(struct perf_tool *tool, union perf_event *event, -- cgit v0.10.2 From d10eb1eb76a86266354ecab6e42c1606e3b8bc4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Mar 2015 12:20:38 -0300 Subject: perf ordered_events: Allow tools to specify a deliver method So that we can simplify the deliver method to pass just: (ordered_events, ordered_event, sample); Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-j0s4bpxs5qza5tnkvjwom9rw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index bad46ce..0d8cea9 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -181,8 +181,7 @@ static int __ordered_events__flush(struct ordered_events *oe) if (ret) pr_err("Can't parse sample, err = %d\n", ret); else { - ret = machines__deliver_event(oe->machines, oe->evlist, iter->event, - &sample, oe->tool, iter->file_offset); + ret = oe->deliver(oe, iter, &sample); if (ret) return ret; } @@ -264,7 +263,8 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) } void ordered_events__init(struct ordered_events *oe, struct machines *machines, - struct perf_evlist *evlist, struct perf_tool *tool) + struct perf_evlist *evlist, struct perf_tool *tool, + ordered_events__deliver_t deliver) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); @@ -274,6 +274,7 @@ void ordered_events__init(struct ordered_events *oe, struct machines *machines, oe->evlist = evlist; oe->machines = machines; oe->tool = tool; + oe->deliver = deliver; } void ordered_events__free(struct ordered_events *oe) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index ef7d73e..c6cf0ba 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -5,6 +5,7 @@ struct perf_tool; struct perf_evlist; +struct perf_sample; struct machines; struct ordered_event { @@ -21,6 +22,12 @@ enum oe_flush { OE_FLUSH__HALF, }; +struct ordered_events; + +typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, + struct ordered_event *event, + struct perf_sample *sample); + struct ordered_events { u64 last_flush; u64 next_flush; @@ -35,6 +42,7 @@ struct ordered_events { struct machines *machines; struct perf_evlist *evlist; struct perf_tool *tool; + ordered_events__deliver_t deliver; int buffer_idx; unsigned int nr_events; enum oe_flush last_flush_type; @@ -46,7 +54,8 @@ struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timesta void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, struct machines *machines, - struct perf_evlist *evlsit, struct perf_tool *tool); + struct perf_evlist *evlsit, struct perf_tool *tool, + ordered_events__deliver_t deliver); void ordered_events__free(struct ordered_events *oe); static inline diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c6dd89f..e2f318a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,6 +16,12 @@ #include "perf_regs.h" #include "asm/bug.h" +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset); + static int perf_session__open(struct perf_session *session) { struct perf_data_file *file = session->file; @@ -86,6 +92,14 @@ static void perf_session__set_comm_exec(struct perf_session *session) machines__set_comm_exec(&session->machines, comm_exec); } +static int ordered_events__deliver_event(struct ordered_events *oe, + struct ordered_event *event, + struct perf_sample *sample) +{ + return machines__deliver_event(oe->machines, oe->evlist, event->event, + sample, oe->tool, event->file_offset); +} + struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { @@ -125,8 +139,10 @@ struct perf_session *perf_session__new(struct perf_data_file *file, tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; - } else - ordered_events__init(&session->ordered_events, &session->machines, session->evlist, tool); + } else { + ordered_events__init(&session->ordered_events, &session->machines, + session->evlist, tool, ordered_events__deliver_event); + } return session; @@ -888,11 +904,11 @@ static int &sample->read.one, machine); } -int machines__deliver_event(struct machines *machines, - struct perf_evlist *evlist, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset) +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset) { struct perf_evsel *evsel; struct machine *machine; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 06e0777..1310998 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -55,12 +55,6 @@ int perf_session__queue_event(struct perf_session *s, union perf_event *event, void perf_tool__fill_defaults(struct perf_tool *tool); -int machines__deliver_event(struct machines *machines, - struct perf_evlist *evlist, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset); - int perf_session__resolve_callchain(struct perf_session *session, struct perf_evsel *evsel, struct thread *thread, -- cgit v0.10.2 From d704ebdae4aaeec89180dcfd0ca74e5bba318853 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Mar 2015 12:37:54 -0300 Subject: perf tools: tool->finished_round() doesn't need perf_session It is all about flushing the ordered queue or piping it thru, no need for a perf_session pointer. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-g47fx3ys0t9271cp0dcabjc7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 2563f07..ea46df25 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -53,6 +53,13 @@ static int perf_event__repipe_synth(struct perf_tool *tool, return 0; } +static int perf_event__repipe_oe_synth(struct perf_tool *tool, + union perf_event *event, + struct ordered_events *oe __maybe_unused) +{ + return perf_event__repipe_synth(tool, event); +} + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -406,7 +413,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .unthrottle = perf_event__repipe, .attr = perf_event__repipe_attr, .tracing_data = perf_event__repipe_op2_synth, - .finished_round = perf_event__repipe_op2_synth, + .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, }, diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 802b8f5..643722f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -18,6 +18,7 @@ #include "util/stat.h" #include "util/top.h" #include "util/data.h" +#include "util/ordered-events.h" #include #ifdef HAVE_TIMERFD_SUPPORT @@ -783,8 +784,10 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) /* flush queue after each round in which we processed events */ if (ntotal) { - kvm->session->ordered_events.next_flush = flush_time; - err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session); + struct ordered_events *oe = &kvm->session->ordered_events; + + oe->next_flush = flush_time; + err = ordered_events__flush(oe, OE_FLUSH__ROUND); if (err) { if (kvm->lost_events) pr_info("\nLost events: %" PRIu64 "\n\n", diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e2f318a..703a370 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -225,10 +225,17 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } +static int process_build_id_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) + struct ordered_events *oe __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -236,7 +243,7 @@ static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, static int process_finished_round(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); + struct ordered_events *oe); static int process_id_index_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, @@ -274,7 +281,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_finished_round_stub; + tool->build_id = process_build_id_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -526,10 +533,8 @@ static perf_event__swap_op perf_event__swap_ops[] = { */ static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *session) + struct ordered_events *oe) { - struct ordered_events *oe = &session->ordered_events; - return ordered_events__flush(oe, OE_FLUSH__ROUND); } @@ -961,7 +966,8 @@ static s64 perf_session__process_user_event(struct perf_session *session, union perf_event *event, u64 file_offset) { - struct perf_tool *tool = session->ordered_events.tool; + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = oe->tool; int fd = perf_data_file__fd(session->file); int err; @@ -989,7 +995,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, session); + return tool->finished_round(tool, event, oe); case PERF_RECORD_ID_INDEX: return tool->id_index(tool, event, session); default: diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bb2708b..51d9e56 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -10,6 +10,7 @@ struct perf_evsel; struct perf_sample; struct perf_tool; struct machine; +struct ordered_events; typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -25,6 +26,9 @@ typedef int (*event_attr_op)(struct perf_tool *tool, typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, + struct ordered_events *oe); + struct perf_tool { event_sample sample, read; @@ -38,8 +42,8 @@ struct perf_tool { unthrottle; event_attr_op attr; event_op2 tracing_data; - event_op2 finished_round, - build_id, + event_oe finished_round; + event_op2 build_id, id_index; bool ordered_events; bool ordering_requires_timestamps; -- cgit v0.10.2 From 01fbc1fee926888f7c256ada95fa5fa3b06cba94 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 Mar 2015 16:29:28 +0100 Subject: perf tools: Remove superfluous thread->comm_set setting It is set by calling thread__set_comm right before the removed line. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1425396581-17716-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index a5dbba9..1c8fbc9 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -206,7 +206,6 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) err = thread__set_comm(thread, comm, timestamp); if (err) return err; - thread->comm_set = true; } thread->ppid = parent->tid; -- cgit v0.10.2 From 4a6b362f36e68618ee4d3cdb361d05a5e80af023 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Mar 2015 13:02:24 -0300 Subject: perf ordered_events: Adopt queue() method From perf_session, will be used in 'trace'. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-mfihndzaumx44h6y37ng2irb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 0d8cea9..6002fa3 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -131,8 +131,8 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, return new; } -struct ordered_event * -ordered_events__new(struct ordered_events *oe, u64 timestamp, +static struct ordered_event * +ordered_events__new_event(struct ordered_events *oe, u64 timestamp, union perf_event *event) { struct ordered_event *new; @@ -153,6 +153,36 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset) +{ + u64 timestamp = sample->time; + struct ordered_event *oevent; + + if (!timestamp || timestamp == ~0ULL) + return -ETIME; + + if (timestamp < oe->last_flush) { + pr_oe_time(timestamp, "out of order event\n"); + pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", + oe->last_flush_type); + + oe->evlist->stats.nr_unordered_events++; + } + + oevent = ordered_events__new_event(oe, timestamp, event); + if (!oevent) { + ordered_events__flush(oe, OE_FLUSH__HALF); + oevent = ordered_events__new_event(oe, timestamp, event); + } + + if (!oevent) + return -ENOMEM; + + oevent->file_offset = file_offset; + return 0; +} + static int __ordered_events__flush(struct ordered_events *oe) { struct list_head *head = &oe->events; diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index c6cf0ba..173e13f 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -49,8 +49,8 @@ struct ordered_events { bool copy_on_queue; }; -struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, - union perf_event *event); +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, struct machines *machines, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 703a370..adf0740 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -541,33 +541,7 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, int perf_session__queue_event(struct perf_session *s, union perf_event *event, struct perf_sample *sample, u64 file_offset) { - struct ordered_events *oe = &s->ordered_events; - - u64 timestamp = sample->time; - struct ordered_event *new; - - if (!timestamp || timestamp == ~0ULL) - return -ETIME; - - if (timestamp < oe->last_flush) { - pr_oe_time(timestamp, "out of order event\n"); - pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", - oe->last_flush_type); - - s->evlist->stats.nr_unordered_events++; - } - - new = ordered_events__new(oe, timestamp, event); - if (!new) { - ordered_events__flush(oe, OE_FLUSH__HALF); - new = ordered_events__new(oe, timestamp, event); - } - - if (!new) - return -ENOMEM; - - new->file_offset = file_offset; - return 0; + return ordered_events__queue(&s->ordered_events, event, sample, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) -- cgit v0.10.2 From 9b118acae310f57baee770b5db402500d8695e50 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 6 Mar 2015 16:31:20 +0900 Subject: perf probe: Fix to handle aliased symbols in glibc Fix perf probe to handle aliased symbols correctly in glibc. In the glibc, several symbols are defined as an alias of __libc_XXX, e.g. malloc is an alias of __libc_malloc. In such cases, dwarf has no subroutine instances of the alias functions (e.g. no "malloc" instance), but the map has that symbol and its address. Thus, if we search the alieased symbol in debuginfo, we always fail to find it, but it is in the map. To solve this problem, this fails back to address-based alternative search, which searches the symbol in the map, translates its address to alternative (correct) function name by using debuginfo, and retry to find the alternative function point from debuginfo. This adds fail-back process to --vars, --lines and --add options. So, now you can use those on malloc@libc :) Without this patch; ----- # ./perf probe -x /usr/lib64/libc-2.17.so -V malloc Failed to find the address of malloc Error: Failed to show vars. # ./perf probe -x /usr/lib64/libc-2.17.so -a "malloc bytes" Probe point 'malloc' not found in debuginfo. Error: Failed to add events. ----- With this patch; ----- # ./perf probe -x /usr/lib64/libc-2.17.so -V malloc Available variables at malloc @<__libc_malloc+0> size_t bytes # ./perf probe -x /usr/lib64/libc-2.17.so -a "malloc bytes" Added new event: probe_libc:malloc (on malloc in /usr/lib64/libc-2.17.so with bytes) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 ----- Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150306073120.6904.13779.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1c570c2..b8f4578 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -178,6 +178,25 @@ static struct map *kernel_get_module_map(const char *module) return NULL; } +static struct map *get_target_map(const char *target, bool user) +{ + /* Init maps of given executable or kernel */ + if (user) + return dso__new_map(target); + else + return kernel_get_module_map(target); +} + +static void put_target_map(struct map *map, bool user) +{ + if (map && user) { + /* Only the user map needs to be released */ + dso__delete(map->dso); + map__delete(map); + } +} + + static struct dso *kernel_get_module_dso(const char *module) { struct dso *dso; @@ -249,6 +268,13 @@ out: return ret; } +static void clear_perf_probe_point(struct perf_probe_point *pp) +{ + free(pp->file); + free(pp->function); + free(pp->lazy_line); +} + static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) { int i; @@ -258,6 +284,74 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) } #ifdef HAVE_DWARF_SUPPORT +/* + * Some binaries like glibc have special symbols which are on the symbol + * table, but not in the debuginfo. If we can find the address of the + * symbol from map, we can translate the address back to the probe point. + */ +static int find_alternative_probe_point(struct debuginfo *dinfo, + struct perf_probe_point *pp, + struct perf_probe_point *result, + const char *target, bool uprobes) +{ + struct map *map = NULL; + struct symbol *sym; + u64 address = 0; + int ret = -ENOENT; + + /* This can work only for function-name based one */ + if (!pp->function || pp->file) + return -ENOTSUP; + + map = get_target_map(target, uprobes); + if (!map) + return -EINVAL; + + /* Find the address of given function */ + map__for_each_symbol_by_name(map, pp->function, sym) { + if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) { + address = sym->start; + break; + } + } + if (!address) { + ret = -ENOENT; + goto out; + } + pr_debug("Symbol %s address found : %lx\n", pp->function, address); + + ret = debuginfo__find_probe_point(dinfo, (unsigned long)address, + result); + if (ret <= 0) + ret = (!ret) ? -ENOENT : ret; + else { + result->offset += pp->offset; + result->line += pp->line; + ret = 0; + } + +out: + put_target_map(map, uprobes); + return ret; + +} + +static int get_alternative_probe_event(struct debuginfo *dinfo, + struct perf_probe_event *pev, + struct perf_probe_point *tmp, + const char *target) +{ + int ret; + + memcpy(tmp, &pev->point, sizeof(*tmp)); + memset(&pev->point, 0, sizeof(pev->point)); + ret = find_alternative_probe_point(dinfo, tmp, &pev->point, + target, pev->uprobes); + if (ret < 0) + memcpy(&pev->point, tmp, sizeof(*tmp)); + + return ret; +} /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, bool silent) @@ -466,6 +560,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, int max_tevs, const char *target) { bool need_dwarf = perf_probe_event_need_dwarf(pev); + struct perf_probe_point tmp; struct debuginfo *dinfo; int ntevs, ret = 0; @@ -482,6 +577,20 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, /* Searching trace events corresponding to a probe event */ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); + if (ntevs == 0) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp, target); + if (!ret) { + ntevs = debuginfo__find_trace_events(dinfo, pev, + tevs, max_tevs); + /* + * Write back to the original probe_event for + * setting appropriate (user given) event name + */ + clear_perf_probe_point(&pev->point); + memcpy(&pev->point, &tmp, sizeof(tmp)); + } + } + debuginfo__delete(dinfo); if (ntevs > 0) { /* Succeeded to find trace events */ @@ -719,12 +828,13 @@ int show_line_range(struct line_range *lr, const char *module, bool user) static int show_available_vars_at(struct debuginfo *dinfo, struct perf_probe_event *pev, int max_vls, struct strfilter *_filter, - bool externs) + bool externs, const char *target) { char *buf; int ret, i, nvars; struct str_node *node; struct variable_list *vls = NULL, *vl; + struct perf_probe_point tmp; const char *var; buf = synthesize_perf_probe_point(&pev->point); @@ -734,6 +844,15 @@ static int show_available_vars_at(struct debuginfo *dinfo, ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, max_vls, externs); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp, target); + if (!ret) { + ret = debuginfo__find_available_vars_at(dinfo, pev, + &vls, max_vls, externs); + /* Release the old probe_point */ + clear_perf_probe_point(&tmp); + } + } if (ret <= 0) { if (ret == 0 || ret == -ENOENT) { pr_err("Failed to find the address of %s\n", buf); @@ -796,7 +915,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, for (i = 0; i < npevs && ret >= 0; i++) ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter, - externs); + externs, module); debuginfo__delete(dinfo); out: @@ -1742,15 +1861,12 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, void clear_perf_probe_event(struct perf_probe_event *pev) { - struct perf_probe_point *pp = &pev->point; struct perf_probe_arg_field *field, *next; int i; free(pev->event); free(pev->group); - free(pp->file); - free(pp->function); - free(pp->lazy_line); + clear_perf_probe_point(&pev->point); for (i = 0; i < pev->nargs; i++) { free(pev->args[i].name); @@ -2367,11 +2483,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, int num_matched_functions; int ret, i; - /* Init maps of given executable or kernel */ - if (pev->uprobes) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(target, pev->uprobes); if (!map) { ret = -EINVAL; goto out; @@ -2464,11 +2576,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } out: - if (map && pev->uprobes) { - /* Only when using uprobe(exec) map needs to be released */ - dso__delete(map->dso); - map__delete(map); - } + put_target_map(map, pev->uprobes); return ret; nomem_out: -- cgit v0.10.2 From 811dd2ae7cd670fefbb3b220b529bb9876edde70 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 6 Mar 2015 16:31:22 +0900 Subject: perf probe: Fix --line to handle aliased symbols in glibc Fix perf probe --line to handle aliased symbols correctly in glibc. This makes line_range search failing back to address-based alternative search as same as --add and --vars. Without this patch; ----- # ./perf probe -x /usr/lib64/libc-2.17.so -L malloc Specified source line is not found. Error: Failed to show lines. ----- With this patch; ----- # ./perf probe -x /usr/lib64/libc-2.17.so -L malloc <__libc_malloc@/usr/src/debug/glibc-2.17-c758a686/malloc/malloc.c:0> 0 __libc_malloc(size_t bytes) 1 { mstate ar_ptr; void *victim; __malloc_ptr_t (*hook) (size_t, const __malloc_ptr_t) 6 = force_reg (__malloc_hook); 7 if (__builtin_expect (hook != NULL, 0)) 8 return (*hook)(bytes, RETURN_ADDRESS (0)); 10 arena_lookup(ar_ptr); 12 arena_lock(ar_ptr, bytes); ----- Note that this actually shows __libc_malloc, since it is the real instance of malloc. User can use both __libc_malloc and malloc for --line. Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150306073122.6904.18540.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b8f4578..4cfd121 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -353,6 +353,31 @@ static int get_alternative_probe_event(struct debuginfo *dinfo, return ret; } +static int get_alternative_line_range(struct debuginfo *dinfo, + struct line_range *lr, + const char *target, bool user) +{ + struct perf_probe_point pp = { 0 }, result = { 0 }; + int ret, len = 0; + + pp.function = lr->function; + pp.file = lr->file; + pp.line = lr->start; + if (lr->end != INT_MAX) + len = lr->end - lr->start; + ret = find_alternative_probe_point(dinfo, &pp, &result, + target, user); + if (!ret) { + lr->function = result.function; + lr->file = result.file; + lr->start = result.line; + if (lr->end != INT_MAX) + lr->end = lr->start + len; + clear_perf_probe_point(&pp); + } + return ret; +} + /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, bool silent) { @@ -734,7 +759,8 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) * Show line-range always requires debuginfo to find source file and * line number. */ -static int __show_line_range(struct line_range *lr, const char *module) +static int __show_line_range(struct line_range *lr, const char *module, + bool user) { int l = 1; struct int_node *ln; @@ -750,6 +776,11 @@ static int __show_line_range(struct line_range *lr, const char *module) return -ENOENT; ret = debuginfo__find_line_range(dinfo, lr); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_line_range(dinfo, lr, module, user); + if (!ret) + ret = debuginfo__find_line_range(dinfo, lr); + } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); @@ -819,7 +850,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user) ret = init_symbol_maps(user); if (ret < 0) return ret; - ret = __show_line_range(lr, module); + ret = __show_line_range(lr, module, user); exit_symbol_maps(); return ret; -- cgit v0.10.2 From 0687eba7872d7dbe01b074c54359315e97502360 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 6 Mar 2015 16:31:25 +0900 Subject: Revert "perf probe: Fix to fall back to find probe point in symbols" This reverts commit 906451b98b67 ("perf probe: Fix to fall back to find probe point in symbols"). Since 'perf probe' now retries with the address of given symbol searched from map before this path, this fall back routine isn't needed anymore. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150306073124.6904.1751.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 4cfd121..c379ea0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -630,11 +630,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } if (ntevs == 0) { /* No error but failed to find probe point. */ - pr_warning("Probe point '%s' not found in debuginfo.\n", + pr_warning("Probe point '%s' not found.\n", synthesize_perf_probe_point(&pev->point)); - if (need_dwarf) - return -ENOENT; - return 0; + return -ENOENT; } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); -- cgit v0.10.2 From 680d926a8cb08dd9cf173e2bb93d4a4477771949 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 6 Mar 2015 16:31:27 +0900 Subject: perf symbols: Allow symbol alias when loading map for symbol name When perf probe tries to add a probe in a binary using symbol name, it sometimes failed since some symbols were discard during loading dso. When it resolves an address to symbol, it'd be better to have just one symbol at given address. But for finding address from symbol, it'd be better to keep all names (including aliases). So allow tools to state that they want to allow aliases via symbol_conf.allow_aliases. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150306073127.6904.3232.stgit@localhost.localdomain [ Original patch passwd allow_alias to many functions, use symbol_conf.allow_aliases instead ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c379ea0..9feba0e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -80,6 +80,7 @@ static int init_symbol_maps(bool user_only) int ret; symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { pr_debug("Failed to init symbol map.\n"); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index ada1676..62742e4 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1048,7 +1048,8 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); + if (!symbol_conf.allow_aliases) + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1650dcb..efdaaa5 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -87,6 +87,7 @@ struct symbol_conf { ignore_vmlinux_buildid, show_kernel_path, use_modules, + allow_aliases, sort_by_name, show_nr_samples, show_total_period, -- cgit v0.10.2 From e578da3b2009da2a9ae2d25fd0f78c7b76ca5e56 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 6 Mar 2015 16:31:29 +0900 Subject: perf probe: Allow weak symbols to be probed It currently prevents adding probes in weak symbols. But there're cases that given name is an only weak symbol so that we cannot add probe. $ perf probe -x /usr/lib/libc.so.6 -a calloc Failed to find symbol calloc in /usr/lib/libc-2.21.so Error: Failed to add events. $ nm /usr/lib/libc.so.6 | grep calloc 000000000007b1f0 t __calloc 000000000007b1f0 T __libc_calloc 000000000007b1f0 W calloc This change will result in duplicate probes when strong and weak symbols co-exist in a binary. But I think it's not a big problem since probes at the weak symbol will never be hit anyway. Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Naohiro Aota Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150306073129.6904.41078.stgit@localhost.localdomain Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 9feba0e..8af8e7f 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -310,10 +310,8 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, /* Find the address of given function */ map__for_each_symbol_by_name(map, pp->function, sym) { - if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) { - address = sym->start; - break; - } + address = sym->start; + break; } if (!address) { ret = -ENOENT; @@ -2485,8 +2483,7 @@ static int find_probe_functions(struct map *map, char *name) struct symbol *sym; map__for_each_symbol_by_name(map, name, sym) { - if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) - found++; + found++; } return found; @@ -2846,8 +2843,7 @@ static struct strfilter *available_func_filter; static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { - if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && - strfilter__compare(available_func_filter, sym->name)) + if (strfilter__compare(available_func_filter, sym->name)) return 0; return 1; } -- cgit v0.10.2 From 19a9df35fe9e8ffd60ce4b6f888b72e7c8422d31 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 10 Mar 2015 13:00:35 +0100 Subject: perf build: Fix libbabeltrace detection Following patch added -Werror for feature builds: b49f1a4be701 perf tools: Improve feature test debuggability and exposed a problem in the libbabeltrace feature build, because it was including wrong header and gcc couldn't find the used symbol definition. Adding proper header and keeping the old one as it is needed also (libbabeltrace quirk). Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/r/20150310120035.GA4333@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/test-libbabeltrace.c b/tools/perf/config/feature-checks/test-libbabeltrace.c index 3b7dd68..9cf802a 100644 --- a/tools/perf/config/feature-checks/test-libbabeltrace.c +++ b/tools/perf/config/feature-checks/test-libbabeltrace.c @@ -1,5 +1,6 @@ #include +#include int main(void) { -- cgit v0.10.2 From 443a70541c56d4a7dff0ce693870935e138201b2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Mar 2015 19:04:31 -0300 Subject: perf tools: Output feature detection's gcc output to a file So that we can debug feature detection problems. It will appear on $(OUTPUT)feature-checks/.make-libbabeltrace.output, using the libbabeltrace feature test. Whole process: [acme@ssdandy linux]$ make -C tools/perf install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j8' parallel build config/Makefile:425: No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR config/Makefile:709: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ on ] ... libbabeltrace: [ OFF ] [acme@ssdandy linux]$ find tools/perf -name ".make-*.output" | grep lib | tail -5 tools/perf/config/feature-checks/.make-libdw-dwarf-unwind.output tools/perf/config/feature-checks/.make-libbabeltrace.output tools/perf/config/feature-checks/.make-zlib.output tools/perf/config/feature-checks/.make-liberty.output tools/perf/config/feature-checks/.make-liberty-z.output [acme@ssdandy linux]$ [acme@ssdandy linux]$ cat tools/perf/config/feature-checks/.make-libbabeltrace.output make[1]: Entering directory `/home/acme/git/linux/tools/perf/config/feature-checks' gcc -MD -Wall -Werror -o test-libbabeltrace.bin test-libbabeltrace.c -Wl,-z,noexecstack -lbabeltrace-ctf # -lbabeltrace provided by test-libbabeltrace.c:2:42: fatal error: babeltrace/ctf-writer/writer.h: No such file or directory #include ^ compilation terminated. make[1]: *** [test-libbabeltrace.bin] Error 1 make[1]: Leaving directory `/home/acme/git/linux/tools/perf/config/feature-checks' [acme@ssdandy linux]$ So the libbabeltrace feature will not be builtin, but if we do what is required for it to be built, namely point where we have it installed: [acme@ssdandy linux]$ time make -C tools/perf LIBBABELTRACE_DIR=/opt/libbabeltrace install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j8' parallel build config/Makefile:425: No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ OFF ] ... libdw-dwarf-unwind: [ on ] ... libbabeltrace: [ on ] ... zlib: [ on ] ... DWARF post unwind library: libdw [acme@ssdandy linux]$ find tools/perf -name ".make-libbabel*.output" | grep lib | tail -5 tools/perf/config/feature-checks/.make-libbabeltrace.output [acme@ssdandy linux]$ cat tools/perf/config/feature-checks/.make-libbabeltrace.output make[1]: Entering directory `/home/acme/git/linux/tools/perf/config/feature-checks' gcc -MD -I/opt/libbabeltrace/include -Wall -Werror -o test-libbabeltrace.bin test-libbabeltrace.c -Wl,-z,noexecstack -L/opt/libbabeltrace/lib -lbabeltrace-ctf # -lbabeltrace provided by make[1]: Leaving directory `/home/acme/git/linux/tools/perf/config/feature-checks' [acme@ssdandy linux]$ Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-h53xwueqwdeeiqcv9f50nqqb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ec4c063..933d703 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -529,6 +529,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, feature-detect) $(RM) $(OUTPUT)config/feature-checks/.make-*.output $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index d44c64d..bd09718 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -184,7 +184,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin > $(OUTPUT)config/feature-checks/.make-$(1).output 2>&1 && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) -- cgit v0.10.2 From a78604defffbc1da1497a8c8b48275b723eb5946 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Wed, 4 Mar 2015 18:01:42 +0800 Subject: perf probe: Fix possible double free on error A double free occurred when get source file path failed. If lr->path failed to assign a new value, it will be freed as the old path and then be freed again during line_range__clear(), and causes this: $ perf probe -L do_execve -k vmlinux *** Error in `/usr/bin/perf': double free or corruption (fasttop): 0x0000000000a9ac50 *** ======= Backtrace: ========= ../lib64/libc.so.6(+0x6eeef)[0x7ffff5e44eef] ../lib64/libc.so.6(+0x78cae)[0x7ffff5e4ecae] ../lib64/libc.so.6(+0x79987)[0x7ffff5e4f987] ../bin/perf[0x4ab41f] ... This patch fix this problem. Signed-off-by: He Kuang Acked-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1425463302-1687-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8af8e7f..e2bf620 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -790,7 +790,11 @@ static int __show_line_range(struct line_range *lr, const char *module, /* Convert source file path */ tmp = lr->path; ret = get_real_path(tmp, lr->comp_dir, &lr->path); - free(tmp); /* Free old path */ + + /* Free old path when new path is assigned */ + if (tmp != lr->path) + free(tmp); + if (ret < 0) { pr_warning("Failed to find source file path.\n"); return ret; -- cgit v0.10.2 From a8cd1f4393032cd87e98803346865cdbceb15ad3 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Wed, 11 Mar 2015 20:36:03 +0800 Subject: perf hists browser: Fix UI bug after zoom into thread/dso/symbol When zoom into thread/dso/symbol, the fold/unfold stat is cleared in hists__filter_by_thread/dso/symbol(), but h->nr_rows is not cleared. So if we toggle fold stat on the unfold entires, nr_entries got a wrong value. This bug can be reproduced as follows: $ perf record -g -e syscalls:sys_enter_open ls $ perf report Children Self Command Shared Object Symbol ================================================================ + 50.00% 0.00% ls ld64.so [.] _dl_get_ready_to_run - 50.00% 0.00% ls ld64.so [.] _dl_load_shared_library _dl_load_shared_library <= [Zoom into thread/dso] _dl_get_ready_to_run _start ... In the new thread hists, all entries reset to fold, if we unfold the same entry as we previously unfolded, nr_entries got wrong value, and we can't move down cursor to bottom row. Thread: ls Children Self Command Shared Object Symbol ================================================================ + 50.00% 0.00% ls ld64.so [.] _dl_get_ready_to_run - 50.00% 0.00% ls ld64.so [.] _dl_load_shared_library _dl_load_shared_library _dl_get_ready_to_run <= [cursor may stop here, can't move down] _start ... This patch clear h->nr_rows to fix this bug. Signed-off-by: He Kuang Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426077363-855-2-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 95f5ab7..d9a6d35 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1171,6 +1171,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h /* force fold unfiltered entry for simplicity */ h->ms.unfolded = false; h->row_offset = 0; + h->nr_rows = 0; hists->stats.nr_non_filtered_samples += h->stat.nr_events; -- cgit v0.10.2 From 6d4a48968bfb5c67002f253fbaeb5acd41d7897a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 11 Mar 2015 10:36:20 -0400 Subject: perf probe: Fix compiles due to declarations using perf_probe_point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf fails to build with gcc "(GCC) 4.4.7 20120313 (Red Hat 4.4.7-4.0.9)" (a.k.a., RHEL6 / CentOS 6 / OL 6): cc1: warnings being treated as errors util/probe-event.c: In function ‘get_alternative_line_range’: util/probe-event.c:359: error: missing initializer util/probe-event.c:359: error: (near initialization for ‘pp.file’) util/probe-event.c:359: error: missing initializer util/probe-event.c:359: error: (near initialization for ‘result.function’) Fix by bringing in initializers to declaration. Signed-off-by: David Ahern Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/1426084580-60780-1-git-send-email-david.ahern@oracle.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e2bf620..f272a71 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -356,12 +356,14 @@ static int get_alternative_line_range(struct debuginfo *dinfo, struct line_range *lr, const char *target, bool user) { - struct perf_probe_point pp = { 0 }, result = { 0 }; + struct perf_probe_point pp = { .function = lr->function, + .file = lr->file, + .line = lr->start }; + struct perf_probe_point result; int ret, len = 0; - pp.function = lr->function; - pp.file = lr->file; - pp.line = lr->start; + memset(&result, 0, sizeof(result)); + if (lr->end != INT_MAX) len = lr->end - lr->start; ret = find_alternative_probe_point(dinfo, &pp, &result, -- cgit v0.10.2 From 4fabf3d19cec11d9faa567f8cf0290298c5a93ea Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 12 Mar 2015 15:21:49 +0800 Subject: perf hists browser: Fix UI bug after fold/unfold In perf hists browser, the fold/unfold stat of each hist entry is recorded but hb->nr_callchain_rows loses its value after zoom out and zoom in back. This causes a wrong row cursor range that restrict user to move down anymore. This bug can be reproduced as follows: $ perf record -g -e syscalls:* ls $ perf report Available samples ================================================================ 2 syscalls:sys_enter_mprotect <= [enter one of the entries] 2 syscalls:sys_exit_mprotect 13 syscalls:sys_enter_brk ... In the hists brower, unfold some of the items, now the cursor can reach to any rows: Children Self Command Shared Object Symbol ================================================================ - 100.00% 100.00% ls libuClibc-0.9.33.2.so [.] lstat64 - lstat64 16.67% 0x6469702e64 8.33% 0x646970 8.33% 0x617461 8.33% 0x65 - 16.67% 0.00% ls [unknown] [.]0x6469702e64 0x6469702e64 <= [cursor can reach to bottom line, everything is ok] Now, zoom back to "Available samples" and enter again: Children Self Command Shared Object Symbol ================================================================ - 100.00% 100.00% ls libuClibc-0.9.33.2.so [.] lstat64 - lstat64 16.67% 0x6469702e64 8.33% 0x646970 8.33% 0x617461 <= [cursor may stop here, can't move down anymore] 8.33% 0x65 - 16.67% 0.00% ls [unknown] [.]0x6469702e64 0x6469702e64 This patch recalculates hb->nr_callchain_rows to fix the bug. Signed-off-by: He Kuang Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426144909-18951-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ad312d9..49eddeb 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -48,6 +48,24 @@ static bool hist_browser__has_filter(struct hist_browser *hb) return hists__has_filter(hb->hists) || hb->min_pcnt; } +static int hist_browser__get_folding(struct hist_browser *browser) +{ + struct rb_node *nd; + struct hists *hists = browser->hists; + int unfolded_rows = 0; + + for (nd = rb_first(&hists->entries); + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; + nd = rb_next(nd)) { + struct hist_entry *he = + rb_entry(nd, struct hist_entry, rb_node); + + if (he->ms.unfolded) + unfolded_rows += he->nr_rows; + } + return unfolded_rows; +} + static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; @@ -57,6 +75,7 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) else nr_entries = hb->hists->nr_entries; + hb->nr_callchain_rows = hist_browser__get_folding(hb); return nr_entries + hb->nr_callchain_rows; } -- cgit v0.10.2 From d73515c03c6a2706e088094ff6095a3abefd398b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Mar 2015 07:16:27 -0700 Subject: perf stat: Output running time and run/enabled ratio in CSV mode The information how much a counter ran in 'perf stat' can be quite interesting for other tools to judge how trustworthy a measurement is. Currently it is only output in non CSV mode. This patches make perf stat always output the running time and the enabled/running ratio in CSV mode. This adds two new fields at the end for each line. I assume that existing tools ignore new fields at the end, so it's on by default. Only CSV mode is affected, no difference otherwise. v2: Add extra print_running function v3: Avoid printing nan v4: Remove some elses and add brackets. v5: Move non CSV case into print_running Signed-off-by: Andi Kleen Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1426083387-17006-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d28949d..d58e50c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -769,6 +769,19 @@ static int run_perf_stat(int argc, const char **argv) return ret; } +static void print_running(u64 run, u64 ena) +{ + if (csv_output) { + fprintf(output, "%s%" PRIu64 "%s%.2f", + csv_sep, + run, + csv_sep, + ena ? 100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(output, " (%.2f%%)", 100.0 * run / ena); + } +} + static void print_noise_pct(double total, double avg) { double pct = rel_stddev_stats(total, avg); @@ -1252,6 +1265,7 @@ static void print_aggr(char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1262,13 +1276,10 @@ static void print_aggr(char *prefix) else abs_printout(id, nr, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } + print_running(run, ena); fputc('\n', output); } } @@ -1284,6 +1295,10 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; double uval; + double avg_enabled, avg_running; + + avg_enabled = avg_stats(&ps->res_stats[1]); + avg_running = avg_stats(&ps->res_stats[2]); if (prefix) fprintf(output, "%s", prefix); @@ -1303,6 +1318,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (counter->cgrp) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(avg_running, avg_enabled); fputc('\n', output); return; } @@ -1316,19 +1332,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) print_noise(counter, avg); - if (csv_output) { - fputc('\n', output); - return; - } - - if (scaled) { - double avg_enabled, avg_running; - - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); - - fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); - } + print_running(avg_running, avg_enabled); fprintf(output, "\n"); } @@ -1370,6 +1374,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1381,13 +1386,10 @@ static void print_counter(struct perf_evsel *counter, char *prefix) else abs_printout(cpu, 0, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); + print_running(run, ena); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } fputc('\n', output); } } -- cgit v0.10.2 From 56f0fd45d8df51542930b9b2e1acee5034b53479 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Mar 2015 08:28:01 -0700 Subject: perf stat: Fix IPC and other formulas with -A perf stat didn't compute the IPC and other formulas for individual CPUs with -A. Fix this for the easy -A case. As before, --per-core and --per-socket do not handle it, they simply print nothing. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1426087682-22765-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d58e50c..c95dbda 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -353,39 +353,40 @@ static struct perf_evsel *nth_evsel(int n) * more semantic information such as miss/hit ratios, * instruction rates, etc: */ -static void update_shadow_stats(struct perf_evsel *counter, u64 *count) +static void update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu) { if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) - update_stats(&runtime_nsecs_stats[0], count[0]); + update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[0], count[0]); + update_stats(&runtime_cycles_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) - update_stats(&runtime_cycles_in_tx_stats[0], count[0]); + update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) - update_stats(&runtime_transaction_stats[0], count[0]); + update_stats(&runtime_transaction_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) - update_stats(&runtime_elision_stats[0], count[0]); + update_stats(&runtime_elision_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); + update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); + update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[0], count[0]); + update_stats(&runtime_branches_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[0], count[0]); + update_stats(&runtime_cacherefs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[0], count[0]); + update_stats(&runtime_l1_dcache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_l1_icache_stats[0], count[0]); + update_stats(&runtime_l1_icache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[0], count[0]); + update_stats(&runtime_ll_cache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[0], count[0]); + update_stats(&runtime_dtlb_cache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[0], count[0]); + update_stats(&runtime_itlb_cache_stats[cpu], count[0]); } static void zero_per_pkg(struct perf_evsel *counter) @@ -447,7 +448,8 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, perf_evsel__compute_deltas(evsel, cpu, count); perf_counts_values__scale(count, scale, NULL); evsel->counts->cpu[cpu] = *count; - update_shadow_stats(evsel, count->values); + if (aggr_mode == AGGR_NONE) + update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -495,7 +497,7 @@ static int read_counter_aggr(struct perf_evsel *counter) /* * Save the full runtime - to allow normalization during printout: */ - update_shadow_stats(counter, count); + update_shadow_stats(counter, count, 0); return 0; } -- cgit v0.10.2 From 7910352852f377f6d12286f922299d7ad1cfb2e3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Mar 2015 08:28:02 -0700 Subject: perf stat: Always correctly indent ratio column When cycles or instructions do not print anything, as in being, --per-socket or --per-core modi, the ratio column was not correctly indented for them. This lead to some ratios not lining up with the others. Always indent correctly when nothing is printed. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1426087682-22765-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c95dbda..d4d1b77 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1094,6 +1094,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (total) { ratio = avg / total; fprintf(output, " # %5.2f insns per cycle ", ratio); + } else { + fprintf(output, " "); } total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); @@ -1163,6 +1165,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (total) { ratio = avg / total; fprintf(output, " # %8.3f GHz ", ratio); + } else { + fprintf(output, " "); } } else if (transaction_run && perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { -- cgit v0.10.2 From 405f87557da35a03ba4663eca971ffac58b0a818 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 12 Mar 2015 16:32:46 +0900 Subject: perf kmem: Fix segfault when invalid sort key is given When it tries to free 'str', it was already updated by strsep() - so it needs to save the original pointer. # perf kmem stat -s xxx,hit Error: Unknown --sort key: 'xxx' *** Error in `perf': free(): invalid pointer: 0x0000000000e9e7b6 *** ======= Backtrace: ========= /usr/lib/libc.so.6(+0x7198e)[0x7fc7e6e0d98e] /usr/lib/libc.so.6(+0x76dee)[0x7fc7e6e12dee] /usr/lib/libc.so.6(+0x775cb)[0x7fc7e6e135cb] ./perf[0x44a1b5] ./perf[0x490b20] ./perf(parse_options_step+0x173)[0x491773] ./perf(parse_options_subcommand+0xa7)[0x491fb7] ./perf(cmd_kmem+0x2bc)[0x44ae4c] ./perf[0x47aa13] ./perf(main+0x60a)[0x427a9a] /usr/lib/libc.so.6(__libc_start_main+0xf0)[0x7fc7e6dbc800] ./perf(_start+0x29)[0x427bb9] Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joonsoo Kim Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1426145571-3065-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 62f165a..1e69ea5 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -559,6 +559,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) { char *tok; char *str = strdup(arg); + char *pos = str; if (!str) { pr_err("%s: strdup failed\n", __func__); @@ -566,7 +567,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) } while (true) { - tok = strsep(&str, ","); + tok = strsep(&pos, ","); if (!tok) break; if (sort_dimension__add(tok, sort_list) < 0) { -- cgit v0.10.2 From bd72a33ebae8b4d37e3d2a3f0f3f3333ac9654dd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 12 Mar 2015 16:32:47 +0900 Subject: perf kmem: Allow -v option Current perf kmem fails when -v option is used. As it's very useful for debugging, let's allow it. Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Joonsoo Kim Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1426145571-3065-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 7c8fbbf..150253c 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -25,6 +25,10 @@ OPTIONS --input=:: Select the input file (default: perf.data unless stdin is a fifo) +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + --caller:: Show per-callsite statistics diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 1e69ea5..02b7697 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -663,6 +663,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) const char * const default_sort_order = "frag,hit,bytes"; const struct option kmem_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, "show per-callsite statistics", parse_caller_opt), OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, -- cgit v0.10.2 From 65f46e0214c64198a0266c37181a7776e16b7e53 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 12 Mar 2015 16:32:48 +0900 Subject: perf kmem: Fix alignment of slab result table Its table was a bit misaligned. Fix it. Before: # perf kmem stat --caller -l 10 ------------------------------------------------------------------------------------------------------ Callsite | Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag ------------------------------------------------------------------------------------------------------ radeon_cs_parser_init.part.1+11a | 2080/260 | 1504/188 | 8 | 0 | 27.692% radeon_cs_parser_init.part.1+e1 | 384/96 | 288/72 | 4 | 0 | 25.000% radeon_cs_parser_init.part.1+93 | 128/32 | 96/24 | 4 | 0 | 25.000% load_elf_binary+a39 | 512/512 | 392/392 | 1 | 0 | 23.438% __alloc_skb+89 | 6144/877 | 4800/685 | 7 | 6 | 21.875% radeon_fence_emit+5c | 1152/192 | 912/152 | 6 | 0 | 20.833% radeon_cs_parser_relocs+ad | 8192/2048 | 6624/1656 | 4 | 0 | 19.141% radeon_sa_bo_new+78 | 1280/64 | 1120/56 | 20 | 0 | 12.500% load_elf_binary+2c4 | 32/32 | 28/28 | 1 | 0 | 12.500% anon_vma_prepare+101 | 576/72 | 512/64 | 8 | 0 | 11.111% ... | ... | ... | ... | ... | ... ------------------------------------------------------------------------------------------------------ After: --------------------------------------------------------------------------------------------------------- Callsite | Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag --------------------------------------------------------------------------------------------------------- radeon_cs_parser_init.part.1+11a | 2080/260 | 1504/188 | 8 | 0 | 27.692% radeon_cs_parser_init.part.1+e1 | 384/96 | 288/72 | 4 | 0 | 25.000% radeon_cs_parser_init.part.1+93 | 128/32 | 96/24 | 4 | 0 | 25.000% load_elf_binary+a39 | 512/512 | 392/392 | 1 | 0 | 23.438% __alloc_skb+89 | 6144/877 | 4800/685 | 7 | 6 | 21.875% radeon_fence_emit+5c | 1152/192 | 912/152 | 6 | 0 | 20.833% radeon_cs_parser_relocs+ad | 8192/2048 | 6624/1656 | 4 | 0 | 19.141% radeon_sa_bo_new+78 | 1280/64 | 1120/56 | 20 | 0 | 12.500% load_elf_binary+2c4 | 32/32 | 28/28 | 1 | 0 | 12.500% anon_vma_prepare+101 | 576/72 | 512/64 | 8 | 0 | 11.111% ... | ... | ... | ... | ... | ... --------------------------------------------------------------------------------------------------------- Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Minchan Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1426145571-3065-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 02b7697..8c85aeb 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -275,10 +275,10 @@ static void __print_result(struct rb_root *root, struct perf_session *session, struct rb_node *next; struct machine *machine = &session->machines.host; - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); next = rb_first(root); @@ -304,7 +304,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session, snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); printf(" %-34s |", buf); - printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n", + printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n", (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, @@ -317,9 +317,9 @@ static void __print_result(struct rb_root *root, struct perf_session *session, } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... | ... \n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); } static void print_summary(void) -- cgit v0.10.2 From 69364727be2f3dc71a046771965c3c9d5ccce699 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 20 Feb 2015 23:17:02 +0100 Subject: perf data: Add tracepoint events fields CTF conversion support Adding support to convert tracepoint event fields into CTF event fields. We parse each tracepoint event for CTF conversion and add tracepoint fields as regular CTF event fields, so they appear in babeltrace output like: $ babeltrace ./ctf-data/ ... [09:02:00.950703057] (+?.?????????) sched:sched_stat_runtime: { }, { perf_ip = ... SNIP ... common_type = 298, common_flags = 1, \ common_preempt_count = 0, common_pid = 31813, comm = "perf", pid = 31813, runtime = 458800, vruntime = 52059858071 } ... Signed-off-by: Sebastian Andrzej Siewior Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Frederic Weisbecker Cc: Jeremie Galarneau Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Tom Zanussi Cc: Wang Nan Link: http://lkml.kernel.org/r/1424470628-5969-6-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 1afd381..c6d6226 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -126,6 +126,177 @@ FUNC_VALUE_SET(s64) FUNC_VALUE_SET(u64) __FUNC_VALUE_SET(u64_hex, u64) +static struct bt_ctf_field_type* +get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) +{ + unsigned long flags = field->flags; + + if (flags & FIELD_IS_STRING) + return cw->data.string; + + if (!(flags & FIELD_IS_SIGNED)) { + /* unsigned long are mostly pointers */ + if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER) + return cw->data.u64_hex; + } + + if (flags & FIELD_IS_SIGNED) { + if (field->size == 8) + return cw->data.s64; + else + return cw->data.s32; + } + + if (field->size == 8) + return cw->data.u64; + else + return cw->data.u32; +} + +static int add_tracepoint_field_value(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample, + struct format_field *fmtf) +{ + struct bt_ctf_field_type *type; + struct bt_ctf_field *array_field; + struct bt_ctf_field *field; + const char *name = fmtf->name; + void *data = sample->raw_data; + unsigned long long value_int; + unsigned long flags = fmtf->flags; + unsigned int n_items; + unsigned int i; + unsigned int offset; + unsigned int len; + int ret; + + offset = fmtf->offset; + len = fmtf->size; + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_DYNAMIC) { + unsigned long long tmp_val; + + tmp_val = pevent_read_number(fmtf->event->pevent, + data + offset, len); + offset = tmp_val; + len = offset >> 16; + offset &= 0xffff; + } + + if (flags & FIELD_IS_ARRAY) { + + type = bt_ctf_event_class_get_field_by_name( + event_class, name); + array_field = bt_ctf_field_create(type); + bt_ctf_field_type_put(type); + if (!array_field) { + pr_err("Failed to create array type %s\n", name); + return -1; + } + + len = fmtf->size / fmtf->arraylen; + n_items = fmtf->arraylen; + } else { + n_items = 1; + array_field = NULL; + } + + type = get_tracepoint_field_type(cw, fmtf); + + for (i = 0; i < n_items; i++) { + if (!(flags & FIELD_IS_STRING)) + value_int = pevent_read_number( + fmtf->event->pevent, + data + offset + i * len, len); + + if (flags & FIELD_IS_ARRAY) + field = bt_ctf_field_array_get_field(array_field, i); + else + field = bt_ctf_field_create(type); + + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (flags & FIELD_IS_STRING) + ret = bt_ctf_field_string_set_value(field, + data + offset + i * len); + else if (!(flags & FIELD_IS_SIGNED)) + ret = bt_ctf_field_unsigned_integer_set_value( + field, value_int); + else + ret = bt_ctf_field_signed_integer_set_value( + field, value_int); + if (ret) { + pr_err("failed to set file value %s\n", name); + goto err_put_field; + } + if (!(flags & FIELD_IS_ARRAY)) { + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err_put_field; + } + } + bt_ctf_field_put(field); + } + if (flags & FIELD_IS_ARRAY) { + ret = bt_ctf_event_set_payload(event, name, array_field); + if (ret) { + pr_err("Failed add payload array %s\n", name); + return -1; + } + bt_ctf_field_put(array_field); + } + return 0; + +err_put_field: + bt_ctf_field_put(field); + return -1; +} + +static int add_tracepoint_fields_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct format_field *fields, + struct perf_sample *sample) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + ret = add_tracepoint_field_value(cw, event_class, event, sample, + field); + if (ret) + return -1; + } + return 0; +} + +static int add_tracepoint_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_values(cw, event_class, event, + common_fields, sample); + if (!ret) + ret = add_tracepoint_fields_values(cw, event_class, event, + fields, sample); + + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -246,11 +417,76 @@ static int process_sample_event(struct perf_tool *tool, if (ret) return -1; + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_values(cw, event_class, event, + evsel, sample); + if (ret) + return -1; + } + bt_ctf_stream_append_event(cw->stream, event); bt_ctf_event_put(event); return 0; } +static int add_tracepoint_fields_types(struct ctf_writer *cw, + struct format_field *fields, + struct bt_ctf_event_class *event_class) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + struct bt_ctf_field_type *type; + unsigned long flags = field->flags; + + pr2(" field '%s'\n", field->name); + + type = get_tracepoint_field_type(cw, field); + if (!type) + return -1; + + /* + * A string is an array of chars. For this we use the string + * type and don't care that it is an array. What we don't + * support is an array of strings. + */ + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_ARRAY) + type = bt_ctf_field_type_array_create(type, field->arraylen); + + ret = bt_ctf_event_class_add_field(event_class, type, + field->name); + + if (flags & FIELD_IS_ARRAY) + bt_ctf_field_type_put(type); + + if (ret) { + pr_err("Failed to add field '%s\n", field->name); + return -1; + } + } + + return 0; +} + +static int add_tracepoint_types(struct ctf_writer *cw, + struct perf_evsel *evsel, + struct bt_ctf_event_class *class) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_types(cw, common_fields, class); + if (!ret) + ret = add_tracepoint_fields_types(cw, fields, class); + + return ret; +} + static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *event_class) { @@ -328,6 +564,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) if (ret) goto err; + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_types(cw, evsel, event_class); + if (ret) + goto err; + } + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); if (ret) { pr("Failed to add event class into stream.\n"); -- cgit v0.10.2 From 0ba332f70a555548430ef3cf459b5240df0ffbd5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Mar 2015 17:41:52 -0300 Subject: perf hists browser: Simplify symbol annotation menu setup No need to repeat some tests, skip annotation instead. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6h6igrb81u4e6rwfmx7dv47n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 49eddeb..f69371b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1612,19 +1612,22 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!sort__has_sym) goto add_exit_option; + if (browser->selection == NULL) + goto skip_annotation; + if (sort__mode == SORT_MODE__BRANCH) { bi = browser->he_selection->branch_info; - if (browser->selection != NULL && - bi && - bi->from.sym != NULL && + + if (bi == NULL) + goto skip_annotation; + + if (bi->from.sym != NULL && !bi->from.map->dso->annotate_warned && asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) annotate_f = nr_options++; - if (browser->selection != NULL && - bi && - bi->to.sym != NULL && + if (bi->to.sym != NULL && !bi->to.map->dso->annotate_warned && (bi->to.sym != bi->from.sym || bi->to.map->dso != bi->from.map->dso) && @@ -1632,8 +1635,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, bi->to.sym->name) > 0) annotate_t = nr_options++; } else { - if (browser->selection != NULL && - browser->selection->sym != NULL && + if (browser->selection->sym != NULL && !browser->selection->map->dso->annotate_warned) { struct annotation *notes; @@ -1645,7 +1647,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, annotate = nr_options++; } } - +skip_annotation: if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", (browser->hists->thread_filter ? "out of" : "into"), -- cgit v0.10.2 From 446fb96c4a72ba390fbdecf27a88eaa50ea179dc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Mar 2015 17:46:57 -0300 Subject: perf hists browser: Fix up some branch alignment Those asprintf return checks should be aligned with the other conditionals, fix it. Also add {} blocks to further clarify. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian echo Link: http://lkml.kernel.org/n/tip-`ranpwd -l 24`@git.kernel.org Link: http://lkml.kernel.org/n/tip-nqgs07jfphbkw67wja870d3r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f69371b..ebd9d3a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1623,17 +1623,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (bi->from.sym != NULL && !bi->from.map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - bi->from.sym->name) > 0) + asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) { annotate_f = nr_options++; + } if (bi->to.sym != NULL && !bi->to.map->dso->annotate_warned && (bi->to.sym != bi->from.sym || bi->to.map->dso != bi->from.map->dso) && - asprintf(&options[nr_options], "Annotate %s", - bi->to.sym->name) > 0) + asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) { annotate_t = nr_options++; + } } else { if (browser->selection->sym != NULL && !browser->selection->map->dso->annotate_warned) { @@ -1643,8 +1643,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (notes->src && asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) + browser->selection->sym->name) > 0) { annotate = nr_options++; + } } } skip_annotation: -- cgit v0.10.2 From c80e5c0c23ce2282476fdc64c4b5e3d3a40723fd Mon Sep 17 00:00:00 2001 From: Eugene Shatokhin Date: Tue, 17 Mar 2015 19:09:18 +0900 Subject: kprobes/x86: Return correct length in __copy_instruction() On x86-64, __copy_instruction() always returns 0 (error) if the instruction uses %rip-relative addressing. This is because kernel_insn_init() is called the second time for 'insn' instance in such cases and sets all its fields to 0. Because of this, trying to place a kprobe on such instruction will fail, register_kprobe() will return -EINVAL. This patch fixes the problem. Signed-off-by: Eugene Shatokhin Signed-off-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20150317100918.28349.94654.stgit@localhost.localdomain Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4e3d5a96..03189d8 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -354,6 +354,7 @@ int __copy_instruction(u8 *dest, u8 *src) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; + int length; unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src); @@ -361,16 +362,18 @@ int __copy_instruction(u8 *dest, u8 *src) return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); + length = insn.length; + /* Another subsystem puts a breakpoint, failed to recover */ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; - memcpy(dest, insn.kaddr, insn.length); + memcpy(dest, insn.kaddr, length); #ifdef CONFIG_X86_64 if (insn_rip_relative(&insn)) { s64 newdisp; u8 *disp; - kernel_insn_init(&insn, dest, insn.length); + kernel_insn_init(&insn, dest, length); insn_get_displacement(&insn); /* * The copied instruction uses the %rip-relative addressing @@ -394,7 +397,7 @@ int __copy_instruction(u8 *dest, u8 *src) *(s32 *) disp = (s32) newdisp; } #endif - return insn.length; + return length; } static int arch_copy_kprobe(struct kprobe *p) -- cgit v0.10.2 From 4c47f4fcd60a2f4153d6fe0c31650fbec112a1bd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Mar 2015 17:18:58 -0300 Subject: perf hists: Remove hist_entry->used, not used anymore Since hist_entry__delete() nowadays doesn't actually frees anything that may be in use by the annotation code. Eventually we will solve this for good by reference counting struct symbol. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-uldtgljymtrkns0knpiso5op@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ebd9d3a..c37e70a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1731,12 +1731,7 @@ do_annotate: if (!notes->src) continue; - /* - * Don't let this be freed, say, by hists__decay_entry. - */ - he->used = true; err = hist_entry__tui_annotate(he, evsel, hbt); - he->used = false; /* * offer option to annotate the other branch source or target * (if they exists) when returning from annotate diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index d9a6d35..cc22b91 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -263,15 +263,9 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - /* - * We may be annotating this, for instance, so keep it here in - * case some it gets new samples, we'll eventually free it when - * the user stops browsing and it agains gets fully decayed. - */ if (((zap_user && n->level == '.') || (zap_kernel && n->level != '.') || - hists__decay_entry(hists, n)) && - !n->used) { + hists__decay_entry(hists, n))) { hists__delete_entry(hists, n); } } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index c03e4ff..9c01b83 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -102,7 +102,6 @@ struct hist_entry { bool init_have_children; char level; - bool used; u8 filtered; char *srcline; struct symbol *parent; -- cgit v0.10.2 From d5dbc518cd8fbc7cf54b91d5b506eb4d67e4047d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Mar 2015 18:27:28 -0300 Subject: perf hists browser: Allow annotating entries in callchains Instead of annotating just the top level hist_entry, allow instead annotating a map_symbol, i.e. the top level hist_entry or one of the callchains for which there were samples. Suggested-by: Stephane Eranian Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/n/tip-k1zxj5564je9jei4yd15ouwn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9d32e3c..e5250eb 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -829,10 +829,16 @@ out: return key; } +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt) +{ + return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); +} + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evsel, hbt); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index c37e70a..cd7350a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1704,6 +1704,7 @@ retry_popup_menu: if (choice == annotate || choice == annotate_t || choice == annotate_f) { struct hist_entry *he; struct annotation *notes; + struct map_symbol ms; int err; do_annotate: if (!objdump_path && perf_session_env__lookup_objdump(env)) @@ -1713,25 +1714,21 @@ do_annotate: if (he == NULL) continue; - /* - * we stash the branch_info symbol + map into the - * the ms so we don't have to rewrite all the annotation - * code to use branch_info. - * in branch mode, the ms struct is not used - */ if (choice == annotate_f) { - he->ms.sym = he->branch_info->from.sym; - he->ms.map = he->branch_info->from.map; - } else if (choice == annotate_t) { - he->ms.sym = he->branch_info->to.sym; - he->ms.map = he->branch_info->to.map; + ms.map = he->branch_info->from.map; + ms.sym = he->branch_info->from.sym; + } else if (choice == annotate_t) { + ms.map = he->branch_info->to.map; + ms.sym = he->branch_info->to.sym; + } else { + ms = *browser->selection; } - notes = symbol__annotation(he->ms.sym); + notes = symbol__annotation(ms.sym); if (!notes->src) continue; - err = hist_entry__tui_annotate(he, evsel, hbt); + err = map_symbol__tui_annotate(&ms, evsel, hbt); /* * offer option to annotate the other branch source or target * (if they exists) when returning from annotate diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e988c9f..9f31b89 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -303,6 +303,9 @@ struct hist_browser_timer { #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt); + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt); @@ -321,6 +324,12 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, { return 0; } +static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused) +{ + return 0; +} static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct perf_evsel *evsel __maybe_unused, -- cgit v0.10.2 From 235504dec113089856b39c65afb77a2f444aa2a9 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 17 Mar 2015 13:29:47 +0000 Subject: perf tools: Fix building error for arm64. Commit b11db6581beaccef8ae9a388ae96074aa5cc144f ("perf tools: Fix build error on ARCH=i386/x86_64/sparc64") uses sed on ARCH, which triggers a bug in sequence of sed expression, where 's/arm.*/arm/' will replace 'arm64' to 'arm', causes arm64 building failure. This patch prevent 'arm64' to be mached for 'arm.*' case. Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Zefan Li Link: http://lkml.kernel.org/r/1426598987-75245-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index e972057..e11fbd6 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -4,7 +4,7 @@ endif ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e s/sun4u/sparc/ -e s/sparc64/sparc/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ + -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -- cgit v0.10.2 From 97e7a5153e8835c4cd03c77e258c1f556d8e9655 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Mar 2015 09:00:41 +0100 Subject: perf build: Disable default check for libbabeltrace Remove libbabeltrace check from default features set, because the requested version is not released yet in most distributions. We'll enable later. Calling libbabeltrace check manually via feature_check before $(feature-libbabeltrace) is used. Signed-off-by: Jiri Olsa Acked-by: Ingo Molnar Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-5n7mr6ugcwdbxk0n1z8uukaa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index bd09718..b799649 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -227,7 +227,6 @@ CORE_FEATURE_TESTS = \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ - libbabeltrace \ zlib LIB_FEATURE_TESTS = \ @@ -243,7 +242,6 @@ LIB_FEATURE_TESTS = \ libslang \ libunwind \ libdw-dwarf-unwind \ - libbabeltrace \ zlib VF_FEATURE_TESTS = \ @@ -705,14 +703,15 @@ else endif ifndef NO_LIBBABELTRACE - ifeq ($(feature-libbabeltrace), 0) - msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev); - NO_LIBBABELTRACE := 1 - else + $(call feature_check,libbabeltrace) + ifeq ($(feature-libbabeltrace), 1) CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) LDFLAGS += $(LIBBABELTRACE_LDFLAGS) EXTLIBS += -lbabeltrace-ctf $(call detected,CONFIG_LIBBABELTRACE) + else + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev); + NO_LIBBABELTRACE := 1 endif endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 8fe0678..b6e2e31 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -44,7 +44,7 @@ BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $( ############################### test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -lbabeltrace + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz test-hello.bin: $(BUILD) diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 1ffc3da..06f5c8a 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -101,9 +101,17 @@ # include "test-pthread_attr_setaffinity_np.c" #undef main +# if 0 +/* + * Disable libbabeltrace check for test-all, because the requested + * library version is not released yet in most distributions. Will + * reenable later. + */ + #define main main_test_libbabeltrace # include "test-libbabeltrace.c" #undef main +#endif int main(int argc, char *argv[]) { @@ -130,7 +138,6 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); - main_test_libbabeltrace(); return 0; } -- cgit v0.10.2 From 806f0727fa3b9140a6603ba6b3627cbbe346d5a3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Mar 2015 09:01:26 +0100 Subject: perf build: Move features build output under features directory Following commit introduced features build dump: 443a70541c56 perf tools: Output feature detection's gcc output to a file Moving them into to have code more compact and renaming build dump files. For each feature 'test-X' new file 'test-X.make.output' is created and contains the build out. It's created in the same directory as the feature itself. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-dk6svnhcephrzgz4mfpcmtm7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 933d703..ec4c063 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -529,7 +529,6 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* - $(call QUIET_CLEAN, feature-detect) $(RM) $(OUTPUT)config/feature-checks/.make-*.output $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index b799649..45f6125 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -184,7 +184,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin > $(OUTPUT)config/feature-checks/.make-$(1).output 2>&1 && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index b6e2e31..0d694a9 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -39,7 +39,8 @@ PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) -BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) +__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) + BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1 ############################### @@ -156,4 +157,4 @@ test-zlib.bin: ############################### clean: - rm -f $(FILES) *.d + rm -f $(FILES) *.d $(FILES:.bin=.make.output) -- cgit v0.10.2 From c59a14241387f7a66723f9ee6901cb638d665777 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Mar 2015 09:02:00 +0100 Subject: perf build: Fix pthread-attr-setaffinity-np include in test-all The test-all fails to build due to type in pthread-attr-setaffinity-np include. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-awn2658267slejnebyrlns86@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 06f5c8a..a61c204 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -98,7 +98,7 @@ #undef main #define main main_test_pthread_attr_setaffinity_np -# include "test-pthread_attr_setaffinity_np.c" +# include "test-pthread-attr-setaffinity-np.c" #undef main # if 0 -- cgit v0.10.2 From 9444e874e6d0b1db2ad31e3f755bab8a571240f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Mar 2015 20:11:24 +0100 Subject: perf build: Get rid of LIB_INCLUDE variable It has no use, so we can directly use the value for CFLAGS. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ywyr5v962s32daq5hpgfkjap@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 45f6125..467106b 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -15,7 +15,6 @@ $(shell echo -n > .config-detected) detected = $(shell echo "$(1)=y" >> .config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) -LIB_INCLUDE := $(srctree)/tools/lib/ CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(src-perf)/config/Makefile.arch @@ -320,7 +319,7 @@ endif CFLAGS += -I$(src-perf)/util CFLAGS += -I$(src-perf) -CFLAGS += -I$(LIB_INCLUDE) +CFLAGS += -I$(srctree)/tools/lib/ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -- cgit v0.10.2 From ee9c80a13ea84bbf469a669685962099e11b9567 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Mar 2015 20:32:42 +0100 Subject: perf build: Rename CORE_FEATURE_TESTS to FEATURE_TESTS Preparing for feature checks separation, moving related stuff under 'FEATURE*' namespace. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-iobj4f9gygcakrk2v5u61159@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 467106b..5073505 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -203,7 +203,7 @@ endef # the rule that uses them - an example for that is the 'bionic' # feature check. ] # -CORE_FEATURE_TESTS = \ +FEATURE_TESTS = \ backtrace \ dwarf \ fortify-source \ @@ -262,7 +262,7 @@ VF_FEATURE_TESTS = \ compile-32 \ compile-x32 -# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. +# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not # mentioned in this list we need to refactor this ;-). set_test_all_flags = $(eval $(set_test_all_flags_code)) @@ -271,7 +271,7 @@ define set_test_all_flags_code FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) endef -$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat))) +$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat))) # # Special fast-path for the 'all features are available' case: @@ -286,10 +286,10 @@ ifeq ($(feature-all), 1) # # test-all.c passed - just set all the core feature flags to 1: # - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) + $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) else - $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(addsuffix .bin,$(CORE_FEATURE_TESTS)) >/dev/null 2>&1) - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1) + $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) endif ifeq ($(feature-stackprotector-all), 1) -- cgit v0.10.2 From 07efbf528c9f72bad67cc5a8c3afaa9c937fde43 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Mar 2015 20:44:18 +0100 Subject: perf build: Get rid of VF_FEATURE_TESTS It only contains (FEATURE_TESTS - FEATURE_DISPLAY) tests to display the rest of the checks on 'make VF=1'. But we can actually compute this list, which is less confusing. Also renaming LIB_FEATURE_TESTS into FEATURE_DISPLAY, so it reflects what this variable actually does - display its tests status to user. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-gs160y03hpmx5ezpcr4gunxc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5073505..198609e 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -228,7 +228,7 @@ FEATURE_TESTS = \ libdw-dwarf-unwind \ zlib -LIB_FEATURE_TESTS = \ +FEATURE_DISPLAY = \ dwarf \ glibc \ gtk2 \ @@ -243,25 +243,6 @@ LIB_FEATURE_TESTS = \ libdw-dwarf-unwind \ zlib -VF_FEATURE_TESTS = \ - backtrace \ - fortify-source \ - sync-compare-and-swap \ - gtk2-infobar \ - libelf-getphdrnum \ - libelf-mmap \ - libpython-version \ - pthread-attr-setaffinity-np \ - stackprotector-all \ - timerfd \ - libunwind-debug-frame \ - bionic \ - liberty \ - liberty-z \ - cplus-demangle \ - compile-32 \ - compile-x32 - # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not # mentioned in this list we need to refactor this ;-). @@ -805,7 +786,7 @@ define feature_print_text_code MSG = $(shell printf '...%30s: %s' $(1) $(2)) endef -PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) +PERF_FEATURES := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) ifeq ($(dwarf-post-unwind),1) @@ -816,7 +797,7 @@ endif # output. It's set if: # - detected features differes from stored features from # last build (in PERF-FEATURES file) -# - one of the $(LIB_FEATURE_TESTS) is not detected +# - one of the $(FEATURE_DISPLAY) is not detected # - VF is enabled ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") @@ -831,7 +812,7 @@ define feature_check_code endif endef -$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat))) +$(foreach feat,$(FEATURE_DISPLAY),$(call feature_check,$(feat))) ifeq ($(VF),1) display_lib := 1 @@ -841,7 +822,7 @@ endif ifeq ($(display_lib),1) $(info ) $(info Auto-detecting system features:) - $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),)) + $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) ifeq ($(dwarf-post-unwind),1) $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) @@ -849,7 +830,8 @@ ifeq ($(display_lib),1) endif ifeq ($(display_vf),1) - $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),)) + TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) + $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) $(info ) $(call feature_print_var,prefix) $(call feature_print_var,bindir) -- cgit v0.10.2 From 48e383eca276c02d4bd5c5b468b07b73ca52dd08 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Mar 2015 20:46:28 +0100 Subject: perf build: Rename display_lib into feature_display Preparing for feature checks separation, moving related stuff under 'feature*' namespace. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-t72o4nwx81owjv14y43b2wpf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 198609e..f80fc7e 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -793,7 +793,7 @@ ifeq ($(dwarf-post-unwind),1) PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text)) endif -# The $(display_lib) controls the default detection message +# The $(feature_display) controls the default detection message # output. It's set if: # - detected features differes from stored features from # last build (in PERF-FEATURES file) @@ -802,24 +802,24 @@ endif ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES) - display_lib := 1 + feature_display := 1 endif feature_check = $(eval $(feature_check_code)) define feature_check_code ifneq ($(feature-$(1)), 1) - display_lib := 1 + feature_display := 1 endif endef $(foreach feat,$(FEATURE_DISPLAY),$(call feature_check,$(feat))) ifeq ($(VF),1) - display_lib := 1 + feature_display := 1 display_vf := 1 endif -ifeq ($(display_lib),1) +ifeq ($(feature_display),1) $(info ) $(info Auto-detecting system features:) $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) @@ -841,7 +841,7 @@ ifeq ($(display_vf),1) $(call feature_print_var,LIBDW_DIR) endif -ifeq ($(display_lib),1) +ifeq ($(feature_display),1) $(info ) endif -- cgit v0.10.2 From a15599ac6bd2e6196f3aeb2fb08af73129621fe1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Mar 2015 20:58:24 +0100 Subject: perf build: Rename display_vf to feature_verbose Preparing for feature checks separation, moving related stuff under 'feature*' namespace. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ciaflab01mf0ljmfb9xr4p41@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f80fc7e..370ad50 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -816,7 +816,7 @@ $(foreach feat,$(FEATURE_DISPLAY),$(call feature_check,$(feat))) ifeq ($(VF),1) feature_display := 1 - display_vf := 1 + feature_verbose := 1 endif ifeq ($(feature_display),1) @@ -829,7 +829,7 @@ ifeq ($(feature_display),1) endif endif -ifeq ($(display_vf),1) +ifeq ($(feature_verbose),1) TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) $(info ) -- cgit v0.10.2 From 4ae61202b31c91259ff45cc9744b8a95add6b52a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 1 Mar 2015 20:50:47 +0100 Subject: perf build: Rename PERF-FEATURES into FEATURE-DUMP Preparing for feature checks separation, moving related stuff under 'FEATURE*' namespace. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-v9oo22ra70rrk1dy495a7bjc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index ec4c063..e323eab 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -528,7 +528,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 370ad50..b98ab77 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -786,22 +786,22 @@ define feature_print_text_code MSG = $(shell printf '...%30s: %s' $(1) $(2)) endef -PERF_FEATURES := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) -PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) +FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) +FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat $(OUTPUT)FEATURE-DUMP) ifeq ($(dwarf-post-unwind),1) - PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text)) + FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text)) endif # The $(feature_display) controls the default detection message # output. It's set if: # - detected features differes from stored features from -# last build (in PERF-FEATURES file) +# last build (in FEATURE-DUMP file) # - one of the $(FEATURE_DISPLAY) is not detected # - VF is enabled -ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") - $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES) +ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)") + $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP) feature_display := 1 endif -- cgit v0.10.2 From 4b20d684b4e1d11795af8783b45f2149b0ba0faf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Mar 2015 13:30:30 +0100 Subject: perf build: Rename feature_print_var_code to print_var_code As it has nothing to do with features and won't be moved into tools/build. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-6qgf37nss4wwjatgj5i4ng0o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index b98ab77..489d333 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -776,8 +776,8 @@ define feature_print_status_code endif endef -feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG)) -define feature_print_var_code +print_var = $(eval $(print_var_code)) $(info $(MSG)) +define print_var_code MSG = $(shell printf '...%30s: %s' $(1) $($(1))) endef @@ -827,21 +827,22 @@ ifeq ($(feature_display),1) ifeq ($(dwarf-post-unwind),1) $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) endif + + ifneq ($(feature_verbose),1) + $(info ) + endif endif ifeq ($(feature_verbose),1) TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) $(info ) - $(call feature_print_var,prefix) - $(call feature_print_var,bindir) - $(call feature_print_var,libdir) - $(call feature_print_var,sysconfdir) - $(call feature_print_var,LIBUNWIND_DIR) - $(call feature_print_var,LIBDW_DIR) -endif - -ifeq ($(feature_display),1) + $(call print_var,prefix) + $(call print_var,bindir) + $(call print_var,libdir) + $(call print_var,sysconfdir) + $(call print_var,LIBUNWIND_DIR) + $(call print_var,LIBDW_DIR) $(info ) endif -- cgit v0.10.2 From 76aea7731e7050c066943a1d7456ec6510702601 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 17 Mar 2015 15:27:48 -0700 Subject: perf tools: Fix perf-read-vdsox32 not building and lib64 install dir Commit: c6e5e9fbc3ea ("perf tools: Fix building error in x86_64 when dwarf unwind is on") removed the definition of IS_X86_64 but not all places using it, with the consequence that perf-read-vdsox32 would not be built anymore, and the default lib install directory was 'lib' instead of 'lib64'. Also needs to go to v3.19. Signed-off-by: H.J. Lu Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: stable@vger.kernel.org # 3.19 Link: http://lkml.kernel.org/r/CAMe9rOqpGVq3D88w+D15ef7sv6G6k57ZeTvxBm46=WFgzo9p1w@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 489d333..e7f83b1 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -666,7 +666,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq (${IS_X86_64}, 1) + ifneq ($(ARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -727,7 +727,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 75709d2..bff8532 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -5,7 +5,7 @@ include config/Makefile.arch # FIXME looks like x86 is the only arch running tests ;-) # we need some IS_(32/64) flag to make this generic -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib -- cgit v0.10.2 From 02fde323b9aaebb4a21e7c0e04759470b6c07594 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:46 +0800 Subject: perf tools: Fix the bash completion for listing options of perf subcommand The bash completion does not support listing options for 'perf kvm|kmem|mem|lock|sched --', where 'kvm|kmem|mem|lock|sched' are all subcommands of perf. Example: Before this patch: $ perf kvm -- $ As shown above, the options of perf kvm does not come out. After this patch: $ perf kvm -- --alloc --caller --input --line --raw-ip --sort --verbose As shown above, the options of perf kvm can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-2-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index c2595e9..4822ed3 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -119,15 +119,18 @@ __perf_main () elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" - # List subcommands for perf commands - elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then - subcmds=$($cmd $prev --list-cmds) - __perfcomp_colon "$subcmds" "$cur" - # List long option names - elif [[ $cur == --* ]]; then - subcmd=${words[1]} - opts=$($cmd $subcmd --list-opts) - __perfcomp "$opts" "$cur" + else + # List subcommands for perf commands + if [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then + subcmds=$($cmd $prev --list-cmds) + __perfcomp_colon "$subcmds" "$cur" + fi + # List long option names + if [[ $cur == --* ]]; then + subcmd=${words[1]} + opts=$($cmd $subcmd --list-opts) + __perfcomp "$opts" "$cur" + fi fi } -- cgit v0.10.2 From 67afff485b2ce742374edb2e17d21e2bc664eb1f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:47 +0800 Subject: perf tools: Fix the bash completion for listing subsubcommands of perf subcommand The bash completion does not support listing subsubcommands for 'perf kvm|kmem|mem|lock|sched -- ', where 'kvm|kmem|mem| lock|sched' are all subcommands of perf. Example: Before this patch: $ perf kvm --verbose $ As shown above, the subsubcommands of perf kvm does not come out. After this patch: $ perf kvm --verbose buildid-list diff record report stat top As shown above, the subsubcommands of perf kvm can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-3-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 4822ed3..bbb61d0 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -100,6 +100,23 @@ __perfcomp_colon () __ltrim_colon_completions $cur } +__perf_prev_skip_opts () +{ + local i cmd_ cmds_ + + let i=cword-1 + cmds_=$($cmd --list-cmds) + prev_skip_opts=() + while [ $i -ge 0 ]; do + for cmd_ in $cmds_; do + if [[ ${words[i]} == $cmd_ ]]; then + prev_skip_opts=${words[i]} + return + fi + done + ((i--)) + done +} __perf_main () { local cmd @@ -107,6 +124,8 @@ __perf_main () cmd=${words[0]} COMPREPLY=() + # Skip options backward and find the last perf command + __perf_prev_skip_opts # List perf subcommands or long options if [ $cword -eq 1 ]; then if [[ $cur == --* ]]; then @@ -121,8 +140,8 @@ __perf_main () __perfcomp_colon "$evts" "$cur" else # List subcommands for perf commands - if [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then - subcmds=$($cmd $prev --list-cmds) + if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched) ]]; then + subcmds=$($cmd $prev_skip_opts --list-cmds) __perfcomp_colon "$subcmds" "$cur" fi # List long option names -- cgit v0.10.2 From eee200a6c4a9712146ba999f944ba4f2c3fc2d44 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:48 +0800 Subject: perf tools: Provide the right bash completion for listing options of perf subcommand subsubcommand The bash completion gives wrong options for 'perf kvm|kmem|mem|lock| sched subsubcommand --', where 'kvm|kmem|mem|lock|sched' are all subcommands of perf and 'subsubcommand' is a subcommand of 'kvm|kmem|mem |lock|sched'. In fact, the result incorrectly lists the bash completion of 'perf subcommand' rather than 'perf subcommand subsubcommand'. Example: Before this patch: $ perf kvm record -- --guest --guestkallsyms --guestmodules --guestmount --guestvmlinux --host --input --output --verbose As shown above, the result is the options of kvm rather than record. After this patch: $ perf kvm record -- --all-cpus --cgroup --delay --group --no-buildid --output --quiet --stat --uid --branch-any --count --event --intr-regs --no-buildid-cache --period --raw-samples --tid --verbose --branch-filter --cpu --filter --mmap-pages --no-inherit --per-thread --realtime --timestamp --weight --call-graph --data --freq --no-buffering --no-samples --pid --running-time --transaction As shown above, the result is exactly the options of record as we wished. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-4-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index bbb61d0..01ce841 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -105,9 +105,12 @@ __perf_prev_skip_opts () local i cmd_ cmds_ let i=cword-1 - cmds_=$($cmd --list-cmds) + cmds_=$($cmd $1 --list-cmds) prev_skip_opts=() while [ $i -ge 0 ]; do + if [[ ${words[i]} == $1 ]]; then + return + fi for cmd_ in $cmds_; do if [[ ${words[i]} == $cmd_ ]]; then prev_skip_opts=${words[i]} @@ -146,7 +149,9 @@ __perf_main () fi # List long option names if [[ $cur == --* ]]; then - subcmd=${words[1]} + subcmd=$prev_skip_opts + __perf_prev_skip_opts $subcmd + subcmd=$subcmd" "$prev_skip_opts opts=$($cmd $subcmd --list-opts) __perfcomp "$opts" "$cur" fi -- cgit v0.10.2 From bc81fad125dba7da33dbeac4c10d9c84614eb3ab Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:49 +0800 Subject: perf tools: Fix the bash completion for listing events of perf subcommand record|stat|top -e The bash completion does not support listing events for 'perf kvm|kmem| mem|lock|sched record|stat|top -e ', where 'kvm|kmem|mem|lock|sched' are all subcommands of perf. Example: Before this patch: $ perf kvm record -e $ As shown above, the events of record does not come out. After this patch: $ perf kvm record -e alignment-faults cpu/instructions/ L1-dcache-prefetch-misses node-prefetches uncore_rbox_0/qpi0_idle_filt/ branch-instructions cpu/mem-loads/ L1-dcache-store-misses node-prefetch-misses uncore_rbox_0/qpi1_date_response/ branch-load-misses cpu-migrations L1-dcache-stores node-store-misses uncore_rbox_0/qpi1_filt_send/ branch-loads dTLB-load-misses L1-icache-load-misses node-stores uncore_rbox_0/qpi1_idle_filt/ ... As shown above, the events of record can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-5-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 01ce841..4b58ac2 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -138,7 +138,7 @@ __perf_main () fi __perfcomp "$cmds" "$cur" # List possible events for -e option - elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then + elif [[ $prev == "-e" && $prev_skip_opts == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" else -- cgit v0.10.2 From 3346b542f4165cb0007cfe3600866acbee67c686 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:50 +0800 Subject: perf tools: Fix the bash completion to support listing events for --event The bash completion only supports -e rather than --event, so fix it. Example: Before this patch: $ perf record --event $ As shown above, the events of record does not come out. After this patch: $ perf record --event lignment-faults cpu/instructions/ L1-dcache-prefetch-misses node-prefetches uncore_rbox_0/qpi0_idle_filt/ branch-instructions cpu/mem-loads/ L1-dcache-store-misses node-prefetch-misses uncore_rbox_0/qpi1_date_response/ branch-load-misses cpu-migrations L1-dcache-stores node-store-misses uncore_rbox_0/qpi1_filt_send/ branch-loads dTLB-load-misses L1-icache-load-misses node-stores uncore_rbox_0/qpi1_idle_filt/ ... As shown above, the events of record can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-6-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 4b58ac2..3e25d3e 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -137,8 +137,8 @@ __perf_main () cmds=$($cmd --list-cmds) fi __perfcomp "$cmds" "$cur" - # List possible events for -e option - elif [[ $prev == "-e" && $prev_skip_opts == @(record|stat|top) ]]; then + # List possible events for -e and --event option + elif [[ $prev == @("-e"|"--event") && $prev_skip_opts == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" else -- cgit v0.10.2 From e003ce54d2ccbb8da0fd4f421e1cc9686ef25add Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:51 +0800 Subject: perf tools: Fix the bash completion for listing subcommands of perf The bash completion does not support listing subcommands for 'perf -- '. Example: Before this patch: $ perf --debug $ As shown above, the subcommands of perf does not come out. After this patch: $ perf --debug annotate buildid-cache data evlist inject kvm lock probe report script test top version bench buildid-list diff help kmem list mem record sched stat timechart trace As shown above, the subcommands of perf can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-7-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 3e25d3e..7b98ae4 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -130,7 +130,7 @@ __perf_main () # Skip options backward and find the last perf command __perf_prev_skip_opts # List perf subcommands or long options - if [ $cword -eq 1 ]; then + if [ -z $prev_skip_opts ]; then if [[ $cur == --* ]]; then cmds=$($cmd --list-opts) else -- cgit v0.10.2 From 01b7160bc6674912f6b7043bbf58d66e62e41054 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:52 +0800 Subject: perf tools: Add the bash completion for listing subsubcommands of perf data The bash completion does not support listing subsubcommands for 'perf data ', so fix it. Example: Before this patch: $ perf data $ As shown above, the subsubcommands of perf data does not come out. After this patch: $ perf data convert As shown above, the subsubcommands of perf data can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-8-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 155cf75..709152a 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -22,7 +22,9 @@ static const struct option data_options[] = { OPT_END() }; -static const char * const data_usage[] = { +static const char * const data_subcommands[] = { "convert", NULL }; + +static const char *data_usage[] = { "perf data [] []", NULL }; @@ -98,7 +100,7 @@ int cmd_data(int argc, const char **argv, const char *prefix) if (argc < 2) goto usage; - argc = parse_options(argc, argv, data_options, data_usage, + argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (argc < 1) goto usage; diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 7b98ae4..535ff72 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -143,7 +143,7 @@ __perf_main () __perfcomp_colon "$evts" "$cur" else # List subcommands for perf commands - if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched) ]]; then + if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched|data) ]]; then subcmds=$($cmd $prev_skip_opts --list-cmds) __perfcomp_colon "$subcmds" "$cur" fi -- cgit v0.10.2 From e24a110882949c609c5fb7625b71d8e77264257b Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:53 +0800 Subject: perf tools: Add the bash completion for listing subsubcommands of perf help The bash completion does not support listing subsubcommands for 'perf help ', so fix it. Example: Before this patch: $ perf help $ As shown above, the subsubcommands of perf help does not come out. After this patch: $ perf help annotate buildid-cache data evlist inject kvm lock probe report script test top bench buildid-list diff help kmem list mem record sched stat timechart trace As shown above, the subsubcommands of perf help can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-9-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 25d2062..36486ea 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -437,7 +437,18 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) HELP_FORMAT_INFO), OPT_END(), }; - const char * const builtin_help_usage[] = { + const char * const builtin_help_subcommands[] = { + "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", + "record", "report", "bench", "stat", "timechart", "top", "annotate", + "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", +#ifdef HAVE_LIBELF_SUPPORT + "probe", +#endif +#ifdef HAVE_LIBAUDIT_SUPPORT + "trace", +#endif + NULL }; + const char *builtin_help_usage[] = { "perf help [--all] [--man|--web|--info] [command]", NULL }; @@ -448,8 +459,8 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) perf_config(perf_help_config, &help_format); - argc = parse_options(argc, argv, builtin_help_options, - builtin_help_usage, 0); + argc = parse_options_subcommand(argc, argv, builtin_help_options, + builtin_help_subcommands, builtin_help_usage, 0); if (show_all) { printf("\n usage: %s\n\n", perf_usage_string); diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 535ff72..dbca6a6 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -143,7 +143,7 @@ __perf_main () __perfcomp_colon "$evts" "$cur" else # List subcommands for perf commands - if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched|data) ]]; then + if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched|data|help) ]]; then subcmds=$($cmd $prev_skip_opts --list-cmds) __perfcomp_colon "$subcmds" "$cur" fi -- cgit v0.10.2 From 40cae2b779f2826f3d82674027299332c2007716 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 18 Mar 2015 21:35:54 +0800 Subject: perf tools: Add the bash completion for listing subsubcommands of perf script The bash completion does not support listing subsubcommands for 'perf script ', so fix it. Example: Before this patch: $ perf script $ As shown above, the subsubcommands of perf script does not come out. After this patch: $ perf script record report As shown above, the subsubcommands of perf script can come out now. Signed-off-by: Yunlong Song Tested-by: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1426685758-25488-10-git-send-email-yunlong.song@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c7e6750..f2a348b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1572,7 +1572,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the mmap events"), OPT_END() }; - const char * const script_usage[] = { + const char * const script_subcommands[] = { "record", "report", NULL }; + const char *script_usage[] = { "perf script []", "perf script [] record