From bb4c5500c9c5b809696eee212843e731721a6e15 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Nov 2013 08:33:24 +0100 Subject: tools/perf/build: Fix timerfd feature check 'feature_timerfd' is checked all the time and calculated explicitly, in a serial fashion. Add it to CORE_FEATURE_TESTS which causes it to be built in parallel, using the newfangled parallel build autodetection code. This shaves 137 msecs off the perf build time on my system, which speeds up the common case cached build by 43%: Before: comet:~/tip> perf stat --null --repeat 5 make -C tools/perf/ [...] 0,453771441 seconds time elapsed ( +- 0,09% ) After: comet:~/tip> perf stat --null --repeat 5 make -C tools/perf/ [...] 0,316290185 seconds time elapsed ( +- 0,24% ) Cc: David Ahern Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-bb92CmexihopoSyqnkqepvsy@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f5905f2..861379e 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -142,7 +142,8 @@ CORE_FEATURE_TESTS = \ libunwind \ on-exit \ stackprotector \ - stackprotector-all + stackprotector-all \ + timerfd # # So here we detect whether test-all was rebuilt, to be able @@ -405,7 +406,6 @@ else endif endif -$(call feature_check,timerfd) ifeq ($(feature-timerfd), 1) CFLAGS += -DHAVE_TIMERFD_SUPPORT else -- cgit v0.10.2 From e310718d0e83aeb9969264dc577c45db16d9104d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Nov 2013 08:25:24 +0100 Subject: tools/perf/build: Fix feature-libunwind-debug-frame handling Set feature-libunwind-debug-frame. We don't want it in CORE_FEATURE_TESTS because it's not the generic case, but we need to set it in the !feature-libunwind case. 
Also, because x86 distributions typically don't have dwarf_find_debug_frame() unwinding method: test-libunwind-debug-frame.c:(.text+0x31): undefined reference to `_Ux86_64_dwarf_find_debug_frame' Restrict this new API to ARM for the time being. With this patch test-all.c works again, so repeat perf builds are fast again: comet:~/tip> perf stat --null --repeat 5 make -C tools/perf/ [...] 0,452899660 seconds time elapsed ( +- 0,11% ) While before it was: comet:~/tip> perf stat --null --repeat 5 make -C tools/perf/ [...] 1,674001829 seconds time elapsed ( +- 0,16% ) [ Includes fix to config/feature-checks/Makefile from Will Deacon. ] Tested-by: Will Deacon Tested-by: Jean Pihet Cc: Russell King Cc: Linus Torvalds Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/n/tip-scsoctqzmou3rpkixCHezy9e@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 861379e..f7d11a8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -329,8 +329,14 @@ ifndef NO_LIBUNWIND msg := $(warning No libunwind found, disabling post unwind support. 
Please install libunwind-dev[el] >= 1.1); NO_LIBUNWIND := 1 else - ifneq ($(feature-libunwind-debug-frame), 1) - msg := $(warning No debug_frame support found in libunwind); + ifeq ($(ARCH),arm) + $(call feature_check,libunwind-debug-frame) + ifneq ($(feature-libunwind-debug-frame), 1) + msg := $(warning No debug_frame support found in libunwind); + CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME + endif + else + # non-ARM has no dwarf_find_debug_frame() function: CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index e8e195f..87e7900 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -76,6 +76,9 @@ test-libnuma: test-libunwind: $(BUILD) $(LIBUNWIND_LIBS) -lelf +test-libunwind-debug-frame: + $(BUILD) $(LIBUNWIND_LIBS) -lelf + test-libaudit: $(BUILD) -laudit diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 799865b..59e7a70 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -49,10 +49,6 @@ # include "test-libunwind.c" #undef main -#define main main_test_libunwind_debug_frame -# include "test-libunwind-debug-frame.c" -#undef main - #define main main_test_libaudit # include "test-libaudit.c" #undef main -- cgit v0.10.2 From 27a778b512e002d856952b4f01842ba4d34bc3d1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 13 Nov 2013 14:21:48 +0200 Subject: perf trace: Tweak summary output Tweak the summary output as suggested by Ingo Molnar: [penberg@localhost ~]$ perf trace -a --duration 10000 --summary -- sleep 1 ^C Summary of events: Xorg (817), 148 events, 0.0%, 0.000 msec syscall calls min avg max stddev (msec) (msec) (msec) (%) --------------- -------- --------- --------- --------- ------ read 7 0.002 0.004 0.011 32.00% rt_sigprocmask 40 0.001 0.001 0.002 1.31% ioctl 6 0.002 0.003 0.005 19.45% writev 7 0.004 
0.018 0.059 43.76% select 9 0.000 74.513 507.869 74.61% setitimer 4 0.001 0.002 0.002 10.08% Suggested-by: Ingo Molnar Signed-off-by: Pekka Enberg Acked-by: Ingo Molnar Cc: David Ahern Cc: Ingo Molnar Link: http://lkml.kernel.org/r/1384345308-24404-1-git-send-email-penberg@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6b230af..8be17fc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2112,9 +2112,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, "\n"); - printed += fprintf(fp, " msec/call\n"); - printed += fprintf(fp, " syscall calls min avg max stddev\n"); - printed += fprintf(fp, " --------------- -------- -------- -------- -------- ------\n"); + printed += fprintf(fp, " syscall calls min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); /* each int_node is a syscall */ while (inode) { @@ -2131,9 +2131,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, sc = &trace->syscalls.table[inode->i]; printed += fprintf(fp, " %-15s", sc->name); - printed += fprintf(fp, " %8" PRIu64 " %8.3f %8.3f", + printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", n, min, avg); - printed += fprintf(fp, " %8.3f %6.2f\n", max, pct); + printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); } inode = intlist__next(inode); -- cgit v0.10.2 From b222213936ef7d48908be2fab7639dd535c88045 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 12 Nov 2013 22:24:24 -0800 Subject: perf tools: Remove trivial extra semincolon Accidentally ran into these, get rid of them. 
Signed-off-by: Davidlohr Bueso Link: http://lkml.kernel.org/r/1384323864.2527.8.camel@buesod1.americas.hpqcorp.net Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index bbc782e..3648d4e 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -680,7 +680,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser, if (end >= browser->top_idx + browser->height) end_row = browser->height - 1; else - end_row = end - browser->top_idx;; + end_row = end - browser->top_idx; ui_browser__gotorc(browser, row, column); SLsmg_draw_vline(end_row - row + 1); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dc6fa3f..5ce2ace 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1148,7 +1148,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) perf_evsel__name(evsel)); } - return printed + fprintf(fp, "\n");; + return printed + fprintf(fp, "\n"); } int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, -- cgit v0.10.2 From ea432a8bb940e6bea2aaeca3c0ff3d931ad81f2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Nov 2013 00:26:09 +0100 Subject: perf top: Add missing newline if the 'uid' is invalid Add missing newline if the 'uid' is invalid: hubble:~> perf top --stdio -u help Error: Invalid User: helphubble:~> Fixed by this patch: comet:~/tip/tools/perf> perf top --stdio -u help Error: Invalid User: help comet:~/tip/tools/perf> Signed-off-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131112232609.GA31474@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b8f8e29..71e6402 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1172,7 +1172,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) status = 
target__validate(target); if (status) { target__strerror(target, status, errbuf, BUFSIZ); - ui__warning("%s", errbuf); + ui__warning("%s\n", errbuf); } status = target__parse_uid(target); @@ -1180,7 +1180,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) int saved_errno = errno; target__strerror(target, status, errbuf, BUFSIZ); - ui__error("%s", errbuf); + ui__error("%s\n", errbuf); status = -saved_errno; goto out_delete_evlist; -- cgit v0.10.2 From 9d4ecc8893832337daf241236841db966fa53489 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 13 Nov 2013 15:32:06 -0300 Subject: perf tools: Synthesize anon MMAP records again When introducing the PERF_RECORD_MMAP2 in: 5c5e854bc760 perf tools: Add attr->mmap2 support A check for the number of entries parsed by sscanf was introduced that assumed all of the 8 fields needed to be correctly parsed so that particular /proc/pid/maps line would be considered synthesizable. That broke anon records synthesizing, as it doesn't have the 'execname' field. Fix it by keeping the sscanf return check, changing it to not require that the 'execname' variable be parsed, so that the preexisting logic can kick in and set it to '//anon'. This should get things like JIT profiling working again. Signed-off-by: Don Zickus Cc: Bill Gray Cc: Jiri Olsa Cc: Joe Mario Cc: Richard Fowles Cc: Stephane Eranian Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/n/tip-bo4akalno7579shpz29u867j@git.kernel.org [ commit log message is mine, dzickus reported the problem with a patch ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6e3a846..bb788c1 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -209,8 +209,10 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, &event->mmap.start, &event->mmap.len, prot, &event->mmap.pgoff, execname); - - if (n != 5) + /* + * Anon maps don't have the execname. 
+ */ + if (n < 4) continue; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c -- cgit v0.10.2 From 9a354cdc2f40344a177d369fb4987a8270dd94df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Nov 2013 15:54:30 -0300 Subject: perf tools: Use perf_evlist__{first,last}, perf_evsel__next In a few remaining places where the equivalent open coded variant was still being used. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-4vjnloi5fisilykwxalb5nel@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index ef671cd..3cbd104 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -441,9 +441,8 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) static int test__checkevent_pmu_events(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong exclude_user", diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 16848bb..089fd37 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1847,13 +1847,13 @@ browse_hists: switch (key) { case K_TAB: if (pos->node.next == &evlist->entries) - pos = list_entry(evlist->entries.next, struct perf_evsel, node); + pos = perf_evlist__first(evlist); else - pos = list_entry(pos->node.next, struct perf_evsel, node); + pos = perf_evsel__next(pos); goto browse_hists; case K_UNTAB: if (pos->node.prev == &evlist->entries) - pos = list_entry(evlist->entries.prev, struct perf_evsel, node); + pos = 
perf_evlist__last(evlist); else pos = list_entry(pos->node.prev, struct perf_evsel, node); goto browse_hists; @@ -1943,8 +1943,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, single_entry: if (nr_entries == 1) { - struct perf_evsel *first = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); const char *ev_name = perf_evsel__name(first); return perf_evsel__hists_browse(first, nr_entries, help, -- cgit v0.10.2 From d87fcb4a2d990ba2de9284ede84a816c5066d54b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Nov 2013 15:56:40 -0300 Subject: perf evsel: Introduce perf_evsel__prev() method Just one use so far, on the hists browser, for completeness since there we use perf_evlist__{first,last} and perf_evsel__next() for handling the TAB and UNTAB keys. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-d09l4lejp5427enuf3igpckw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 089fd37..a440e03 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1855,7 +1855,7 @@ browse_hists: if (pos->node.prev == &evlist->entries) pos = perf_evlist__last(evlist); else - pos = list_entry(pos->node.prev, struct perf_evsel, node); + pos = perf_evsel__prev(pos); goto browse_hists; case K_ESC: if (!ui_browser__dialog_yesno(&menu->b, diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f502965..1ea7c92 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -279,6 +279,11 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) return list_entry(evsel->node.next, struct perf_evsel, node); } +static inline struct perf_evsel *perf_evsel__prev(struct 
perf_evsel *evsel) +{ + return list_entry(evsel->node.prev, struct perf_evsel, node); +} + /** * perf_evsel__is_group_leader - Return whether given evsel is a leader event * -- cgit v0.10.2 From 37676af15c8d5a9689c9d1220d2a27d510cbe238 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Nov 2013 17:40:36 -0300 Subject: perf symbols: Limit max callchain using max_stack on DWARF unwinding too It was affecting only frame-pointer (fp) based callchain processing. Usage example: perf top --call-graph dwarf,1024 --max-stack 2 Works for any tool that does callchain resolving and provides a --max-stack option. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Waiman Long Link: http://lkml.kernel.org/n/tip-eu45v8s3tq9ruay8tpfyon79@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 0393912..84cdb07 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1368,7 +1368,7 @@ int machine__resolve_callchain(struct machine *machine, return unwind__get_entries(unwind_entry, &callchain_cursor, machine, thread, evsel->attr.sample_regs_user, - sample); + sample, max_stack); } diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 5390d0b..0efd539 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -559,7 +559,7 @@ static unw_accessors_t accessors = { }; static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, - void *arg) + void *arg, int max_stack) { unw_addr_space_t addr_space; unw_cursor_t c; @@ -575,7 +575,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, if (ret) display_error(ret); - while (!ret && (unw_step(&c) > 0)) { + while (!ret && (unw_step(&c) > 0) && max_stack--) { unw_word_t ip; unw_get_reg(&c, UNW_REG_IP, &ip); @@ -588,7 +588,8 @@ static int get_entries(struct 
unwind_info *ui, unwind_entry_cb_t cb, int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, - u64 sample_uregs, struct perf_sample *data) + u64 sample_uregs, struct perf_sample *data, + int max_stack) { unw_word_t ip; struct unwind_info ui = { @@ -610,5 +611,5 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (ret) return -ENOMEM; - return get_entries(&ui, cb, arg); + return get_entries(&ui, cb, arg, max_stack); } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index ec0c71a..d5966f49 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -18,7 +18,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, u64 sample_uregs, - struct perf_sample *data); + struct perf_sample *data, int max_stack); int unwind__arch_reg_id(int regnum); #else static inline int @@ -27,7 +27,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, struct machine *machine __maybe_unused, struct thread *thread __maybe_unused, u64 sample_uregs __maybe_unused, - struct perf_sample *data __maybe_unused) + struct perf_sample *data __maybe_unused, + int max_stack __maybe_unused) { return 0; } -- cgit v0.10.2 From 48d038fcd09fa231e254965c3b69f8f640c9e62d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Nov 2013 15:30:41 -0300 Subject: perf ui browser: Fix segfault caused by off by one handling END key $ perf record ls $ perf report Press 'down enter end' Result: Program received signal SIGSEGV, Segmentation fault. The UI browser, when used on an argv array, would access past the end of the array on SEEK_END because it wasn't using 'nr_entries - 1', fix it. 
Reported-by: v.karpov@samsung.com Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=59291 Link: http://lkml.kernel.org/n/tip-3g83ipasqi219ktv764xzzjs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 3648d4e..cbaa7af 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -569,7 +569,7 @@ void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence) browser->top = browser->top + browser->top_idx + offset; break; case SEEK_END: - browser->top = browser->top + browser->nr_entries + offset; + browser->top = browser->top + browser->nr_entries - 1 + offset; break; default: return; -- cgit v0.10.2 From 35e17b2450e09968f9702d4048c228199af171bc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 28 Oct 2013 12:04:24 +0400 Subject: perf probe: Add '--demangle'/'--no-demangle' You can't pass a demangled name into "perf probe", because of special chars: ./perf probe -f -x /tmp/a.out 'foo(int)' Semantic error :There is non-digit char in line number. And you can't even pass the mangled name (because it searches for symbols in the DSO with demangle=true): ./perf probe -f -x /tmp/a.out _Z3fooi no symbols found in /tmp/a.out, maybe install a debug package? However: nm /tmp/a.out | grep foo 000000000040056d T _Z3fooi After this patch, using the following command: ./perf probe -f --no-demangle -x /tmp/a.out _Z3fooi the probe will be successfully added. 
Signed-off-by: Azat Khuzhin Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1382947464-31266-1-git-send-email-a3at.mail@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 89acc17..6ea9e85 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -325,6 +325,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_set_filter), OPT_CALLBACK('x', "exec", NULL, "executable|path", "target executable name or path", opt_set_target), + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, + "Disable symbol demangling"), OPT_END() }; int ret; -- cgit v0.10.2 From 539e6bb71e350541105e67e3d6c31392d9da25ef Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Nov 2013 15:51:34 +0200 Subject: perf record: Add an option to force per-cpu mmaps By default, when tasks are specified (i.e. -p, -t or -u options) per-thread mmaps are created. Add an option to override that and force per-cpu mmaps. Further comments by peterz: So this option allows -t/-p/-u to create one buffer per cpu and attach all the various thread/process/user tasks' their counters to that one buffer? As opposed to the current state where each such counter would have its own buffer. 
Signed-off-by: Adrian Hunter Tested-by: Sukadev Bhattiprolu Acked-by: Peter Zijlstra Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1383313899-15987-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 052f7c4..43b42c4 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -201,6 +201,12 @@ abort events and some memory events in precise mode on modern Intel CPUs. --transaction:: Record transaction flags for transaction related events. +--force-per-cpu:: +Force the use of per-cpu mmaps. By default, when tasks are specified (i.e. -p, +-t or -u options) per-thread mmaps are created. This option overrides that and +forces per-cpu mmaps. A side-effect of that is that inheritance is +automatically enabled. Add the -i option also to disable inheritance. 
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4d644fe..7c8020a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -888,6 +888,8 @@ const struct option record_options[] = { "sample by weight (on special events only)"), OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction, "sample transaction flags (special events only)"), + OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu, + "force the use of per-cpu mmaps"), OPT_END() }; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5ce2ace..bbc746a 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -819,7 +819,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->threads == NULL) return -1; - if (target__has_task(target)) + if (target->force_per_cpu) + evlist->cpus = cpu_map__new(target->cpu_list); + else if (target__has_task(target)) evlist->cpus = cpu_map__dummy_new(); else if (!target__has_cpu(target) && !target->uses_mmap) evlist->cpus = cpu_map__dummy_new(); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 18f7c18..46dd4c2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -645,7 +645,7 @@ void perf_evsel__config(struct perf_evsel *evsel, } } - if (target__has_cpu(&opts->target)) + if (target__has_cpu(&opts->target) || opts->target.force_per_cpu) perf_evsel__set_sample_bit(evsel, CPU); if (opts->period) @@ -653,7 +653,7 @@ void perf_evsel__config(struct perf_evsel *evsel, if (!perf_missing_features.sample_id_all && (opts->sample_time || !opts->no_inherit || - target__has_cpu(&opts->target))) + target__has_cpu(&opts->target) || opts->target.force_per_cpu)) perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples) { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 89bab71..2d0c506 100644 --- a/tools/perf/util/target.h +++ 
b/tools/perf/util/target.h @@ -12,6 +12,7 @@ struct target { uid_t uid; bool system_wide; bool uses_mmap; + bool force_per_cpu; }; enum target_errno { -- cgit v0.10.2