diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-05-20 06:19:20 (GMT) |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-05-20 06:20:14 (GMT) |
commit | 21f77d231fabd33c5de61fbff31818d93203353e (patch) | |
tree | 74bd85f1184b26409605884bf65ae1c1ba5d724c /tools | |
parent | b0a434fb7412937d55f15b8897c5646c81497bbe (diff) | |
parent | a29d5c9b8167dbc21a7ca8c0302e3799f9063b4e (diff) | |
download | linux-21f77d231fabd33c5de61fbff31818d93203353e.tar.xz |
Merge tag 'perf-core-for-mingo-20160516' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
User visible changes:
- Honour the kernel.perf_event_max_stack knob more precisely by not counting
PERF_CONTEXT_{KERNEL,USER} when deciding when to stop adding entries to
the perf_sample->ip_callchain[] array (Arnaldo Carvalho de Melo)
- Fix indentation of 'stalled-backend-cycles' in 'perf stat' (Namhyung Kim)
- Update runtime using 'cpu-clock' event in 'perf stat' (Namhyung Kim)
- Use 'cpu-clock' for cpu targets in 'perf stat' (Namhyung Kim)
- Avoid fractional digits for integer scales in 'perf stat' (Andi Kleen)
- Store vdso buildid unconditionally, as it appears in callchains and
we're not checking those when creating the build-id table, so we
end up not being able to resolve VDSO symbols when doing analysis
on a different machine than the one where recording was done, possibly
of a different arch even (arm -> x86_64) (He Kuang)
Infrastructure changes:
- Generalize max_stack sysctl handler, will be used for configuring
multiple kernel knobs related to callchains (Arnaldo Carvalho de Melo)
Cleanups:
- Introduce DSO__NAME_KALLSYMS and DSO__NAME_KCORE, to stop using
open coded strings (Masami Hiramatsu)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/builtin-buildid-cache.c | 8 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 22 | ||||
-rw-r--r-- | tools/perf/perf.c | 3 | ||||
-rw-r--r-- | tools/perf/util/annotate.c | 2 | ||||
-rw-r--r-- | tools/perf/util/build-id.c | 2 | ||||
-rw-r--r-- | tools/perf/util/dso.c | 3 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 28 | ||||
-rw-r--r-- | tools/perf/util/stat-shadow.c | 8 | ||||
-rw-r--r-- | tools/perf/util/symbol.c | 10 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 3 | ||||
-rw-r--r-- | tools/perf/util/util.c | 3 | ||||
-rw-r--r-- | tools/perf/util/util.h | 3 |
12 files changed, 59 insertions, 36 deletions
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 632efc6..d75bded 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -119,8 +119,8 @@ static int build_id_cache__add_kcore(const char *filename, bool force) if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) return -1; - scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - buildid_dir, sbuildid); + scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -131,8 +131,8 @@ static int build_id_cache__add_kcore(const char *filename, bool force) if (build_id_cache__kcore_dir(dir, sizeof(dir))) return -1; - scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - buildid_dir, sbuildid, dir); + scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e459b68..ee7ada7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -66,6 +66,7 @@ #include <stdlib.h> #include <sys/prctl.h> #include <locale.h> +#include <math.h> #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "<not supported>" @@ -991,12 +992,12 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) const char *fmt; if (csv_output) { - fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; } else { if (big_num) - fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; else - fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + fmt = floor(sc) != sc ? 
"%18.2f%s" : "%18.0f%s"; } aggr_printout(evsel, id, nr); @@ -1909,6 +1910,9 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { + if (target__has_cpu(&target)) + default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) return -1; if (pmu_have_event("cpu", "stalled-cycles-frontend")) { @@ -2000,7 +2004,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session) { - struct stat_round_event *round = &event->stat_round; + struct stat_round_event *stat_round = &event->stat_round; struct perf_evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; @@ -2009,12 +2013,12 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, evlist__for_each(evsel_list, counter) perf_stat_process_counter(&stat_config, counter); - if (round->type == PERF_STAT_ROUND_TYPE__FINAL) - update_stats(&walltime_nsecs_stats, round->time); + if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) + update_stats(&walltime_nsecs_stats, stat_round->time); - if (stat_config.interval && round->time) { - tsh.tv_sec = round->time / NSECS_PER_SEC; - tsh.tv_nsec = round->time % NSECS_PER_SEC; + if (stat_config.interval && stat_round->time) { + tsh.tv_sec = stat_round->time / NSECS_PER_SEC; + tsh.tv_nsec = stat_round->time % NSECS_PER_SEC; ts = &tsh; } diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 7970008..15982ce 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -549,6 +549,9 @@ int main(int argc, const char **argv) if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) sysctl_perf_event_max_stack = value; + if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) + sysctl_perf_event_max_contexts_per_stack = value; + cmd = extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; diff --git a/tools/perf/util/annotate.c 
b/tools/perf/util/annotate.c index 4db73d5..b811924 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1122,7 +1122,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) } else if (dso__is_kcore(dso)) { goto fallback; } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || - strstr(command, "[kernel.kallsyms]") || + strstr(command, DSO__NAME_KALLSYMS) || access(symfs_filename, R_OK)) { free(filename); fallback: diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index bff425e..67e5966 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -256,7 +256,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd) size_t name_len; bool in_kernel = false; - if (!pos->hit) + if (!pos->hit && !dso__is_vdso(pos)) continue; if (dso__is_vdso(pos)) { diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 3357479..75b7561 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -7,6 +7,7 @@ #include "auxtrace.h" #include "util.h" #include "debug.h" +#include "vdso.h" char dso__symtab_origin(const struct dso *dso) { @@ -1169,7 +1170,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) struct dso *pos; list_for_each_entry(pos, head, node) { - if (with_hits && !pos->hit) + if (with_hits && !pos->hit && !dso__is_vdso(pos)) continue; if (pos->has_build_id) { have_build_id = true; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 639a290..7ba9fad 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -709,7 +709,7 @@ static struct dso *machine__get_kernel(struct machine *machine) if (machine__is_host(machine)) { vmlinux_name = symbol_conf.vmlinux_name; if (!vmlinux_name) - vmlinux_name = "[kernel.kallsyms]"; + vmlinux_name = DSO__NAME_KALLSYMS; kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); @@ -1811,9 +1811,9 @@ static int 
thread__resolve_callchain_sample(struct thread *thread, { struct branch_stack *branch = sample->branch_stack; struct ip_callchain *chain = sample->callchain; - int chain_nr = min(max_stack, (int)chain->nr); + int chain_nr = chain->nr; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err; + int i, j, err, nr_entries, nr_contexts; int skip_idx = -1; int first_call = 0; @@ -1828,7 +1828,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - if (chain->nr < sysctl_perf_event_max_stack) + if (chain_nr < sysctl_perf_event_max_stack) skip_idx = arch_skip_callchain_idx(thread, chain); /* @@ -1889,12 +1889,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) { - pr_warning("corrupted callchain. skipping...\n"); - return 0; - } - - for (i = first_call; i < chain_nr; i++) { + for (i = first_call, nr_entries = 0, nr_contexts = 0; + i < chain_nr && nr_entries < max_stack; i++) { u64 ip; if (callchain_param.order == ORDER_CALLEE) @@ -1908,6 +1904,14 @@ check_calls: #endif ip = chain->ips[j]; + if (ip >= PERF_CONTEXT_MAX) { + if (++nr_contexts > sysctl_perf_event_max_contexts_per_stack) + goto out_corrupted_callchain; + } else { + if (++nr_entries > sysctl_perf_event_max_stack) + goto out_corrupted_callchain; + } + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); if (err) @@ -1915,6 +1919,10 @@ check_calls: } return 0; + +out_corrupted_callchain: + pr_warning("corrupted callchain. 
skipping...\n"); + return 0; } static int unwind_entry(struct unwind_entry *entry, void *arg) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index fdb7196..aa9efe0 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -94,7 +94,8 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, { int ctx = evsel_context(counter); - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || + perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); @@ -188,7 +189,7 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); + out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); } static void print_branch_misses(int cpu, @@ -444,7 +445,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = total / avg; print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { + } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || + perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", avg / ratio); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 7fb3330..2252b54 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1662,8 +1662,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", buildid_dir, - sbuild_id); + scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir, + 
DSO__NAME_KCORE, sbuild_id); /* Use /proc/kallsyms if possible */ if (is_host) { @@ -1699,8 +1699,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) if (!find_matching_kcore(map, path, sizeof(path))) return strdup(path); - scnprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s", - buildid_dir, sbuild_id); + scnprintf(path, sizeof(path), "%s/%s/%s", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); if (access(path, F_OK)) { pr_err("No kallsyms or vmlinux with build-id %s was found\n", @@ -1769,7 +1769,7 @@ do_kallsyms: if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__KALLSYMS; - dso__set_long_name(dso, "[kernel.kallsyms]", false); + dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); map__fixup_start(map); map__fixup_end(map); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2b5e4ed..25f2fd67 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -44,6 +44,9 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +#define DSO__NAME_KALLSYMS "[kernel.kallsyms]" +#define DSO__NAME_KCORE "[kernel.kcore]" + /** struct symbol - symtab entry * * @ignore - resolvable but tools ignore it (e.g. 
idle routines) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index eab077a..23504ad 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -33,7 +33,8 @@ struct callchain_param callchain_param = { unsigned int page_size; int cacheline_size; -unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; bool test_attr__enabled; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7651633..1e8c316 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -261,7 +261,8 @@ void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; -extern unsigned int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; struct parse_tag { char tag; |