From 361c99a661a78ed22264649440e87fe4fe8da1f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Jan 2011 20:56:53 -0200 Subject: perf evsel: Introduce perf_evlist Killing two more perf wide global variables: nr_counters and evsel_list as a list_head. There are more operations that will need more fields in perf_evlist, like the pollfd for polling all the fds in a list of evsel instances. Use option->value to pass the evsel_list to parse_{events,filters}. LKML-Reference: Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7141c42..f20bc6f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -402,6 +402,7 @@ LIB_H += util/debug.h LIB_H += util/debugfs.h LIB_H += util/event.h LIB_H += util/evsel.h +LIB_H += util/evlist.h LIB_H += util/exec_cmd.h LIB_H += util/types.h LIB_H += util/levenshtein.h @@ -440,6 +441,7 @@ LIB_OBJS += $(OUTPUT)util/ctype.o LIB_OBJS += $(OUTPUT)util/debugfs.o LIB_OBJS += $(OUTPUT)util/environment.o LIB_OBJS += $(OUTPUT)util/event.o +LIB_OBJS += $(OUTPUT)util/evlist.o LIB_OBJS += $(OUTPUT)util/evsel.o LIB_OBJS += $(OUTPUT)util/exec_cmd.o LIB_OBJS += $(OUTPUT)util/help.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b2f729f..252ace8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -18,6 +18,7 @@ #include "util/header.h" #include "util/event.h" +#include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" #include "util/session.h" @@ -66,6 +67,7 @@ static bool sample_address = false; static bool sample_time = false; static bool no_buildid = false; static bool no_buildid_cache = false; +static struct perf_evlist *evsel_list; static long samples = 0; static u64 bytes_written = 0; @@ -229,7 +231,8 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr 
*a, int n return h_attr; } -static void create_counter(struct perf_evsel *evsel, int cpu) +static void create_counter(struct perf_evlist *evlist, + struct perf_evsel *evsel, int cpu) { char *filter = evsel->filter; struct perf_event_attr *attr = &evsel->attr; @@ -263,7 +266,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu) attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; - if (nr_counters > 1) + if (evlist->nr_entries > 1) attr->sample_type |= PERF_SAMPLE_ID; /* @@ -410,7 +413,7 @@ try_again: if (evsel->idx || thread_index) { struct perf_evsel *first; - first = list_entry(evsel_list.next, struct perf_evsel, node); + first = list_entry(evlist->entries.next, struct perf_evsel, node); ret = ioctl(FD(evsel, nr_cpu, thread_index), PERF_EVENT_IOC_SET_OUTPUT, FD(first, nr_cpu, 0)); @@ -449,14 +452,14 @@ try_again: sample_type = attr->sample_type; } -static void open_counters(int cpu) +static void open_counters(struct perf_evlist *evlist, int cpu) { struct perf_evsel *pos; group_fd = -1; - list_for_each_entry(pos, &evsel_list, node) - create_counter(pos, cpu); + list_for_each_entry(pos, &evlist->entries, node) + create_counter(evlist, pos, cpu); nr_cpu++; } @@ -481,9 +484,9 @@ static void atexit_header(void) if (!no_buildid) process_buildids(); - perf_header__write(&session->header, output, true); + perf_header__write(&session->header, evsel_list, output, true); perf_session__delete(session); - perf_evsel_list__delete(); + perf_evlist__delete(evsel_list); symbol__exit(); } } @@ -611,7 +614,7 @@ static int __cmd_record(int argc, const char **argv) goto out_delete_session; } - if (have_tracepoints(&evsel_list)) + if (have_tracepoints(&evsel_list->entries)) perf_header__set_feat(&session->header, HEADER_TRACE_INFO); /* @@ -674,10 +677,10 @@ static int __cmd_record(int argc, const char **argv) } if (!system_wide && no_inherit && !cpu_list) { - open_counters(-1); + open_counters(evsel_list, -1); } else { for (i = 0; i < cpus->nr; i++) - 
open_counters(cpus->map[i]); + open_counters(evsel_list, cpus->map[i]); } perf_session__set_sample_type(session, sample_type); @@ -687,7 +690,8 @@ static int __cmd_record(int argc, const char **argv) if (err < 0) return err; } else if (file_new) { - err = perf_header__write(&session->header, output, false); + err = perf_header__write(&session->header, evsel_list, + output, false); if (err < 0) return err; } @@ -712,7 +716,7 @@ static int __cmd_record(int argc, const char **argv) return err; } - if (have_tracepoints(&evsel_list)) { + if (have_tracepoints(&evsel_list->entries)) { /* * FIXME err <= 0 here actually means that * there were no tracepoints so its not really @@ -721,7 +725,7 @@ static int __cmd_record(int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = event__synthesize_tracing_data(output, &evsel_list, + err = event__synthesize_tracing_data(output, evsel_list, process_synthesized_event, session); if (err <= 0) { @@ -797,7 +801,7 @@ static int __cmd_record(int argc, const char **argv) for (i = 0; i < nr_cpu; i++) { struct perf_evsel *pos; - list_for_each_entry(pos, &evsel_list, node) { + list_for_each_entry(pos, &evsel_list->entries, node) { for (thread = 0; thread < threads->nr; thread++) @@ -838,10 +842,10 @@ static const char * const record_usage[] = { static bool force, append_file; const struct option record_options[] = { - OPT_CALLBACK('e', "event", NULL, "event", + OPT_CALLBACK('e', "event", &evsel_list, "event", "event selector. 
use 'perf list' to list available events", parse_events), - OPT_CALLBACK(0, "filter", NULL, "filter", + OPT_CALLBACK(0, "filter", &evsel_list, "filter", "event filter", parse_filter), OPT_INTEGER('p', "pid", &target_pid, "record events on existing process id"), @@ -892,6 +896,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) int err = -ENOMEM; struct perf_evsel *pos; + evsel_list = perf_evlist__new(); + if (evsel_list == NULL) + return -ENOMEM; + argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target_pid == -1 && target_tid == -1 && @@ -913,7 +921,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) if (no_buildid_cache || no_buildid) disable_buildid_cache(); - if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) { + if (evsel_list->nr_entries == 0 && + perf_evlist__add_default(evsel_list) < 0) { pr_err("Not enough memory for event selector list\n"); goto out_symbol_exit; } @@ -933,7 +942,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) return -1; } - list_for_each_entry(pos, &evsel_list, node) { + list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) goto out_free_fd; if (perf_header__push_event(pos->attr.config, event_name(pos))) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a482a19..da90902 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -43,6 +43,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" #include "util/event.h" +#include "util/evlist.h" #include "util/evsel.h" #include "util/debug.h" #include "util/header.h" @@ -71,6 +72,8 @@ static struct perf_event_attr default_attrs[] = { }; +struct perf_evlist *evsel_list; + static bool system_wide = false; static struct cpu_map *cpus; static int run_idx = 0; @@ -309,7 +312,7 @@ static int run_perf_stat(int argc __used, const char **argv) 
close(child_ready_pipe[0]); } - list_for_each_entry(counter, &evsel_list, node) { + list_for_each_entry(counter, &evsel_list->entries, node) { if (create_perf_stat_counter(counter) < 0) { if (errno == -EPERM || errno == -EACCES) { error("You may not have permission to collect %sstats.\n" @@ -347,12 +350,12 @@ static int run_perf_stat(int argc __used, const char **argv) update_stats(&walltime_nsecs_stats, t1 - t0); if (no_aggr) { - list_for_each_entry(counter, &evsel_list, node) { + list_for_each_entry(counter, &evsel_list->entries, node) { read_counter(counter); perf_evsel__close_fd(counter, cpus->nr, 1); } } else { - list_for_each_entry(counter, &evsel_list, node) { + list_for_each_entry(counter, &evsel_list->entries, node) { read_counter_aggr(counter); perf_evsel__close_fd(counter, cpus->nr, threads->nr); } @@ -555,10 +558,10 @@ static void print_stat(int argc, const char **argv) } if (no_aggr) { - list_for_each_entry(counter, &evsel_list, node) + list_for_each_entry(counter, &evsel_list->entries, node) print_counter(counter); } else { - list_for_each_entry(counter, &evsel_list, node) + list_for_each_entry(counter, &evsel_list->entries, node) print_counter_aggr(counter); } @@ -610,7 +613,7 @@ static int stat__set_big_num(const struct option *opt __used, } static const struct option options[] = { - OPT_CALLBACK('e', "event", NULL, "event", + OPT_CALLBACK('e', "event", &evsel_list, "event", "event selector. 
use 'perf list' to list available events", parse_events), OPT_BOOLEAN('i', "no-inherit", &no_inherit, @@ -648,6 +651,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) setlocale(LC_ALL, ""); + evsel_list = perf_evlist__new(); + if (evsel_list == NULL) + return -ENOMEM; + argc = parse_options(argc, argv, options, stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -679,17 +686,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) usage_with_options(stat_usage, options); /* Set attrs and nr_counters if no event is selected and !null_run */ - if (!null_run && !nr_counters) { + if (!null_run && !evsel_list->nr_entries) { size_t c; - nr_counters = ARRAY_SIZE(default_attrs); - for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { - pos = perf_evsel__new(&default_attrs[c], - nr_counters); + pos = perf_evsel__new(&default_attrs[c], c); if (pos == NULL) goto out; - list_add(&pos->node, &evsel_list); + perf_evlist__add(evsel_list, pos); } } @@ -713,7 +717,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) return -1; } - list_for_each_entry(pos, &evsel_list, node) { + list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_stat_priv(pos) < 0 || perf_evsel__alloc_counts(pos, cpus->nr) < 0 || perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) @@ -741,9 +745,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (status != -1) print_stat(argc, argv); out_free_fd: - list_for_each_entry(pos, &evsel_list, node) + list_for_each_entry(pos, &evsel_list->entries, node) perf_evsel__free_stat_priv(pos); - perf_evsel_list__delete(); + perf_evlist__delete(evsel_list); out: thread_map__delete(threads); threads = NULL; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b6998e0..216b62e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -21,6 +21,7 @@ #include "perf.h" #include "util/color.h" +#include "util/evlist.h" #include "util/evsel.h" #include 
"util/session.h" #include "util/symbol.h" @@ -60,6 +61,8 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) +struct perf_evlist *evsel_list; + static bool system_wide = false; static int default_interval = 0; @@ -267,7 +270,7 @@ static void __zero_source_counters(struct sym_entry *syme) line = syme->src->lines; while (line) { - for (i = 0; i < nr_counters; i++) + for (i = 0; i < evsel_list->nr_entries; i++) line->count[i] = 0; line = line->next; } @@ -414,7 +417,7 @@ static double sym_weight(const struct sym_entry *sym) if (!display_weighted) return weight; - for (counter = 1; counter < nr_counters-1; counter++) + for (counter = 1; counter < evsel_list->nr_entries - 1; counter++) weight *= sym->count[counter]; weight /= (sym->count[counter] + 1); @@ -501,7 +504,7 @@ static void print_sym_table(void) rb_insert_active_sym(&tmp, syme); sum_ksamples += syme->snap_count; - for (j = 0; j < nr_counters; j++) + for (j = 0; j < evsel_list->nr_entries; j++) syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; } else list_remove_active_sym(syme); @@ -535,9 +538,9 @@ static void print_sym_table(void) esamples_percent); } - if (nr_counters == 1 || !display_weighted) { + if (evsel_list->nr_entries == 1 || !display_weighted) { struct perf_evsel *first; - first = list_entry(evsel_list.next, struct perf_evsel, node); + first = list_entry(evsel_list->entries.next, struct perf_evsel, node); printf("%" PRIu64, (uint64_t)first->attr.sample_period); if (freq) printf("Hz "); @@ -547,7 +550,7 @@ static void print_sym_table(void) if (!display_weighted) printf("%s", event_name(sym_evsel)); - else list_for_each_entry(counter, &evsel_list, node) { + else list_for_each_entry(counter, &evsel_list->entries, node) { if (counter->idx) printf("/"); @@ -606,7 +609,7 @@ static void print_sym_table(void) sym_width = winsize.ws_col - dso_width - 29; } putchar('\n'); - if (nr_counters == 1) + if (evsel_list->nr_entries == 1) printf(" samples pcnt"); else printf(" weight samples pcnt"); @@ 
-615,7 +618,7 @@ static void print_sym_table(void) printf(" RIP "); printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); printf(" %s _______ _____", - nr_counters == 1 ? " " : "______"); + evsel_list->nr_entries == 1 ? " " : "______"); if (verbose) printf(" ________________"); printf(" %-*.*s", sym_width, sym_width, graph_line); @@ -634,7 +637,7 @@ static void print_sym_table(void) pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - if (nr_counters == 1 || !display_weighted) + if (evsel_list->nr_entries == 1 || !display_weighted) printf("%20.2f ", syme->weight); else printf("%9.1f %10ld ", syme->weight, syme->snap_count); @@ -744,7 +747,7 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); - if (nr_counters > 1) + if (evsel_list->nr_entries > 1) fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel)); fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); @@ -753,7 +756,7 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); - if (nr_counters > 1) + if (evsel_list->nr_entries > 1) fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); fprintf(stdout, @@ -783,7 +786,7 @@ static int key_mapped(int c) return 1; case 'E': case 'w': - return nr_counters > 1 ? 1 : 0; + return evsel_list->nr_entries > 1 ? 
1 : 0; default: break; } @@ -831,22 +834,22 @@ static void handle_keypress(struct perf_session *session, int c) signal(SIGWINCH, SIG_DFL); break; case 'E': - if (nr_counters > 1) { + if (evsel_list->nr_entries > 1) { fprintf(stderr, "\nAvailable events:"); - list_for_each_entry(sym_evsel, &evsel_list, node) + list_for_each_entry(sym_evsel, &evsel_list->entries, node) fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel)); prompt_integer(&sym_counter, "Enter details event counter"); - if (sym_counter >= nr_counters) { - sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); + if (sym_counter >= evsel_list->nr_entries) { + sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); sym_counter = 0; fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel)); sleep(1); break; } - list_for_each_entry(sym_evsel, &evsel_list, node) + list_for_each_entry(sym_evsel, &evsel_list->entries, node) if (sym_evsel->idx == sym_counter) break; } else sym_counter = 0; @@ -1198,7 +1201,7 @@ static void perf_session__mmap_read(struct perf_session *self) int i, thread_index; for (i = 0; i < cpus->nr; i++) { - list_for_each_entry(counter, &evsel_list, node) { + list_for_each_entry(counter, &evsel_list->entries, node) { for (thread_index = 0; thread_index < threads->nr; thread_index++) { @@ -1312,7 +1315,7 @@ static int __cmd_top(void) for (i = 0; i < cpus->nr; i++) { group_fd = -1; - list_for_each_entry(counter, &evsel_list, node) + list_for_each_entry(counter, &evsel_list->entries, node) start_counter(i, counter); } @@ -1354,7 +1357,7 @@ static const char * const top_usage[] = { }; static const struct option options[] = { - OPT_CALLBACK('e', "event", NULL, "event", + OPT_CALLBACK('e', "event", &evsel_list, "event", "event selector. 
use 'perf list' to list available events", parse_events), OPT_INTEGER('c', "count", &default_interval, @@ -1404,6 +1407,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) struct perf_evsel *pos; int status = -ENOMEM; + evsel_list = perf_evlist__new(); + if (evsel_list == NULL) + return -ENOMEM; + page_size = sysconf(_SC_PAGE_SIZE); argc = parse_options(argc, argv, options, top_usage, 0); @@ -1431,7 +1438,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) cpu_list = NULL; } - if (!nr_counters && perf_evsel_list__create_default() < 0) { + if (!evsel_list->nr_entries && + perf_evlist__add_default(evsel_list) < 0) { pr_err("Not enough memory for event selector list\n"); return -ENOMEM; } @@ -1459,7 +1467,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (cpus == NULL) usage_with_options(top_usage, options); - list_for_each_entry(pos, &evsel_list, node) { + list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 || perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) goto out_free_fd; @@ -1472,10 +1480,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = default_interval; } - sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); + sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); symbol_conf.priv_size = (sizeof(struct sym_entry) + - (nr_counters + 1) * sizeof(unsigned long)); + (evsel_list->nr_entries + 1) * sizeof(unsigned long)); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init() < 0) @@ -1489,9 +1497,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) status = __cmd_top(); out_free_fd: - list_for_each_entry(pos, &evsel_list, node) + list_for_each_entry(pos, &evsel_list->entries, node) perf_evsel__free_mmap(pos); - perf_evsel_list__delete(); + perf_evlist__delete(evsel_list); return status; } 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c new file mode 100644 index 0000000..7b4faec --- /dev/null +++ b/tools/perf/util/evlist.c @@ -0,0 +1,53 @@ +#include "evlist.h" +#include "evsel.h" +#include "util.h" + +struct perf_evlist *perf_evlist__new(void) +{ + struct perf_evlist *evlist = zalloc(sizeof(*evlist)); + + if (evlist != NULL) { + INIT_LIST_HEAD(&evlist->entries); + } + + return evlist; +} + +static void perf_evlist__purge(struct perf_evlist *evlist) +{ + struct perf_evsel *pos, *n; + + list_for_each_entry_safe(pos, n, &evlist->entries, node) { + list_del_init(&pos->node); + perf_evsel__delete(pos); + } + + evlist->nr_entries = 0; +} + +void perf_evlist__delete(struct perf_evlist *evlist) +{ + perf_evlist__purge(evlist); + free(evlist); +} + +void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) +{ + list_add_tail(&entry->node, &evlist->entries); + ++evlist->nr_entries; +} + +int perf_evlist__add_default(struct perf_evlist *evlist) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + struct perf_evsel *evsel = perf_evsel__new(&attr, 0); + + if (evsel == NULL) + return -ENOMEM; + + perf_evlist__add(evlist, evsel); + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h new file mode 100644 index 0000000..48db91a --- /dev/null +++ b/tools/perf/util/evlist.h @@ -0,0 +1,19 @@ +#ifndef __PERF_EVLIST_H +#define __PERF_EVLIST_H 1 + +#include + +struct perf_evlist { + struct list_head entries; + int nr_entries; +}; + +struct perf_evsel; + +struct perf_evlist *perf_evlist__new(void); +void perf_evlist__delete(struct perf_evlist *evlist); + +void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); +int perf_evlist__add_default(struct perf_evlist *evlist); + +#endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f6a929e..f0138d4 100644 --- a/tools/perf/util/header.c +++ 
b/tools/perf/util/header.c @@ -8,6 +8,7 @@ #include #include +#include "evlist.h" #include "util.h" #include "header.h" #include "../perf.h" @@ -428,7 +429,8 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi return ret; } -static int perf_header__adds_write(struct perf_header *self, int fd) +static int perf_header__adds_write(struct perf_header *self, + struct perf_evlist *evlist, int fd) { int nr_sections; struct perf_session *session; @@ -463,7 +465,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd) /* Write trace info */ trace_sec->offset = lseek(fd, 0, SEEK_CUR); - read_tracing_data(fd, &evsel_list); + read_tracing_data(fd, &evlist->entries); trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; } @@ -513,7 +515,8 @@ int perf_header__write_pipe(int fd) return 0; } -int perf_header__write(struct perf_header *self, int fd, bool at_exit) +int perf_header__write(struct perf_header *self, struct perf_evlist *evlist, + int fd, bool at_exit) { struct perf_file_header f_header; struct perf_file_attr f_attr; @@ -566,7 +569,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) self->data_offset = lseek(fd, 0, SEEK_CUR); if (at_exit) { - err = perf_header__adds_write(self, fd); + err = perf_header__adds_write(self, evlist, fd); if (err < 0) return err; } @@ -1133,7 +1136,7 @@ int event__process_event_type(event_t *self, return 0; } -int event__synthesize_tracing_data(int fd, struct list_head *pattrs, +int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, event__handler_t process, struct perf_session *session __unused) { @@ -1144,7 +1147,7 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs, memset(&ev, 0, sizeof(ev)); ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; - size = read_tracing_data_size(fd, pattrs); + size = read_tracing_data_size(fd, &evlist->entries); if (size <= 0) return size; aligned_size = ALIGN(size, sizeof(u64)); @@ 
-1154,7 +1157,7 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs, process(&ev, NULL, session); - err = read_tracing_data(fd, pattrs); + err = read_tracing_data(fd, &evlist->entries); write_padded(fd, NULL, 0, padding); return aligned_size; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 33f16be..65afd7f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -65,8 +65,11 @@ struct perf_header { int perf_header__init(struct perf_header *self); void perf_header__exit(struct perf_header *self); +struct perf_evlist; + int perf_header__read(struct perf_session *session, int fd); -int perf_header__write(struct perf_header *self, int fd, bool at_exit); +int perf_header__write(struct perf_header *self, struct perf_evlist *evlist, + int fd, bool at_exit); int perf_header__write_pipe(int fd); int perf_header__add_attr(struct perf_header *self, @@ -113,7 +116,7 @@ int event__synthesize_event_types(event__handler_t process, int event__process_event_type(event_t *self, struct perf_session *session); -int event__synthesize_tracing_data(int fd, struct list_head *pattrs, +int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, event__handler_t process, struct perf_session *session); int event__process_tracing_data(event_t *self, diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h index f5ca26e..356c7e4 100644 --- a/tools/perf/util/include/linux/list.h +++ b/tools/perf/util/include/linux/list.h @@ -1,3 +1,4 @@ +#include #include "../../../../include/linux/list.h" #ifndef PERF_LIST_H diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 135f69b..d3086ce 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,6 +1,7 @@ #include "../../../include/linux/hw_breakpoint.h" #include "util.h" #include "../perf.h" +#include "evlist.h" #include "evsel.h" #include "parse-options.h" #include "parse-events.h" @@ -11,10 +12,6 
@@ #include "header.h" #include "debugfs.h" -int nr_counters; - -LIST_HEAD(evsel_list); - struct event_symbol { u8 type; u64 config; @@ -778,8 +775,9 @@ modifier: return ret; } -int parse_events(const struct option *opt __used, const char *str, int unset __used) +int parse_events(const struct option *opt, const char *str, int unset __used) { + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_event_attr attr; enum event_result ret; @@ -794,12 +792,10 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u if (ret != EVT_HANDLED_ALL) { struct perf_evsel *evsel; - evsel = perf_evsel__new(&attr, - nr_counters); + evsel = perf_evsel__new(&attr, evlist->nr_entries); if (evsel == NULL) return -1; - list_add_tail(&evsel->node, &evsel_list); - ++nr_counters; + perf_evlist__add(evlist, evsel); } if (*str == 0) @@ -813,13 +809,14 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u return 0; } -int parse_filter(const struct option *opt __used, const char *str, +int parse_filter(const struct option *opt, const char *str, int unset __used) { + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_evsel *last = NULL; - if (!list_empty(&evsel_list)) - last = list_entry(evsel_list.prev, struct perf_evsel, node); + if (evlist->nr_entries > 0) + last = list_entry(evlist->entries.prev, struct perf_evsel, node); if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { fprintf(stderr, @@ -981,33 +978,3 @@ void print_events(void) exit(129); } - -int perf_evsel_list__create_default(void) -{ - struct perf_evsel *evsel; - struct perf_event_attr attr; - - memset(&attr, 0, sizeof(attr)); - attr.type = PERF_TYPE_HARDWARE; - attr.config = PERF_COUNT_HW_CPU_CYCLES; - - evsel = perf_evsel__new(&attr, 0); - - if (evsel == NULL) - return -ENOMEM; - - list_add(&evsel->node, &evsel_list); - ++nr_counters; - return 0; -} - -void perf_evsel_list__delete(void) -{ - struct perf_evsel *pos, 
*n; - - list_for_each_entry_safe(pos, n, &evsel_list, node) { - list_del_init(&pos->node); - perf_evsel__delete(pos); - } - nr_counters = 0; -} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 458e3ec..cf7e94a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -9,11 +9,6 @@ struct list_head; struct perf_evsel; -extern struct list_head evsel_list; - -int perf_evsel_list__create_default(void); -void perf_evsel_list__delete(void); - struct option; struct tracepoint_path { @@ -25,8 +20,6 @@ struct tracepoint_path { extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern bool have_tracepoints(struct list_head *evlist); -extern int nr_counters; - const char *event_name(struct perf_evsel *event); extern const char *__event_name(int type, u64 config); -- cgit v0.10.2 From 5c581041cf97aa7980b442de81ddea8273d6dcde Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Jan 2011 22:30:02 -0200 Subject: perf evlist: Adopt the pollfd array Allocating just the space needed for nr_cpus * nr_threads * nr_evsels, not the MAX_NR_CPUS and counters. 
LKML-Reference: Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 252ace8..1614d89 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -72,9 +72,6 @@ static struct perf_evlist *evsel_list; static long samples = 0; static u64 bytes_written = 0; -static struct pollfd *event_array; - -static int nr_poll = 0; static int nr_cpu = 0; static int file_new = 1; @@ -432,9 +429,9 @@ try_again: exit(-1); } - event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index); - event_array[nr_poll].events = POLLIN; - nr_poll++; + evlist->pollfd[evlist->nr_fds].fd = FD(evsel, nr_cpu, thread_index); + evlist->pollfd[evlist->nr_fds].events = POLLIN; + evlist->nr_fds++; } if (filter != NULL) { @@ -793,7 +790,7 @@ static int __cmd_record(int argc, const char **argv) if (hits == samples) { if (done) break; - err = poll(event_array, nr_poll, -1); + err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1); waking++; } @@ -948,9 +945,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) if (perf_header__push_event(pos->attr.config, event_name(pos))) goto out_free_fd; } - event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS * - MAX_COUNTERS * threads->nr)); - if (!event_array) + + if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0) goto out_free_fd; if (user_interval != ULLONG_MAX) @@ -968,13 +964,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) } else { fprintf(stderr, "frequency and count are zero, aborting\n"); err = -EINVAL; - goto out_free_event_array; + goto out_free_fd; } err = __cmd_record(argc, argv); -out_free_event_array: - free(event_array); out_free_fd: thread_map__delete(threads); threads = NULL; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 216b62e..1bc4652 100644 
--- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1193,8 +1193,6 @@ static void perf_session__mmap_read_counter(struct perf_session *self, md->prev = old; } -static struct pollfd *event_array; - static void perf_session__mmap_read(struct perf_session *self) { struct perf_evsel *counter; @@ -1212,10 +1210,10 @@ static void perf_session__mmap_read(struct perf_session *self) } } -int nr_poll; int group_fd; -static void start_counter(int i, struct perf_evsel *evsel) +static void start_counter(int i, struct perf_evlist *evlist, + struct perf_evsel *evsel) { struct xyarray *mmap_array = evsel->priv; struct mmap_data *mm; @@ -1281,9 +1279,9 @@ try_again: if (group && group_fd == -1) group_fd = FD(evsel, i, thread_index); - event_array[nr_poll].fd = FD(evsel, i, thread_index); - event_array[nr_poll].events = POLLIN; - nr_poll++; + evlist->pollfd[evlist->nr_fds].fd = FD(evsel, i, thread_index); + evlist->pollfd[evlist->nr_fds].events = POLLIN; + evlist->nr_fds++; mm = xyarray__entry(mmap_array, i, thread_index); mm->prev = 0; @@ -1316,11 +1314,11 @@ static int __cmd_top(void) for (i = 0; i < cpus->nr; i++) { group_fd = -1; list_for_each_entry(counter, &evsel_list->entries, node) - start_counter(i, counter); + start_counter(i, evsel_list, counter); } /* Wait for a minimal set of events before starting the snapshot */ - poll(&event_array[0], nr_poll, 100); + poll(evsel_list->pollfd, evsel_list->nr_fds, 100); perf_session__mmap_read(session); @@ -1345,7 +1343,7 @@ static int __cmd_top(void) perf_session__mmap_read(session); if (hits == samples) - ret = poll(event_array, nr_poll, 100); + ret = poll(evsel_list->pollfd, evsel_list->nr_fds, 100); } return 0; @@ -1426,11 +1424,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) usage_with_options(top_usage, options); } - event_array = malloc((sizeof(struct pollfd) * - MAX_NR_CPUS * MAX_COUNTERS * threads->nr)); - if (!event_array) - return -ENOMEM; - /* CPU and PID are mutually exclusive */ if 
(target_tid > 0 && cpu_list) { printf("WARNING: PID switch overriding CPU\n"); @@ -1480,6 +1473,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = default_interval; } + if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0) + goto out_free_fd; + sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); symbol_conf.priv_size = (sizeof(struct sym_entry) + diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7b4faec..2abf949 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1,3 +1,4 @@ +#include #include "evlist.h" #include "evsel.h" #include "util.h" @@ -28,6 +29,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__delete(struct perf_evlist *evlist) { perf_evlist__purge(evlist); + free(evlist->pollfd); free(evlist); } @@ -51,3 +53,10 @@ int perf_evlist__add_default(struct perf_evlist *evlist) perf_evlist__add(evlist, evsel); return 0; } + +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads) +{ + int nfds = ncpus * nthreads * evlist->nr_entries; + evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); + return evlist->pollfd != NULL ? 
0 : -ENOMEM; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 48db91a..a7d7e12 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -3,9 +3,13 @@ #include +struct pollfd; + struct perf_evlist { struct list_head entries; int nr_entries; + int nr_fds; + struct pollfd *pollfd; }; struct perf_evsel; @@ -16,4 +20,6 @@ void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads); + #endif /* __PERF_EVLIST_H */ -- cgit v0.10.2 From f08199d314458610d4ca52f8e86e0a4ec7a7bc54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Jan 2011 23:42:19 -0200 Subject: perf evsel: Support event groups perf_evsel__open now has an extra boolean argument specifying if event grouping is desired. The first file descriptor created on a CPU becomes the group leader. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index da90902..b5fe522 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -169,7 +169,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) - return perf_evsel__open_per_cpu(evsel, cpus); + return perf_evsel__open_per_cpu(evsel, cpus, false); attr->inherit = !no_inherit; if (target_pid == -1 && target_tid == -1) { @@ -177,7 +177,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - return perf_evsel__open_per_thread(evsel, threads); + return perf_evsel__open_per_thread(evsel, threads, false); } /* diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 5dcdba6..4282d67 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -289,7 +289,7 @@ static int test__open_syscall_event(void) goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads) < 0) { + if (perf_evsel__open_per_thread(evsel, threads, false) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -364,7 +364,7 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - if (perf_evsel__open(evsel, cpus, threads) < 0) { + if (perf_evsel__open(evsel, cpus, threads, false) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f5cfed6..da473ec 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -128,7 +128,7 @@ int __perf_evsel__read(struct perf_evsel *evsel, } static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - 
struct thread_map *threads) + struct thread_map *threads, bool group) { int cpu, thread; @@ -137,12 +137,18 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, return -1; for (cpu = 0; cpu < cpus->nr; cpu++) { + int group_fd = -1; + for (thread = 0; thread < threads->nr; thread++) { FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, threads->map[thread], - cpus->map[cpu], -1, 0); + cpus->map[cpu], + group_fd, 0); if (FD(evsel, cpu, thread) < 0) goto out_close; + + if (group && group_fd == -1) + group_fd = FD(evsel, cpu, thread); } } @@ -175,10 +181,9 @@ static struct { .threads = { -1, }, }; -int perf_evsel__open(struct perf_evsel *evsel, - struct cpu_map *cpus, struct thread_map *threads) +int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, + struct thread_map *threads, bool group) { - if (cpus == NULL) { /* Work around old compiler warnings about strict aliasing */ cpus = &empty_cpu_map.map; @@ -187,15 +192,17 @@ int perf_evsel__open(struct perf_evsel *evsel, if (threads == NULL) threads = &empty_thread_map.map; - return __perf_evsel__open(evsel, cpus, threads); + return __perf_evsel__open(evsel, cpus, threads, group); } -int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) +int perf_evsel__open_per_cpu(struct perf_evsel *evsel, + struct cpu_map *cpus, bool group) { - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group); } -int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) +int perf_evsel__open_per_thread(struct perf_evsel *evsel, + struct thread_map *threads, bool group) { - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b2d755f..0962b50 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -45,10 
+45,12 @@ int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); -int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); -int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads); -int perf_evsel__open(struct perf_evsel *evsel, - struct cpu_map *cpus, struct thread_map *threads); +int perf_evsel__open_per_cpu(struct perf_evsel *evsel, + struct cpu_map *cpus, bool group); +int perf_evsel__open_per_thread(struct perf_evsel *evsel, + struct thread_map *threads, bool group); +int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, + struct thread_map *threads, bool group); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ -- cgit v0.10.2 From 9d04f1781772e11bd58806391555fc23ebb54377 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 00:08:18 -0200 Subject: perf evsel: Allow specifying if the inherit bit should be set As this is a per-cpu attribute, we can't set it up in advance and use it for all the calls. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b5fe522..e2a2d02 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -169,7 +169,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) - return perf_evsel__open_per_cpu(evsel, cpus, false); + return perf_evsel__open_per_cpu(evsel, cpus, false, false); attr->inherit = !no_inherit; if (target_pid == -1 && target_tid == -1) { @@ -177,7 +177,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - return perf_evsel__open_per_thread(evsel, threads, false); + return perf_evsel__open_per_thread(evsel, threads, false, false); } /* diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 4282d67..7287158 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -289,7 +289,7 @@ static int test__open_syscall_event(void) goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads, false) < 0) { + if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -364,7 +364,7 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - if (perf_evsel__open(evsel, cpus, threads, false) < 0) { + if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index da473ec..82a0053 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -128,7 +128,7 @@ int __perf_evsel__read(struct perf_evsel *evsel, } static int 
__perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group) + struct thread_map *threads, bool group, bool inherit) { int cpu, thread; @@ -139,6 +139,8 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { int group_fd = -1; + evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit; + for (thread = 0; thread < threads->nr; thread++) { FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, threads->map[thread], @@ -182,7 +184,7 @@ static struct { }; int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group) + struct thread_map *threads, bool group, bool inherit) { if (cpus == NULL) { /* Work around old compiler warnings about strict aliasing */ @@ -192,17 +194,17 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, if (threads == NULL) threads = &empty_thread_map.map; - return __perf_evsel__open(evsel, cpus, threads, group); + return __perf_evsel__open(evsel, cpus, threads, group, inherit); } int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group) + struct cpu_map *cpus, bool group, bool inherit) { - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit); } int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group) + struct thread_map *threads, bool group, bool inherit) { - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group); + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0962b50..1594696 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -46,11 +46,11 @@ void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int 
nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group); + struct cpu_map *cpus, bool group, bool inherit); int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group); + struct thread_map *threads, bool group, bool inherit); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group); + struct thread_map *threads, bool group, bool inherit); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ -- cgit v0.10.2 From 72cb7013e08dec29631e0447f9496b7bacd3e14b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 10:52:47 -0200 Subject: perf top: Use perf_evsel__open Now that it handles group_fd and inherit we can use it, sharing it with stat. Next step: 'perf record' should use it, then move the mmap_array out of ->priv and into perf_evsel, with top and record sharing this, and at the same time, write a 'perf test' stress test. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1bc4652..15d89be 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1210,39 +1210,50 @@ static void perf_session__mmap_read(struct perf_session *self) } } -int group_fd; - static void start_counter(int i, struct perf_evlist *evlist, struct perf_evsel *evsel) { struct xyarray *mmap_array = evsel->priv; struct mmap_data *mm; - struct perf_event_attr *attr; - int cpu = -1; int thread_index; - if (target_tid == -1) - cpu = cpus->map[i]; - - attr = &evsel->attr; + for (thread_index = 0; thread_index < threads->nr; thread_index++) { + assert(FD(evsel, i, thread_index) >= 0); + fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK); - attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + evlist->pollfd[evlist->nr_fds].fd = FD(evsel, i, thread_index); + evlist->pollfd[evlist->nr_fds].events = POLLIN; + evlist->nr_fds++; - if (freq) { - attr->sample_type |= PERF_SAMPLE_PERIOD; - attr->freq = 1; - attr->sample_freq = freq; + mm = xyarray__entry(mmap_array, i, thread_index); + mm->prev = 0; + mm->mask = mmap_pages*page_size - 1; + mm->base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0); + if (mm->base == MAP_FAILED) + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } +} + +static void start_counters(struct perf_evlist *evlist) +{ + struct perf_evsel *counter; + int i; - attr->inherit = (cpu < 0) && inherit; - attr->mmap = 1; + list_for_each_entry(counter, &evlist->entries, node) { + struct perf_event_attr *attr = &counter->attr; - for (thread_index = 0; thread_index < threads->nr; thread_index++) { -try_again: - FD(evsel, i, thread_index) = sys_perf_event_open(attr, - threads->map[thread_index], cpu, group_fd, 0); + attr->sample_type = 
PERF_SAMPLE_IP | PERF_SAMPLE_TID; + + if (freq) { + attr->sample_type |= PERF_SAMPLE_PERIOD; + attr->freq = 1; + attr->sample_freq = freq; + } - if (FD(evsel, i, thread_index) < 0) { + attr->mmap = 1; +try_again: + if (perf_evsel__open(counter, cpus, threads, group, inherit) < 0) { int err = errno; if (err == EPERM || err == EACCES) @@ -1254,8 +1265,8 @@ try_again: * based cpu-clock-tick sw counter, which * is always available even if no PMU support: */ - if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { + if (attr->type == PERF_TYPE_HARDWARE && + attr->config == PERF_COUNT_HW_CPU_CYCLES) { if (verbose) warning(" ... trying to fall back to cpu-clock-ticks\n"); @@ -1265,39 +1276,24 @@ try_again: goto try_again; } printf("\n"); - error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", - FD(evsel, i, thread_index), strerror(err)); + error("sys_perf_event_open() syscall returned with %d " + "(%s). /bin/dmesg may provide additional information.\n", + err, strerror(err)); die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } - assert(FD(evsel, i, thread_index) >= 0); - fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = FD(evsel, i, thread_index); - - evlist->pollfd[evlist->nr_fds].fd = FD(evsel, i, thread_index); - evlist->pollfd[evlist->nr_fds].events = POLLIN; - evlist->nr_fds++; + } - mm = xyarray__entry(mmap_array, i, thread_index); - mm->prev = 0; - mm->mask = mmap_pages*page_size - 1; - mm->base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0); - if (mm->base == MAP_FAILED) - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); + for (i = 0; i < cpus->nr; i++) { + list_for_each_entry(counter, &evlist->entries, node) + start_counter(i, evsel_list, counter); } } static int __cmd_top(void) { pthread_t 
thread; - struct perf_evsel *counter; - int i, ret; + int ret; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. @@ -1311,11 +1307,7 @@ static int __cmd_top(void) else event__synthesize_threads(event__process, session); - for (i = 0; i < cpus->nr; i++) { - group_fd = -1; - list_for_each_entry(counter, &evsel_list->entries, node) - start_counter(i, evsel_list, counter); - } + start_counters(evsel_list); /* Wait for a minimal set of events before starting the snapshot */ poll(evsel_list->pollfd, evsel_list->nr_fds, 100); -- cgit v0.10.2 From dd7927f4f8ee75b032ff15aeef4bda49719a443a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 14:28:51 -0200 Subject: perf record: Use perf_evsel__open Now it's time to factor out the mmap handling bits into the perf_evsel class. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1614d89..ec43f2e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -72,8 +72,6 @@ static struct perf_evlist *evsel_list; static long samples = 0; static u64 bytes_written = 0; -static int nr_cpu = 0; - static int file_new = 1; static off_t post_processing_offset; @@ -208,8 +206,6 @@ static void sig_atexit(void) kill(getpid(), signr); } -static int group_fd; - static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) { struct perf_header_attr *h_attr; @@ -234,7 +230,6 @@ static void create_counter(struct perf_evlist *evlist, char *filter = evsel->filter; struct perf_event_attr *attr = &evsel->attr; struct perf_header_attr *h_attr; - int track = !evsel->idx; /* only the first counter needs these */ int thread_index; int ret; struct { @@ -243,19 +238,77 @@ static void 
create_counter(struct perf_evlist *evlist, u64 time_running; u64 id; } read_data; - /* - * Check if parse_single_tracepoint_event has already asked for - * PERF_SAMPLE_TIME. - * - * XXX this is kludgy but short term fix for problems introduced by - * eac23d1c that broke 'perf script' by having different sample_types - * when using multiple tracepoint events when we use a perf binary - * that tries to use sample_id_all on an older kernel. - * - * We need to move counter creation to perf_session, support - * different sample_types, etc. - */ - bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; + + for (thread_index = 0; thread_index < threads->nr; thread_index++) { + h_attr = get_header_attr(attr, evsel->idx); + if (h_attr == NULL) + die("nomem\n"); + + if (!file_new) { + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + + if (read(FD(evsel, cpu, thread_index), &read_data, sizeof(read_data)) == -1) { + perror("Unable to read perf file descriptor"); + exit(-1); + } + + if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { + pr_warning("Not enough memory to add id\n"); + exit(-1); + } + + assert(FD(evsel, cpu, thread_index) >= 0); + fcntl(FD(evsel, cpu, thread_index), F_SETFL, O_NONBLOCK); + + if (evsel->idx || thread_index) { + struct perf_evsel *first; + first = list_entry(evlist->entries.next, struct perf_evsel, node); + ret = ioctl(FD(evsel, cpu, thread_index), + PERF_EVENT_IOC_SET_OUTPUT, + FD(first, cpu, 0)); + if (ret) { + error("failed to set output: %d (%s)\n", errno, + strerror(errno)); + exit(-1); + } + } else { + mmap_array[cpu].prev = 0; + mmap_array[cpu].mask = mmap_pages*page_size - 1; + mmap_array[cpu].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, cpu, thread_index), 0); + if (mmap_array[cpu].base == MAP_FAILED) { + error("failed to mmap with %d (%s)\n", errno, strerror(errno)); + exit(-1); + } + + evlist->pollfd[evlist->nr_fds].fd = 
FD(evsel, cpu, thread_index); + evlist->pollfd[evlist->nr_fds].events = POLLIN; + evlist->nr_fds++; + } + + if (filter != NULL) { + ret = ioctl(FD(evsel, cpu, thread_index), + PERF_EVENT_IOC_SET_FILTER, filter); + if (ret) { + error("failed to set filter with %d (%s)\n", errno, + strerror(errno)); + exit(-1); + } + } + } + + if (!sample_type) + sample_type = attr->sample_type; +} + +static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + struct perf_event_attr *attr = &evsel->attr; + int track = !evsel->idx; /* only the first counter needs these */ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | @@ -315,19 +368,39 @@ static void create_counter(struct perf_evlist *evlist, attr->mmap = track; attr->comm = track; - attr->inherit = !no_inherit; + if (target_pid == -1 && target_tid == -1 && !system_wide) { attr->disabled = 1; attr->enable_on_exec = 1; } -retry_sample_id: - attr->sample_id_all = sample_id_all_avail ? 1 : 0; +} - for (thread_index = 0; thread_index < threads->nr; thread_index++) { -try_again: - FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0); +static void open_counters(struct perf_evlist *evlist) +{ + struct perf_evsel *pos; + int cpu; + + list_for_each_entry(pos, &evlist->entries, node) { + struct perf_event_attr *attr = &pos->attr; + /* + * Check if parse_single_tracepoint_event has already asked for + * PERF_SAMPLE_TIME. + * + * XXX this is kludgy but short term fix for problems introduced by + * eac23d1c that broke 'perf script' by having different sample_types + * when using multiple tracepoint events when we use a perf binary + * that tries to use sample_id_all on an older kernel. + * + * We need to move counter creation to perf_session, support + * different sample_types, etc. 
+ */ + bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - if (FD(evsel, nr_cpu, thread_index) < 0) { + config_attr(pos, evlist); +retry_sample_id: + attr->sample_id_all = sample_id_all_avail ? 1 : 0; +try_again: + if (perf_evsel__open(pos, cpus, threads, group, !no_inherit) < 0) { int err = errno; if (err == EPERM || err == EACCES) @@ -364,7 +437,7 @@ try_again: } printf("\n"); error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", - FD(evsel, nr_cpu, thread_index), strerror(err)); + err, strerror(err)); #if defined(__i386__) || defined(__x86_64__) if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) @@ -375,90 +448,13 @@ try_again: #endif die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - exit(-1); - } - - h_attr = get_header_attr(attr, evsel->idx); - if (h_attr == NULL) - die("nomem\n"); - - if (!file_new) { - if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { - fprintf(stderr, "incompatible append\n"); - exit(-1); - } - } - - if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) { - perror("Unable to read perf file descriptor"); - exit(-1); - } - - if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { - pr_warning("Not enough memory to add id\n"); - exit(-1); - } - - assert(FD(evsel, nr_cpu, thread_index) >= 0); - fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = FD(evsel, nr_cpu, thread_index); - - if (evsel->idx || thread_index) { - struct perf_evsel *first; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - ret = ioctl(FD(evsel, nr_cpu, thread_index), - PERF_EVENT_IOC_SET_OUTPUT, - FD(first, nr_cpu, 0)); - if (ret) { - error("failed to set output: %d (%s)\n", errno, - strerror(errno)); - exit(-1); - } - } else { - mmap_array[nr_cpu].prev = 0; - mmap_array[nr_cpu].mask = mmap_pages*page_size - 1; - mmap_array[nr_cpu].base 
= mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0); - if (mmap_array[nr_cpu].base == MAP_FAILED) { - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); - exit(-1); - } - - evlist->pollfd[evlist->nr_fds].fd = FD(evsel, nr_cpu, thread_index); - evlist->pollfd[evlist->nr_fds].events = POLLIN; - evlist->nr_fds++; - } - - if (filter != NULL) { - ret = ioctl(FD(evsel, nr_cpu, thread_index), - PERF_EVENT_IOC_SET_FILTER, filter); - if (ret) { - error("failed to set filter with %d (%s)\n", errno, - strerror(errno)); - exit(-1); - } } } - if (!sample_type) - sample_type = attr->sample_type; -} - -static void open_counters(struct perf_evlist *evlist, int cpu) -{ - struct perf_evsel *pos; - - group_fd = -1; - - list_for_each_entry(pos, &evlist->entries, node) - create_counter(evlist, pos, cpu); - - nr_cpu++; + for (cpu = 0; cpu < cpus->nr; ++cpu) { + list_for_each_entry(pos, &evlist->entries, node) + create_counter(evlist, pos, cpu); + } } static int process_buildids(void) @@ -533,7 +529,7 @@ static void mmap_read_all(void) { int i; - for (i = 0; i < nr_cpu; i++) { + for (i = 0; i < cpus->nr; i++) { if (mmap_array[i].base) mmap_read(&mmap_array[i]); } @@ -673,12 +669,7 @@ static int __cmd_record(int argc, const char **argv) close(child_ready_pipe[0]); } - if (!system_wide && no_inherit && !cpu_list) { - open_counters(evsel_list, -1); - } else { - for (i = 0; i < cpus->nr; i++) - open_counters(evsel_list, cpus->map[i]); - } + open_counters(evsel_list); perf_session__set_sample_type(session, sample_type); @@ -795,7 +786,7 @@ static int __cmd_record(int argc, const char **argv) } if (done) { - for (i = 0; i < nr_cpu; i++) { + for (i = 0; i < cpus->nr; i++) { struct perf_evsel *pos; list_for_each_entry(pos, &evsel_list->entries, node) { @@ -933,11 +924,13 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) usage_with_options(record_usage, record_options); } - cpus = 
cpu_map__new(cpu_list); - if (cpus == NULL) { - perror("failed to parse CPUs map"); - return -1; - } + if (target_tid != -1) + cpus = cpu_map__dummy_new(); + else + cpus = cpu_map__new(cpu_list); + + if (cpus == NULL) + usage_with_options(record_usage, record_options); list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) -- cgit v0.10.2 From 70082dd92c4b288bd723a77897e2b555f0e63113 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 17:03:24 -0200 Subject: perf evsel: Introduce mmap support Out of the code in 'perf top'. Record is next in line. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 15d89be..7d723ad 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1095,43 +1095,12 @@ static void event__process_sample(const event_t *self, } } -struct mmap_data { - void *base; - int mask; - unsigned int prev; -}; - -static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel, - int ncpus, int nthreads) -{ - evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data)); - return evsel->priv != NULL ? 
0 : -ENOMEM; -} - -static void perf_evsel__free_mmap(struct perf_evsel *evsel) -{ - xyarray__delete(evsel->priv); - evsel->priv = NULL; -} - -static unsigned int mmap_read_head(struct mmap_data *md) -{ - struct perf_event_mmap_page *pc = md->base; - int head; - - head = pc->data_head; - rmb(); - - return head; -} - static void perf_session__mmap_read_counter(struct perf_session *self, struct perf_evsel *evsel, int cpu, int thread_idx) { - struct xyarray *mmap_array = evsel->priv; - struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx); - unsigned int head = mmap_read_head(md); + struct perf_mmap *md = xyarray__entry(evsel->mmap, cpu, thread_idx); + unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; struct sample_data sample; @@ -1210,35 +1179,9 @@ static void perf_session__mmap_read(struct perf_session *self) } } -static void start_counter(int i, struct perf_evlist *evlist, - struct perf_evsel *evsel) -{ - struct xyarray *mmap_array = evsel->priv; - struct mmap_data *mm; - int thread_index; - - for (thread_index = 0; thread_index < threads->nr; thread_index++) { - assert(FD(evsel, i, thread_index) >= 0); - fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK); - - evlist->pollfd[evlist->nr_fds].fd = FD(evsel, i, thread_index); - evlist->pollfd[evlist->nr_fds].events = POLLIN; - evlist->nr_fds++; - - mm = xyarray__entry(mmap_array, i, thread_index); - mm->prev = 0; - mm->mask = mmap_pages*page_size - 1; - mm->base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0); - if (mm->base == MAP_FAILED) - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); - } -} - static void start_counters(struct perf_evlist *evlist) { struct perf_evsel *counter; - int i; list_for_each_entry(counter, &evlist->entries, node) { struct perf_event_attr *attr = &counter->attr; @@ -1282,11 +1225,9 @@ try_again: die("No CONFIG_PERF_EVENTS=y kernel support 
configured?\n"); exit(-1); } - } - for (i = 0; i < cpus->nr; i++) { - list_for_each_entry(counter, &evlist->entries, node) - start_counter(i, evsel_list, counter); + if (perf_evsel__mmap(counter, cpus, threads, mmap_pages, evlist) < 0) + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } } @@ -1453,7 +1394,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) usage_with_options(top_usage, options); list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 || + if (perf_evsel__alloc_mmap(pos, cpus->nr, threads->nr) < 0 || perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) goto out_free_fd; /* @@ -1485,8 +1426,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) status = __cmd_top(); out_free_fd: - list_for_each_entry(pos, &evsel_list->entries, node) - perf_evsel__free_mmap(pos); perf_evlist__delete(evsel_list); return status; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 95aaf56..5fb5e1f 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -94,6 +94,20 @@ void get_term_dimensions(struct winsize *ws); #include "util/types.h" #include +struct perf_mmap { + void *base; + int mask; + unsigned int prev; +}; + +static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->base; + int head = pc->data_head; + rmb(); + return head; +} + /* * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all * counters in the current task. diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 2abf949..6d41292 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -60,3 +60,11 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthread evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); return evlist->pollfd != NULL ? 
0 : -ENOMEM; } + +void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) +{ + fcntl(fd, F_SETFL, O_NONBLOCK); + evlist->pollfd[evlist->nr_fds].fd = fd; + evlist->pollfd[evlist->nr_fds].events = POLLIN; + evlist->nr_fds++; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a7d7e12..16bbfcb 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -21,5 +21,6 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads); +void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 82a0053..f500695 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1,9 +1,13 @@ #include "evsel.h" +#include "evlist.h" #include "../perf.h" #include "util.h" #include "cpumap.h" #include "thread.h" +#include +#include + #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) @@ -49,10 +53,32 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) } } +void perf_evsel__munmap(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + struct perf_mmap *mm; + int cpu, thread; + + for (cpu = 0; cpu < ncpus; cpu++) + for (thread = 0; thread < nthreads; ++thread) { + mm = xyarray__entry(evsel->mmap, cpu, thread); + if (mm->base != NULL) { + munmap(mm->base, evsel->mmap_len); + mm->base = NULL; + } + } +} + +int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap)); + return evsel->mmap != NULL ? 
0 : -ENOMEM; +} + void perf_evsel__delete(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); xyarray__delete(evsel->fd); + xyarray__delete(evsel->mmap); free(evsel); } @@ -208,3 +234,48 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, { return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit); } + +int perf_evsel__mmap(struct perf_evsel *evsel, struct cpu_map *cpus, + struct thread_map *threads, int pages, + struct perf_evlist *evlist) +{ + unsigned int page_size = sysconf(_SC_PAGE_SIZE); + int mask = pages * page_size - 1, cpu; + struct perf_mmap *mm; + int thread; + + if (evsel->mmap == NULL && + perf_evsel__alloc_mmap(evsel, cpus->nr, threads->nr) < 0) + return -ENOMEM; + + evsel->mmap_len = (pages + 1) * page_size; + + for (cpu = 0; cpu < cpus->nr; cpu++) { + for (thread = 0; thread < threads->nr; thread++) { + mm = xyarray__entry(evsel->mmap, cpu, thread); + mm->prev = 0; + mm->mask = mask; + mm->base = mmap(NULL, evsel->mmap_len, PROT_READ, + MAP_SHARED, FD(evsel, cpu, thread), 0); + if (mm->base == MAP_FAILED) + goto out_unmap; + + if (evlist != NULL) + perf_evlist__add_pollfd(evlist, FD(evsel, cpu, thread)); + } + } + + return 0; + +out_unmap: + do { + while (--thread >= 0) { + mm = xyarray__entry(evsel->mmap, cpu, thread); + munmap(mm->base, evsel->mmap_len); + mm->base = NULL; + } + thread = threads->nr; + } while (--cpu >= 0); + + return -1; +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1594696..c8fbef2 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -29,19 +29,23 @@ struct perf_evsel { struct perf_event_attr attr; char *filter; struct xyarray *fd; + struct xyarray *mmap; struct perf_counts *counts; + size_t mmap_len; int idx; void *priv; }; struct cpu_map; struct thread_map; +struct perf_evlist; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); void perf_evsel__delete(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel 
*evsel, int ncpus, int nthreads); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); +int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); @@ -51,6 +55,10 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads, bool group, bool inherit); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads, bool group, bool inherit); +int perf_evsel__mmap(struct perf_evsel *evsel, struct cpu_map *cpus, + struct thread_map *threads, int pages, + struct perf_evlist *evlist); +void perf_evsel__munmap(struct perf_evsel *evsel, int ncpus, int nthreads); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ -- cgit v0.10.2 From 744bd8aa3c8b43447f689a27872fa95e700b8a4f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 17:07:28 -0200 Subject: perf record: Use struct perf_mmap and helpers Paving the way to using perf_evsel->mmap, do this to reduce the patch noise in the next ones. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ec43f2e..d89e2f1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -78,26 +78,9 @@ static off_t post_processing_offset; static struct perf_session *session; static const char *cpu_list; -struct mmap_data { - void *base; - unsigned int mask; - unsigned int prev; -}; - -static struct mmap_data mmap_array[MAX_NR_CPUS]; - -static unsigned long mmap_read_head(struct mmap_data *md) -{ - struct perf_event_mmap_page *pc = md->base; - long head; - - head = pc->data_head; - rmb(); - - return head; -} +static struct perf_mmap mmap_array[MAX_NR_CPUS]; -static void mmap_write_tail(struct mmap_data *md, unsigned long tail) +static void mmap_write_tail(struct perf_mmap *md, unsigned long tail) { struct perf_event_mmap_page *pc = md->base; @@ -136,9 +119,9 @@ static int process_synthesized_event(event_t *event, return 0; } -static void mmap_read(struct mmap_data *md) +static void mmap_read(struct perf_mmap *md) { - unsigned int head = mmap_read_head(md); + unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; unsigned long size; -- cgit v0.10.2 From 115d2d8963a426670ac3ce983fc4c4e001703943 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 17:11:53 -0200 Subject: perf record: Move perf_mmap__write_tail to perf.h Close to perf_mmap__read_head() and the perf_mmap struct definition. This is useful for any recorder, and we will need it in 'perf test'. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d89e2f1..109f3b2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -80,17 +80,6 @@ static const char *cpu_list; static struct perf_mmap mmap_array[MAX_NR_CPUS]; -static void mmap_write_tail(struct perf_mmap *md, unsigned long tail) -{ - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - /* mb(); */ - pc->data_tail = tail; -} - static void advance_output(size_t size) { bytes_written += size; @@ -165,7 +154,7 @@ static void mmap_read(struct perf_mmap *md) write_output(buf, size); md->prev = old; - mmap_write_tail(md, old); + perf_mmap__write_tail(md, old); } static volatile int done = 0; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 5fb5e1f..a5fc660 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -108,6 +108,18 @@ static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) return head; } +static inline void perf_mmap__write_tail(struct perf_mmap *md, + unsigned long tail) +{ + struct perf_event_mmap_page *pc = md->base; + + /* + * ensure all reads are done before we write the tail out. + */ + /* mb(); */ + pc->data_tail = tail; +} + /* * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all * counters in the current task. 
-- cgit v0.10.2 From 70db7533caef02350ec8d6852e589491bca3a951 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Jan 2011 22:39:13 -0200 Subject: perf evlist: Move the mmap array from perf_evsel Adopting the new model used in 'perf record', where we don't have a map per thread per cpu, instead we have an mmap per cpu, established on the first fd for that cpu and ask the kernel using the PERF_EVENT_IOC_SET_OUTPUT ioctl to send events for the other fds on that cpu for the one with the mmap. The methods moved from perf_evsel to perf_evlist, but for easing review they were modified in place, in evsel.c, the next patch will move the migrated methods to evlist.c. With this 'perf top' now uses the same mmap model used by 'perf record' and the next patches will make 'perf record' use these new routines, establishing a common codebase for both tools. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7d723ad..df85c1f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -78,7 +78,7 @@ static struct cpu_map *cpus; static int realtime_prio = 0; static bool group = false; static unsigned int page_size; -static unsigned int mmap_pages = 16; +static unsigned int mmap_pages = 128; static int freq = 1000; /* 1 KHz */ static int delay_secs = 2; @@ -991,8 +991,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) static void event__process_sample(const event_t *self, struct sample_data *sample, - struct perf_session *session, - struct perf_evsel *evsel) + struct perf_session *session) { u64 ip = self->ip.ip; struct sym_entry *syme; @@ -1085,8 +1084,12 @@ static void event__process_sample(const event_t *self, syme = symbol__priv(al.sym); if (!syme->skip) { - syme->count[evsel->idx]++; + struct perf_evsel *evsel; + syme->origin = 
origin; + evsel = perf_evlist__id2evsel(evsel_list, sample->id); + assert(evsel != NULL); + syme->count[evsel->idx]++; record_precise_ip(syme, evsel->idx, ip); pthread_mutex_lock(&active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) @@ -1095,11 +1098,9 @@ static void event__process_sample(const event_t *self, } } -static void perf_session__mmap_read_counter(struct perf_session *self, - struct perf_evsel *evsel, - int cpu, int thread_idx) +static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) { - struct perf_mmap *md = xyarray__entry(evsel->mmap, cpu, thread_idx); + struct perf_mmap *md = &evsel_list->mmap[cpu]; unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; @@ -1153,7 +1154,7 @@ static void perf_session__mmap_read_counter(struct perf_session *self, event__parse_sample(event, self, &sample); if (event->header.type == PERF_RECORD_SAMPLE) - event__process_sample(event, &sample, self, evsel); + event__process_sample(event, &sample, self); else event__process(event, &sample, self); old += size; @@ -1164,19 +1165,10 @@ static void perf_session__mmap_read_counter(struct perf_session *self, static void perf_session__mmap_read(struct perf_session *self) { - struct perf_evsel *counter; - int i, thread_index; - - for (i = 0; i < cpus->nr; i++) { - list_for_each_entry(counter, &evsel_list->entries, node) { - for (thread_index = 0; - thread_index < threads->nr; - thread_index++) { - perf_session__mmap_read_counter(self, - counter, i, thread_index); - } - } - } + int i; + + for (i = 0; i < cpus->nr; i++) + perf_session__mmap_read_cpu(self, i); } static void start_counters(struct perf_evlist *evlist) @@ -1194,6 +1186,11 @@ static void start_counters(struct perf_evlist *evlist) attr->sample_freq = freq; } + if (evlist->nr_entries > 1) { + attr->sample_type |= PERF_SAMPLE_ID; + attr->read_format |= PERF_FORMAT_ID; + } + attr->mmap = 1; try_again: if 
(perf_evsel__open(counter, cpus, threads, group, inherit) < 0) { @@ -1225,15 +1222,16 @@ try_again: die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } - - if (perf_evsel__mmap(counter, cpus, threads, mmap_pages, evlist) < 0) - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } + + if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, true) < 0) + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } static int __cmd_top(void) { pthread_t thread; + struct perf_evsel *first; int ret; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this @@ -1249,6 +1247,8 @@ static int __cmd_top(void) event__synthesize_threads(event__process, session); start_counters(evsel_list); + first = list_entry(evsel_list->entries.next, struct perf_evsel, node); + perf_session__set_sample_type(session, first->attr.sample_type); /* Wait for a minimal set of events before starting the snapshot */ poll(evsel_list->pollfd, evsel_list->nr_fds, 100); @@ -1394,8 +1394,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) usage_with_options(top_usage, options); list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_evsel__alloc_mmap(pos, cpus->nr, threads->nr) < 0 || - perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) + if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) goto out_free_fd; /* * Fill in the ones not specifically initialized via -c: @@ -1406,7 +1405,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = default_interval; } - if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0) + if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0 || + perf_evlist__alloc_mmap(evsel_list, cpus->nr) < 0) goto out_free_fd; sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6d41292..deb82a4 100644 --- a/tools/perf/util/evlist.c 
+++ b/tools/perf/util/evlist.c @@ -3,11 +3,18 @@ #include "evsel.h" #include "util.h" +#include +#include + struct perf_evlist *perf_evlist__new(void) { struct perf_evlist *evlist = zalloc(sizeof(*evlist)); if (evlist != NULL) { + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); } @@ -29,6 +36,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__delete(struct perf_evlist *evlist) { perf_evlist__purge(evlist); + free(evlist->mmap); free(evlist->pollfd); free(evlist); } @@ -68,3 +76,22 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) evlist->pollfd[evlist->nr_fds].events = POLLIN; evlist->nr_fds++; } + +struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct perf_sample_id *sid; + int hash; + + if (evlist->nr_entries == 1) + return list_entry(evlist->entries.next, struct perf_evsel, node); + + hash = hash_64(id, PERF_EVLIST__HLIST_BITS); + head = &evlist->heads[hash]; + + hlist_for_each_entry(sid, pos, head, node) + if (sid->id == id) + return sid->evsel; + return NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 16bbfcb..dbfcc79 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -2,13 +2,20 @@ #define __PERF_EVLIST_H 1 #include +#include "../perf.h" struct pollfd; +#define PERF_EVLIST__HLIST_BITS 8 +#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) + struct perf_evlist { struct list_head entries; + struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_entries; int nr_fds; + int mmap_len; + struct perf_mmap *mmap; struct pollfd *pollfd; }; @@ -23,4 +30,6 @@ int perf_evlist__add_default(struct perf_evlist *evlist); int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads); void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); +struct perf_evsel 
*perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f500695..ee49035 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,7 +8,11 @@ #include #include +#include +#include + #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) +#define SID(e, x, y) xyarray__entry(e->id, x, y) struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) { @@ -29,6 +33,12 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) return evsel->fd != NULL ? 0 : -ENOMEM; } +int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); + return evsel->id != NULL ? 0 : -ENOMEM; +} + int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) { evsel->counts = zalloc((sizeof(*evsel->counts) + @@ -42,6 +52,12 @@ void perf_evsel__free_fd(struct perf_evsel *evsel) evsel->fd = NULL; } +void perf_evsel__free_id(struct perf_evsel *evsel) +{ + xyarray__delete(evsel->id); + evsel->id = NULL; +} + void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { int cpu, thread; @@ -53,32 +69,29 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) } } -void perf_evsel__munmap(struct perf_evsel *evsel, int ncpus, int nthreads) +void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus) { - struct perf_mmap *mm; - int cpu, thread; + int cpu; - for (cpu = 0; cpu < ncpus; cpu++) - for (thread = 0; thread < nthreads; ++thread) { - mm = xyarray__entry(evsel->mmap, cpu, thread); - if (mm->base != NULL) { - munmap(mm->base, evsel->mmap_len); - mm->base = NULL; - } + for (cpu = 0; cpu < ncpus; cpu++) { + if (evlist->mmap[cpu].base != NULL) { + munmap(evlist->mmap[cpu].base, evlist->mmap_len); + evlist->mmap[cpu].base = NULL; } + } } -int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int 
nthreads) +int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus) { - evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap)); - return evsel->mmap != NULL ? 0 : -ENOMEM; + evlist->mmap = zalloc(ncpus * sizeof(struct perf_mmap)); + return evlist->mmap != NULL ? 0 : -ENOMEM; } void perf_evsel__delete(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); xyarray__delete(evsel->fd); - xyarray__delete(evsel->mmap); + xyarray__delete(evsel->id); free(evsel); } @@ -235,47 +248,110 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit); } -int perf_evsel__mmap(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, int pages, - struct perf_evlist *evlist) +static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, + int mask, int fd) +{ + evlist->mmap[cpu].prev = 0; + evlist->mmap[cpu].mask = mask; + evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot, + MAP_SHARED, fd, 0); + if (evlist->mmap[cpu].base == MAP_FAILED) + return -1; + + perf_evlist__add_pollfd(evlist, fd); + return 0; +} + +static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, + int cpu, int thread, int fd) +{ + struct perf_sample_id *sid; + u64 read_data[4] = { 0, }; + int hash, id_idx = 1; /* The first entry is the counter value */ + + if (!(evsel->attr.read_format & PERF_FORMAT_ID) || + read(fd, &read_data, sizeof(read_data)) == -1) + return -1; + + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + ++id_idx; + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + ++id_idx; + + sid = SID(evsel, cpu, thread); + sid->id = read_data[id_idx]; + sid->evsel = evsel; + hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); + hlist_add_head(&sid->node, &evlist->heads[hash]); + return 0; +} + +/** perf_evlist__mmap - Create per cpu maps to receive events + * + * @evlist - list of events + * @cpus - cpu 
map being monitored + * @threads - threads map being monitored + * @pages - map length in pages + * @overwrite - overwrite older events? + * + * If overwrite is false the user needs to signal event consumption using: + * + * struct perf_mmap *m = &evlist->mmap[cpu]; + * unsigned int head = perf_mmap__read_head(m); + * + * perf_mmap__write_tail(m, head) + */ +int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads, int pages, bool overwrite) { unsigned int page_size = sysconf(_SC_PAGE_SIZE); int mask = pages * page_size - 1, cpu; - struct perf_mmap *mm; - int thread; + struct perf_evsel *first_evsel, *evsel; + int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); - if (evsel->mmap == NULL && - perf_evsel__alloc_mmap(evsel, cpus->nr, threads->nr) < 0) + if (evlist->mmap == NULL && + perf_evlist__alloc_mmap(evlist, cpus->nr) < 0) return -ENOMEM; - evsel->mmap_len = (pages + 1) * page_size; + if (evlist->pollfd == NULL && + perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0) + return -ENOMEM; - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - mm = xyarray__entry(evsel->mmap, cpu, thread); - mm->prev = 0; - mm->mask = mask; - mm->base = mmap(NULL, evsel->mmap_len, PROT_READ, - MAP_SHARED, FD(evsel, cpu, thread), 0); - if (mm->base == MAP_FAILED) - goto out_unmap; - - if (evlist != NULL) - perf_evlist__add_pollfd(evlist, FD(evsel, cpu, thread)); + evlist->mmap_len = (pages + 1) * page_size; + first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + + list_for_each_entry(evsel, &evlist->entries, node) { + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + evsel->id == NULL && + perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) + return -ENOMEM; + + for (cpu = 0; cpu < cpus->nr; cpu++) { + for (thread = 0; thread < threads->nr; thread++) { + int fd = FD(evsel, cpu, thread); + + if (evsel->idx || thread) { + if (ioctl(fd,
PERF_EVENT_IOC_SET_OUTPUT, + FD(first_evsel, cpu, 0)) != 0) + goto out_unmap; + } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0) + goto out_unmap; + + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0) + goto out_unmap; + } } } return 0; out_unmap: - do { - while (--thread >= 0) { - mm = xyarray__entry(evsel->mmap, cpu, thread); - munmap(mm->base, evsel->mmap_len); - mm->base = NULL; + for (cpu = 0; cpu < cpus->nr; cpu++) { + if (evlist->mmap[cpu].base != NULL) { + munmap(evlist->mmap[cpu].base, evlist->mmap_len); + evlist->mmap[cpu].base = NULL; } - thread = threads->nr; - } while (--cpu >= 0); - + } return -1; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c8fbef2..667ee4e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -24,14 +24,25 @@ struct perf_counts { struct perf_counts_values cpu[]; }; +struct perf_evsel; + +/* + * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are + * more than one entry in the evlist. 
+ */ +struct perf_sample_id { + struct hlist_node node; + u64 id; + struct perf_evsel *evsel; +}; + struct perf_evsel { struct list_head node; struct perf_event_attr attr; char *filter; struct xyarray *fd; - struct xyarray *mmap; + struct xyarray *id; struct perf_counts *counts; - size_t mmap_len; int idx; void *priv; }; @@ -44,9 +55,11 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); void perf_evsel__delete(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); -int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus); void perf_evsel__free_fd(struct perf_evsel *evsel); +void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, @@ -55,10 +68,9 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads, bool group, bool inherit); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads, bool group, bool inherit); -int perf_evsel__mmap(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, int pages, - struct perf_evlist *evlist); -void perf_evsel__munmap(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads, int pages, bool overwrite); +void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ -- cgit v0.10.2 From 0a27d7f9f417c0305f7efa70631764a53c7af219 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jan 2011 15:50:51 -0200 Subject: perf 
record: Use perf_evlist__mmap There is more stuff that can go to the perf_ev{sel,list} layer, like detecting if sample_id_all is available, etc, but lets try using this in 'perf test' first. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 109f3b2..45a3689 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -30,6 +30,7 @@ #include #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) +#define SID(e, x, y) xyarray__entry(e->id, x, y) enum write_mode_t { WRITE_FORCE, @@ -78,8 +79,6 @@ static off_t post_processing_offset; static struct perf_session *session; static const char *cpu_list; -static struct perf_mmap mmap_array[MAX_NR_CPUS]; - static void advance_output(size_t size) { bytes_written += size; @@ -196,20 +195,14 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n return h_attr; } -static void create_counter(struct perf_evlist *evlist, - struct perf_evsel *evsel, int cpu) +static void create_counter(struct perf_evsel *evsel, int cpu) { char *filter = evsel->filter; struct perf_event_attr *attr = &evsel->attr; struct perf_header_attr *h_attr; + struct perf_sample_id *sid; int thread_index; int ret; - struct { - u64 count; - u64 time_enabled; - u64 time_running; - u64 id; - } read_data; for (thread_index = 0; thread_index < threads->nr; thread_index++) { h_attr = get_header_attr(attr, evsel->idx); @@ -223,45 +216,12 @@ static void create_counter(struct perf_evlist *evlist, } } - if (read(FD(evsel, cpu, thread_index), &read_data, sizeof(read_data)) == -1) { - perror("Unable to read perf file descriptor"); - exit(-1); - } - - if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { + sid = SID(evsel, cpu, thread_index); + if (perf_header_attr__add_id(h_attr, sid->id) < 0) { pr_warning("Not 
enough memory to add id\n"); exit(-1); } - assert(FD(evsel, cpu, thread_index) >= 0); - fcntl(FD(evsel, cpu, thread_index), F_SETFL, O_NONBLOCK); - - if (evsel->idx || thread_index) { - struct perf_evsel *first; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - ret = ioctl(FD(evsel, cpu, thread_index), - PERF_EVENT_IOC_SET_OUTPUT, - FD(first, cpu, 0)); - if (ret) { - error("failed to set output: %d (%s)\n", errno, - strerror(errno)); - exit(-1); - } - } else { - mmap_array[cpu].prev = 0; - mmap_array[cpu].mask = mmap_pages*page_size - 1; - mmap_array[cpu].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, cpu, thread_index), 0); - if (mmap_array[cpu].base == MAP_FAILED) { - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); - exit(-1); - } - - evlist->pollfd[evlist->nr_fds].fd = FD(evsel, cpu, thread_index); - evlist->pollfd[evlist->nr_fds].events = POLLIN; - evlist->nr_fds++; - } - if (filter != NULL) { ret = ioctl(FD(evsel, cpu, thread_index), PERF_EVENT_IOC_SET_FILTER, filter); @@ -423,9 +383,12 @@ try_again: } } + if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, false) < 0) + die("failed to mmap with %d (%s)\n", errno, strerror(errno)); + for (cpu = 0; cpu < cpus->nr; ++cpu) { list_for_each_entry(pos, &evlist->entries, node) - create_counter(evlist, pos, cpu); + create_counter(pos, cpu); } } @@ -502,8 +465,8 @@ static void mmap_read_all(void) int i; for (i = 0; i < cpus->nr; i++) { - if (mmap_array[i].base) - mmap_read(&mmap_array[i]); + if (evsel_list->mmap[i].base) + mmap_read(&evsel_list->mmap[i]); } if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) -- cgit v0.10.2 From 915fce20ecf8f7ff4189d0fff42b62aebf6a57cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jan 2011 16:19:12 -0200 Subject: perf tools: Add missing cpu_map__delete() Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane 
Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 3ccaa10..6893eec 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -177,3 +177,8 @@ struct cpu_map *cpu_map__dummy_new(void) return cpus; } + +void cpu_map__delete(struct cpu_map *map) +{ + free(map); +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f7a4f42..072c0a3 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -8,6 +8,6 @@ struct cpu_map { struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); -void *cpu_map__delete(struct cpu_map *map); +void cpu_map__delete(struct cpu_map *map); #endif /* __PERF_CPUMAP_H */ -- cgit v0.10.2 From d2af9687c96f3864178de1860e6d83873aeef224 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Jan 2011 16:24:49 -0200 Subject: perf test: Check counts on all cpus in test__open_syscall_event_on_all_cpus We were bailing out after the first count mismatch, do it in all to see if only some CPUs are not getting the expected number of events. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 7287158..7cc6b20 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -408,6 +408,8 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_close_fd; } + err = 0; + for (cpu = 0; cpu < cpus->nr; ++cpu) { unsigned int expected; @@ -416,18 +418,18 @@ static int test__open_syscall_event_on_all_cpus(void) if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { pr_debug("perf_evsel__open_read_on_cpu\n"); - goto out_close_fd; + err = -1; + break; } expected = nr_open_calls + cpu; if (evsel->counts->cpu[cpu].val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); - goto out_close_fd; + err = -1; } } - err = 0; out_close_fd: perf_evsel__close_fd(evsel, 1, threads->nr); out_evsel_delete: -- cgit v0.10.2 From 98d77b78504a423fca911a26a17bee00ef2fdda2 Mon Sep 17 00:00:00 2001 From: Han Pingtian Date: Sat, 15 Jan 2011 07:00:50 +0800 Subject: perf test: check if cpu_map__new() return NULL It looks like we should check if cpus is NULL after cpus = cpu_map__new(NULL); in test__open_syscall_event_on_all_cpus(). 
LKML-Reference: <20110114230050.GA7011@localhost> Signed-off-by: Han Pingtian Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 7cc6b20..5a50e47 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -347,9 +347,9 @@ static int test__open_syscall_event_on_all_cpus(void) } cpus = cpu_map__new(NULL); - if (threads == NULL) { - pr_debug("thread_map__new\n"); - return -1; + if (cpus == NULL) { + pr_debug("cpu_map__new\n"); + goto out_thread_map_delete; } -- cgit v0.10.2 From 04391debc3e1195222a4dbb162ace6542dd89c1c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 15 Jan 2011 10:40:59 -0200 Subject: perf evlist: Steal mmap reading routine from 'perf top' Will be used in the upcoming 'perf test' entry for the evlist mmap routines. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index df85c1f..58352ad 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1100,67 +1100,17 @@ static void event__process_sample(const event_t *self, static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) { - struct perf_mmap *md = &evsel_list->mmap[cpu]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; - unsigned char *data = md->base + page_size; struct sample_data sample; - int diff; - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. 
- */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * head points to a known good entry, start there. - */ - old = head; - } - - for (; old != head;) { - event_t *event = (event_t *)&data[old & md->mask]; - - event_t event_copy; - - size_t size = event->header.size; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((old & md->mask) + size != ((old + size) & md->mask)) { - unsigned int offset = old; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = &event_copy; - - do { - cpy = min(md->mask + 1 - (offset & md->mask), len); - memcpy(dst, &data[offset & md->mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = &event_copy; - } + event_t *event; + while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) { event__parse_sample(event, self, &sample); + if (event->header.type == PERF_RECORD_SAMPLE) event__process_sample(event, &sample, self); else event__process(event, &sample, self); - old += size; } - - md->prev = old; } static void perf_session__mmap_read(struct perf_session *self) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index deb82a4..4b3b84c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -95,3 +95,65 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return sid->evsel; return NULL; } + +event_t *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) +{ + /* XXX Move this to perf.c, making it generally available */ + unsigned int page_size = sysconf(_SC_PAGE_SIZE); + struct perf_mmap *md = &evlist->mmap[cpu]; + unsigned int head = perf_mmap__read_head(md); + unsigned int old = md->prev; + unsigned char *data = md->base + page_size; + event_t *event = NULL; + int diff; + + /* + * If we're further behind than half the buffer, there's a chance + * the writer will 
bite our tail and mess up the samples under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. + */ + diff = head - old; + if (diff > md->mask / 2 || diff < 0) { + fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); + + /* + * head points to a known good entry, start there. + */ + old = head; + } + + if (old != head) { + size_t size; + + event = (event_t *)&data[old & md->mask]; + size = event->header.size; + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((old & md->mask) + size != ((old + size) & md->mask)) { + unsigned int offset = old; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = &evlist->event_copy; + + do { + cpy = min(md->mask + 1 - (offset & md->mask), len); + memcpy(dst, &data[offset & md->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = &evlist->event_copy; + } + + old += size; + } + + md->prev = old; + return event; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dbfcc79..2871206 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -3,6 +3,7 @@ #include #include "../perf.h" +#include "event.h" struct pollfd; @@ -15,6 +16,7 @@ struct perf_evlist { int nr_entries; int nr_fds; int mmap_len; + event_t event_copy; struct perf_mmap *mmap; struct pollfd *pollfd; }; @@ -32,4 +34,6 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); +event_t *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu); + #endif /* __PERF_EVLIST_H */ -- cgit v0.10.2 From de5fa3a8a05cd60f59622e88cfeb90416760d78e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 15 Jan 2011 10:42:46 -0200 Subject: perf test: Add test for the evlist mmap routines This test will generate random numbers of calls to some getpid 
syscalls, then establish an mmap for a group of events that are created to monitor these syscalls. It will receive the events, using mmap, use its PERF_SAMPLE_ID generated sample.id field to map back to its respective perf_evsel instance. Then it checks if the number of syscalls reported as perf events by the kernel corresponds to the number of syscalls made. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 5a50e47..4fd3453 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -7,7 +7,9 @@ #include "util/cache.h" #include "util/debug.h" +#include "util/evlist.h" #include "util/parse-options.h" +#include "util/parse-events.h" #include "util/session.h" #include "util/symbol.h" #include "util/thread.h" @@ -238,14 +240,14 @@ out: #include "util/evsel.h" #include -static int trace_event__id(const char *event_name) +static int trace_event__id(const char *evname) { char *filename; int err = -1, fd; if (asprintf(&filename, "/sys/kernel/debug/tracing/events/syscalls/%s/id", - event_name) < 0) + evname) < 0) return -1; fd = open(filename, O_RDONLY); @@ -439,6 +441,167 @@ out_thread_map_delete: return err; } +/* + * This test will generate random numbers of calls to some getpid syscalls, + * then establish an mmap for a group of events that are created to monitor + * the syscalls. + * + * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated + * sample.id field to map back to its respective perf_evsel instance. + * + * Then it checks if the number of syscalls reported as perf events by + * the kernel corresponds to the number of syscalls made. 
+ */
+static int test__basic_mmap(void)
+{
+	int err = -1;
+	event_t *event;
+	struct thread_map *threads;
+	struct perf_session session;
+	struct cpu_map *cpus;
+	struct perf_evlist *evlist;
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_TRACEPOINT,
+		.read_format = PERF_FORMAT_ID,
+		.sample_type = PERF_SAMPLE_ID,
+		.watermark = 0,
+	};
+	cpu_set_t cpu_set;
+	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
+					"getpgid", };
+	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
+				      (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+	int ids[nsyscalls];
+	unsigned int nr_events[nsyscalls],
+		     expected_nr_events[nsyscalls], i, j;
+	struct perf_evsel *evsels[nsyscalls], *evsel;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		ids[i] = trace_event__id(name);
+		if (ids[i] < 0) {
+			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
+			return -1;
+		}
+		nr_events[i] = 0;
+		expected_nr_events[i] = random() % 257;
+	}
+
+	threads = thread_map__new(-1, getpid());
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_free_threads;
+	}
+
+	/* Pin to cpus->map[0]: samples are read back via perf_evlist__read_on_cpu(evlist, 0). */
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpus->map[0], &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+		pr_debug("sched_setaffinity() failed on CPU %d: %s\n",
+			 cpus->map[0], strerror(errno));
+		goto out_free_cpus;
+	}
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new\n");
+		goto out_free_cpus;
+	}
+
+	/* anonymous union fields, can't be initialized above */
+	attr.wakeup_events = 1;
+	attr.sample_period = 1;
+
+	/*
+	 * FIXME: use evsel->attr.sample_type in event__parse_sample.
+	 * This will nicely remove the requirement that we have
+	 * all the events with the same sample_type.
+	 */
+	session.sample_type = attr.sample_type;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		attr.config = ids[i];
+		evsels[i] = perf_evsel__new(&attr, i);
+		if (evsels[i] == NULL) {
+			pr_debug("perf_evsel__new\n");
+			goto out_free_evlist;
+		}
+
+		perf_evlist__add(evlist, evsels[i]);
+
+		if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
+			pr_debug("failed to open counter: %s, "
+				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+				 strerror(errno));
+			goto out_close_fd;
+		}
+	}
+
+	if (perf_evlist__mmap(evlist, cpus, threads, 128, true) < 0) {
+		pr_debug("failed to mmap events: %d (%s)\n", errno,
+			 strerror(errno));
+		goto out_close_fd;
+	}
+
+	for (i = 0; i < nsyscalls; ++i)
+		for (j = 0; j < expected_nr_events[i]; ++j) {
+			int foo = syscalls[i]();
+			++foo;
+		}
+
+	while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
+		struct sample_data sample;
+
+		if (event->header.type != PERF_RECORD_SAMPLE) {
+			pr_debug("unexpected %s event\n",
+				 event__get_event_name(event->header.type));
+			goto out_munmap;
+		}
+
+		event__parse_sample(event, &session, &sample);
+		evsel = perf_evlist__id2evsel(evlist, sample.id);
+		if (evsel == NULL) {
+			pr_debug("event with id %" PRIu64
+				 " doesn't map to an evsel\n", sample.id);
+			goto out_munmap;
+		}
+		nr_events[evsel->idx]++;
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+			pr_debug("expected %d %s events, got %d\n",
+				 expected_nr_events[evsel->idx],
+				 event_name(evsel), nr_events[evsel->idx]);
+			goto out_munmap;
+		}
+	}
+
+	err = 0;
+out_munmap:
+	perf_evlist__munmap(evlist, 1);
+out_close_fd:
+	for (j = 0; j < i; ++j)	/* i == number of evsels opened; later slots were never set up */
+		perf_evsel__close_fd(evsels[j], 1, threads->nr);
+out_free_evlist:
+	perf_evlist__delete(evlist);
+out_free_cpus:
+	cpu_map__delete(cpus);
+out_free_threads:
+	thread_map__delete(threads);
+	return err;
+#undef nsyscalls
+}
+
 static struct test {
 	const char *desc;
 	int (*func)(void);
@@ -456,6 +619,10 @@ static
struct test { .func = test__open_syscall_event_on_all_cpus, }, { + .desc = "read samples using the mmap interface", + .func = test__basic_mmap, + }, + { .func = NULL, }, }; -- cgit v0.10.2 From 1b3a0e9592ebf174af934b3908a2bf6a6fa86169 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 14 Jan 2011 04:51:58 +0100 Subject: perf callchain: Feed callchains into a cursor The callchains are fed with an array of a fixed size. As a result we iterate over each callchain three times: - 1st to resolve symbols - 2nd to filter out context boundaries - 3rd for the insertion into the tree This also involves some pairs of memory allocation/deallocation every time we insert a callchain, for the filtered out array of addresses and for the array of symbols that comes along. Instead, feed the callchains through a linked list with persistent allocations. It brings several pros like: - Merge the 1st and 2nd iterations into one. That was possible before but in a way that would involve allocating an array slightly larger than necessary because we don't know in advance the number of context boundaries to filter out. - Far fewer allocations/deallocations. The linked list keeps persistent empty entries for the next usages and is extendable at will. - Makes it easier for multiple sources of callchains to feed a stacktrace together. This is deemed to pave the way for cfi based callchains wherein traditional frame pointer based kernel stacktraces will precede cfi based user ones, producing an overall callchain whose size is hard to predict. This requirement makes the static array obsolete and makes a linked list based iterator a much more flexible fit. Basic testing on a big perf file containing callchains (~ 176 MB) has shown a throughput gain of about 11% with perf report.
Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1294977121-5700-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c27e31f..c95599a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -81,18 +81,17 @@ static int perf_session__add_hist_entry(struct perf_session *self, struct addr_location *al, struct sample_data *data) { - struct map_symbol *syms = NULL; struct symbol *parent = NULL; - int err = -ENOMEM; + int err = 0; struct hist_entry *he; struct hists *hists; struct perf_event_attr *attr; if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) { - syms = perf_session__resolve_callchain(self, al->thread, - data->callchain, &parent); - if (syms == NULL) - return -ENOMEM; + err = perf_session__resolve_callchain(self, al->thread, + data->callchain, &parent); + if (err) + return err; } attr = perf_header__find_attr(data->id, &self->header); @@ -101,16 +100,17 @@ static int perf_session__add_hist_entry(struct perf_session *self, else hists = perf_session__hists_findnew(self, data->id, 0, 0); if (hists == NULL) - goto out_free_syms; + return -ENOMEM; + he = __hists__add_entry(hists, al, parent, data->period); if (he == NULL) - goto out_free_syms; - err = 0; + return -ENOMEM; + if (symbol_conf.use_callchain) { - err = callchain_append(he->callchain, data->callchain, syms, + err = callchain_append(he->callchain, &self->callchain_cursor, data->period); if (err) - goto out_free_syms; + return err; } /* * Only in the newt browser we are doing integrated annotation, @@ -119,8 +119,7 @@ static int perf_session__add_hist_entry(struct perf_session *self, */ if (use_browser > 0) err = hist_entry__inc_addr_samples(he, al->addr); -out_free_syms: - free(syms); + return err; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index e12d539..53a49e0 100644 --- 
a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010, Frederic Weisbecker + * Copyright (C) 2009-2011, Frederic Weisbecker * * Handle the callchains from the stream in an ad-hoc radix tree and then * sort them in an rbtree. @@ -195,26 +195,21 @@ create_child(struct callchain_node *parent, bool inherit_children) } -struct resolved_ip { - u64 ip; - struct map_symbol ms; -}; - -struct resolved_chain { - u64 nr; - struct resolved_ip ips[0]; -}; - - /* * Fill the node with callchain values */ static void -fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) +fill_node(struct callchain_node *node, struct callchain_cursor *cursor) { - unsigned int i; + struct callchain_cursor_node *cursor_node; + + node->val_nr = cursor->nr - cursor->pos; + if (!node->val_nr) + pr_warning("Warning: empty node in callchain tree\n"); - for (i = start; i < chain->nr; i++) { + cursor_node = callchain_cursor_current(cursor); + + while (cursor_node) { struct callchain_list *call; call = zalloc(sizeof(*call)); @@ -222,23 +217,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) perror("not enough memory for the code path tree"); return; } - call->ip = chain->ips[i].ip; - call->ms = chain->ips[i].ms; + call->ip = cursor_node->ip; + call->ms.sym = cursor_node->sym; + call->ms.map = cursor_node->map; list_add_tail(&call->list, &node->val); + + callchain_cursor_advance(cursor); + cursor_node = callchain_cursor_current(cursor); } - node->val_nr = chain->nr - start; - if (!node->val_nr) - pr_warning("Warning: empty node in callchain tree\n"); } static void -add_child(struct callchain_node *parent, struct resolved_chain *chain, - int start, u64 period) +add_child(struct callchain_node *parent, + struct callchain_cursor *cursor, + u64 period) { struct callchain_node *new; new = create_child(parent, false); - fill_node(new, chain, start); + fill_node(new, cursor); new->children_hit = 0; 
new->hit = period; @@ -250,9 +247,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain, * Then create another child to host the given callchain of new branch */ static void -split_add_child(struct callchain_node *parent, struct resolved_chain *chain, - struct callchain_list *to_split, int idx_parents, int idx_local, - u64 period) +split_add_child(struct callchain_node *parent, + struct callchain_cursor *cursor, + struct callchain_list *to_split, + u64 idx_parents, u64 idx_local, u64 period) { struct callchain_node *new; struct list_head *old_tail; @@ -277,9 +275,9 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, parent->val_nr = idx_local; /* create a new child for the new branch if any */ - if (idx_total < chain->nr) { + if (idx_total < cursor->nr) { parent->hit = 0; - add_child(parent, chain, idx_total, period); + add_child(parent, cursor, period); parent->children_hit += period; } else { parent->hit = period; @@ -287,36 +285,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, } static int -append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start, u64 period); +append_chain(struct callchain_node *root, + struct callchain_cursor *cursor, + u64 period); static void -append_chain_children(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start, u64 period) +append_chain_children(struct callchain_node *root, + struct callchain_cursor *cursor, + u64 period) { struct callchain_node *rnode; /* lookup in childrens */ chain_for_each_child(rnode, root) { - unsigned int ret = append_chain(rnode, chain, start, period); + unsigned int ret = append_chain(rnode, cursor, period); if (!ret) goto inc_children_hit; } /* nothing in children, add to the current node */ - add_child(root, chain, start, period); + add_child(root, cursor, period); inc_children_hit: root->children_hit += period; } static int -append_chain(struct callchain_node 
*root, struct resolved_chain *chain, - unsigned int start, u64 period) +append_chain(struct callchain_node *root, + struct callchain_cursor *cursor, + u64 period) { + struct callchain_cursor_node *curr_snap = cursor->curr; struct callchain_list *cnode; - unsigned int i = start; + u64 start = cursor->pos; bool found = false; + u64 matches; /* * Lookup in the current node @@ -324,114 +327,95 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain, * anywhere inside a function. */ list_for_each_entry(cnode, &root->val, list) { + struct callchain_cursor_node *node; struct symbol *sym; - if (i == chain->nr) + node = callchain_cursor_current(cursor); + if (!node) break; - sym = chain->ips[i].ms.sym; + sym = node->sym; if (cnode->ms.sym && sym) { if (cnode->ms.sym->start != sym->start) break; - } else if (cnode->ip != chain->ips[i].ip) + } else if (cnode->ip != node->ip) break; if (!found) found = true; - i++; + + callchain_cursor_advance(cursor); } /* matches not, relay on the parent */ - if (!found) + if (!found) { + cursor->curr = curr_snap; + cursor->pos = start; return -1; + } + + matches = cursor->pos - start; /* we match only a part of the node. 
Split it and add the new chain */ - if (i - start < root->val_nr) { - split_add_child(root, chain, cnode, start, i - start, period); + if (matches < root->val_nr) { + split_add_child(root, cursor, cnode, start, matches, period); return 0; } /* we match 100% of the path, increment the hit */ - if (i - start == root->val_nr && i == chain->nr) { + if (matches == root->val_nr && cursor->pos == cursor->nr) { root->hit += period; return 0; } /* We match the node and still have a part remaining */ - append_chain_children(root, chain, i, period); + append_chain_children(root, cursor, period); return 0; } -static void filter_context(struct ip_callchain *old, struct resolved_chain *new, - struct map_symbol *syms) -{ - int i, j = 0; - - for (i = 0; i < (int)old->nr; i++) { - if (old->ips[i] >= PERF_CONTEXT_MAX) - continue; - - new->ips[j].ip = old->ips[i]; - new->ips[j].ms = syms[i]; - j++; - } - - new->nr = j; -} - - -int callchain_append(struct callchain_root *root, struct ip_callchain *chain, - struct map_symbol *syms, u64 period) +int callchain_append(struct callchain_root *root, + struct callchain_cursor *cursor, + u64 period) { - struct resolved_chain *filtered; - - if (!chain->nr) + if (!cursor->nr) return 0; - filtered = zalloc(sizeof(*filtered) + - chain->nr * sizeof(struct resolved_ip)); - if (!filtered) - return -ENOMEM; - - filter_context(chain, filtered, syms); - - if (!filtered->nr) - goto end; + callchain_cursor_commit(cursor); - append_chain_children(&root->node, filtered, 0, period); + append_chain_children(&root->node, cursor, period); - if (filtered->nr > root->max_depth) - root->max_depth = filtered->nr; -end: - free(filtered); + if (cursor->nr > root->max_depth) + root->max_depth = cursor->nr; return 0; } static int -merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, - struct resolved_chain *chain) +merge_chain_branch(struct callchain_cursor *cursor, + struct callchain_node *dst, struct callchain_node *src) { + struct 
callchain_cursor_node **old_last = cursor->last; struct callchain_node *child, *next_child; struct callchain_list *list, *next_list; - int old_pos = chain->nr; + int old_pos = cursor->nr; int err = 0; list_for_each_entry_safe(list, next_list, &src->val, list) { - chain->ips[chain->nr].ip = list->ip; - chain->ips[chain->nr].ms = list->ms; - chain->nr++; + callchain_cursor_append(cursor, list->ip, + list->ms.map, list->ms.sym); list_del(&list->list); free(list); } - if (src->hit) - append_chain_children(dst, chain, 0, src->hit); + if (src->hit) { + callchain_cursor_commit(cursor); + append_chain_children(dst, cursor, src->hit); + } chain_for_each_child_safe(child, next_child, src) { - err = merge_chain_branch(dst, child, chain); + err = merge_chain_branch(cursor, dst, child); if (err) break; @@ -439,26 +423,38 @@ merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, free(child); } - chain->nr = old_pos; + cursor->nr = old_pos; + cursor->last = old_last; return err; } -int callchain_merge(struct callchain_root *dst, struct callchain_root *src) +int callchain_merge(struct callchain_cursor *cursor, + struct callchain_root *dst, struct callchain_root *src) +{ + return merge_chain_branch(cursor, &dst->node, &src->node); +} + +int callchain_cursor_append(struct callchain_cursor *cursor, + u64 ip, struct map *map, struct symbol *sym) { - struct resolved_chain *chain; - int err; + struct callchain_cursor_node *node = *cursor->last; - chain = malloc(sizeof(*chain) + - src->max_depth * sizeof(struct resolved_ip)); - if (!chain) - return -ENOMEM; + if (!node) { + node = calloc(sizeof(*node), 1); + if (!node) + return -ENOMEM; - chain->nr = 0; + *cursor->last = node; + } - err = merge_chain_branch(&dst->node, &src->node, chain); + node->ip = ip; + node->map = map; + node->sym = sym; - free(chain); + cursor->nr++; - return err; + cursor->last = &node->next; + + return 0; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 
c15fb8c..d74a19a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -49,6 +49,27 @@ struct callchain_list { struct list_head list; }; +/* + * A callchain cursor is a single linked list that + * let one feed a callchain progressively. + * It keeps persitent allocated entries to minimize + * allocations. + */ +struct callchain_cursor_node { + u64 ip; + struct map *map; + struct symbol *sym; + struct callchain_cursor_node *next; +}; + +struct callchain_cursor { + u64 nr; + struct callchain_cursor_node *first; + struct callchain_cursor_node **last; + u64 pos; + struct callchain_cursor_node *curr; +}; + static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.brothers); @@ -67,9 +88,48 @@ static inline u64 cumul_hits(struct callchain_node *node) } int register_callchain_param(struct callchain_param *param); -int callchain_append(struct callchain_root *root, struct ip_callchain *chain, - struct map_symbol *syms, u64 period); -int callchain_merge(struct callchain_root *dst, struct callchain_root *src); +int callchain_append(struct callchain_root *root, + struct callchain_cursor *cursor, + u64 period); + +int callchain_merge(struct callchain_cursor *cursor, + struct callchain_root *dst, struct callchain_root *src); bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); + +/* + * Initialize a cursor before adding entries inside, but keep + * the previously allocated entries as a cache. + */ +static inline void callchain_cursor_reset(struct callchain_cursor *cursor) +{ + cursor->nr = 0; + cursor->last = &cursor->first; +} + +int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, + struct map *map, struct symbol *sym); + +/* Close a cursor writing session. 
Initialize for the reader */ +static inline void callchain_cursor_commit(struct callchain_cursor *cursor) +{ + cursor->curr = cursor->first; + cursor->pos = 0; +} + +/* Cursor reading iteration helpers */ +static inline struct callchain_cursor_node * +callchain_cursor_current(struct callchain_cursor *cursor) +{ + if (cursor->pos == cursor->nr) + return NULL; + + return cursor->curr; +} + +static inline void callchain_cursor_advance(struct callchain_cursor *cursor) +{ + cursor->curr = cursor->curr->next; + cursor->pos++; +} #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 32f4f1f..a438a06 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,7 +211,9 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) +static bool hists__collapse_insert_entry(struct hists *self, + struct rb_root *root, + struct hist_entry *he) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -226,8 +228,11 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) if (!cmp) { iter->period += he->period; - if (symbol_conf.use_callchain) - callchain_merge(iter->callchain, he->callchain); + if (symbol_conf.use_callchain) { + callchain_cursor_reset(&self->callchain_cursor); + callchain_merge(&self->callchain_cursor, iter->callchain, + he->callchain); + } hist_entry__free(he); return false; } @@ -262,7 +267,7 @@ void hists__collapse_resort(struct hists *self) next = rb_next(&n->rb_node); rb_erase(&n->rb_node, &self->entries); - if (collapse__insert_entry(&tmp, n)) + if (hists__collapse_insert_entry(self, &tmp, n)) hists__inc_nr_entries(self, n); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ee78985..889559b 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -77,6 +77,8 @@ struct hists { u64 event_stream; u32 type; u16 col_len[HISTC_NR_COLS]; + /* 
Best would be to reuse the session callchain cursor */ + struct callchain_cursor callchain_cursor; }; struct hist_entry *__hists__add_entry(struct hists *self, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 105f00b..b58a48a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -242,17 +242,16 @@ static bool symbol__match_parent_regex(struct symbol *sym) return 0; } -struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent) +int perf_session__resolve_callchain(struct perf_session *self, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) { u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; - struct map_symbol *syms = calloc(chain->nr, sizeof(*syms)); + int err; - if (!syms) - return NULL; + callchain_cursor_reset(&self->callchain_cursor); for (i = 0; i < chain->nr; i++) { u64 ip = chain->ips[i]; @@ -281,12 +280,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, *parent = al.sym; if (!symbol_conf.use_callchain) break; - syms[i].map = al.map; - syms[i].sym = al.sym; } + + err = callchain_cursor_append(&self->callchain_cursor, + ip, al.map, al.sym); + if (err) + return err; } - return syms; + return 0; } static int process_event_synth_stub(event_t *event __used, diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index decd83f..e815468 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -51,7 +51,8 @@ struct perf_session { int cwdlen; char *cwd; struct ordered_samples ordered_samples; - char filename[0]; + struct callchain_cursor callchain_cursor; + char filename[0]; }; struct perf_event_ops; @@ -94,10 +95,10 @@ int __perf_session__process_events(struct perf_session *self, int perf_session__process_events(struct perf_session *self, struct perf_event_ops *event_ops); -struct map_symbol *perf_session__resolve_callchain(struct 
perf_session *self, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent); +int perf_session__resolve_callchain(struct perf_session *self, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent); bool perf_session__has_traces(struct perf_session *self, const char *msg); -- cgit v0.10.2 From f08c3154ac439c4b5762a40107d84e839e08fbc5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 14 Jan 2011 04:51:59 +0100 Subject: perf callchain: Rename cumul_hits into callchain_cumul_hits That makes the callchain API naming more consistent and reduce potential naming clashes. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1294977121-5700-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 53a49e0..4c6360f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -38,14 +38,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct callchain_node *rnode; - u64 chain_cumul = cumul_hits(chain); + u64 chain_cumul = callchain_cumul_hits(chain); while (*p) { u64 rnode_cumul; parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); - rnode_cumul = cumul_hits(rnode); + rnode_cumul = callchain_cumul_hits(rnode); switch (mode) { case CHAIN_FLAT: @@ -104,7 +104,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node, chain_for_each_child(child, node) { __sort_chain_graph_abs(child, min_hit); - if (cumul_hits(child) >= min_hit) + if (callchain_cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_ABS); } @@ -129,7 +129,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node, chain_for_each_child(child, node) { __sort_chain_graph_rel(child, min_percent); - if (cumul_hits(child) >= min_hit) + 
if (callchain_cumul_hits(child) >= min_hit) rb_insert_callchain(&node->rb_root, child, CHAIN_GRAPH_REL); } @@ -270,7 +270,7 @@ split_add_child(struct callchain_node *parent, /* split the hits */ new->hit = parent->hit; new->children_hit = parent->children_hit; - parent->children_hit = cumul_hits(new); + parent->children_hit = callchain_cumul_hits(new); new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index d74a19a..07f71e3 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -82,7 +82,7 @@ static inline void callchain_init(struct callchain_root *root) root->max_depth = 0; } -static inline u64 cumul_hits(struct callchain_node *node) +static inline u64 callchain_cumul_hits(struct callchain_node *node) { return node->hit + node->children_hit; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a438a06..02ed318 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -430,7 +430,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, u64 cumul; child = rb_entry(node, struct callchain_node, rb_node); - cumul = cumul_hits(child); + cumul = callchain_cumul_hits(child); remaining -= cumul; /* diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 60c463c..8642823 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -377,7 +377,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); struct rb_node *next = rb_next(node); - u64 cumul = cumul_hits(child); + u64 cumul = callchain_cumul_hits(child); struct callchain_list *chain; char folded_sign = ' '; int first = true; -- cgit v0.10.2 From 16537f1355017a285b904bfb6bf767464293e69c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 14 Jan 2011 
04:52:00 +0100 Subject: perf callchain: Rename register_callchain_param into callchain_register_param To make the callchain API naming more consistent. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1294977121-5700-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c95599a..f6a4349 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -221,7 +221,7 @@ static int perf_session__setup_sample_type(struct perf_session *self) } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; - if (register_callchain_param(&callchain_param) < 0) { + if (callchain_register_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain" " params\n"); return -EINVAL; @@ -423,7 +423,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, if (tok2) callchain_param.print_limit = strtod(tok2, &endptr); setup: - if (register_callchain_param(&callchain_param) < 0) { + if (callchain_register_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain params\n"); return -1; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 4c6360f..5b3f09d 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -143,7 +143,7 @@ sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, rb_root->rb_node = chain_root->node.rb_root.rb_node; } -int register_callchain_param(struct callchain_param *param) +int callchain_register_param(struct callchain_param *param) { switch (param->mode) { case CHAIN_GRAPH_ABS: diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 07f71e3..2bb5403 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -87,7 +87,7 @@ static inline u64 callchain_cumul_hits(struct 
callchain_node *node) return node->hit + node->children_hit; } -int register_callchain_param(struct callchain_param *param); +int callchain_register_param(struct callchain_param *param); int callchain_append(struct callchain_root *root, struct callchain_cursor *cursor, u64 period); -- cgit v0.10.2 From 529363b76929beb85b81439c61063130af046a21 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 14 Jan 2011 04:52:01 +0100 Subject: perf callchain: Don't give arbitrary gender to callchain tree nodes Some little callchain tree nodes shyly asked me if they can have sisters. How cute! Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1294977121-5700-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 5b3f09d..f8c66d1 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -26,10 +26,10 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) } #define chain_for_each_child(child, parent) \ - list_for_each_entry(child, &parent->children, brothers) + list_for_each_entry(child, &parent->children, siblings) #define chain_for_each_child_safe(child, next, parent) \ - list_for_each_entry_safe(child, next, &parent->children, brothers) + list_for_each_entry_safe(child, next, &parent->children, siblings) static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, @@ -189,7 +189,7 @@ create_child(struct callchain_node *parent, bool inherit_children) chain_for_each_child(next, new) next->parent = new; } - list_add_tail(&new->brothers, &parent->children); + list_add_tail(&new->siblings, &parent->children); return new; } @@ -419,7 +419,7 @@ merge_chain_branch(struct callchain_cursor *cursor, if (err) break; - list_del(&child->brothers); + list_del(&child->siblings); free(child); } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 
2bb5403..6713725 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -16,7 +16,7 @@ enum chain_mode { struct callchain_node { struct callchain_node *parent; - struct list_head brothers; + struct list_head siblings; struct list_head children; struct list_head val; struct rb_node rb_node; /* to sort nodes in an rbtree */ @@ -72,7 +72,7 @@ struct callchain_cursor { static inline void callchain_init(struct callchain_root *root) { - INIT_LIST_HEAD(&root->node.brothers); + INIT_LIST_HEAD(&root->node.siblings); INIT_LIST_HEAD(&root->node.children); INIT_LIST_HEAD(&root->node.val); -- cgit v0.10.2 From b0e8572f3b29c0760b66ba5627a6d5426c88c97d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 16 Jan 2011 17:39:15 -0200 Subject: perf top: Add native_safe_halt to skip symbols Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 58352ad..31fbaf3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -933,6 +933,7 @@ repeat: /* Tag samples to be skipped. */ static const char *skip_symbols[] = { "default_idle", + "native_safe_halt", "cpu_idle", "enter_idle", "exit_idle", -- cgit v0.10.2 From 4cc9cec636e7f78aba7f17606ac13cac07ea5787 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Jan 2011 21:45:58 +0900 Subject: perf probe: Introduce lines walker interface Introduce die_walk_lines() for walking on the line list of given die, and use it in line_range finder and probe point finder. 
Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110113124558.22426.48170.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu [ committer note: s/%ld/%zd/ for a size_t nlines var that broke f14 x86 build] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index ab83b6a..508c017 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -458,6 +458,124 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); } +/* Walker on lines (Note: line number will not be sorted) */ +typedef int (* line_walk_handler_t) (const char *fname, int lineno, + Dwarf_Addr addr, void *data); + +struct __line_walk_param { + line_walk_handler_t handler; + void *data; + int retval; +}; + +/* Walk on decl lines in given DIE */ +static int __die_walk_funclines(Dwarf_Die *sp_die, + line_walk_handler_t handler, void *data) +{ + const char *fname; + Dwarf_Addr addr; + int lineno, ret = 0; + + /* Handle function declaration line */ + fname = dwarf_decl_file(sp_die); + if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && + dwarf_entrypc(sp_die, &addr) == 0) { + ret = handler(fname, lineno, addr, data); + } + + return ret; +} + +static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) +{ + struct __line_walk_param *lw = data; + + lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data); + if (lw->retval != 0) + return DWARF_CB_ABORT; + + return DWARF_CB_OK; +} + +/* + * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on + * the lines inside the subprogram, otherwise PDIE must be a CU DIE. 
+ */ +static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler, + void *data) +{ + Dwarf_Lines *lines; + Dwarf_Line *line; + Dwarf_Addr addr; + const char *fname; + int lineno, ret = 0; + Dwarf_Die die_mem, *cu_die; + size_t nlines, i; + + /* Get the CU die */ + if (dwarf_tag(pdie) == DW_TAG_subprogram) + cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL); + else + cu_die = pdie; + if (!cu_die) { + pr_debug2("Failed to get CU from subprogram\n"); + return -EINVAL; + } + + /* Get lines list in the CU */ + if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) { + pr_debug2("Failed to get source lines on this CU.\n"); + return -ENOENT; + } + pr_debug2("Get %zd lines from this CU\n", nlines); + + /* Walk on the lines on lines list */ + for (i = 0; i < nlines; i++) { + line = dwarf_onesrcline(lines, i); + if (line == NULL || + dwarf_lineno(line, &lineno) != 0 || + dwarf_lineaddr(line, &addr) != 0) { + pr_debug2("Failed to get line info. " + "Possible error in debuginfo.\n"); + continue; + } + /* Filter lines based on address */ + if (pdie != cu_die) + /* + * Address filtering + * The line is included in given function, and + * no inline block includes it. + */ + if (!dwarf_haspc(pdie, addr) || + die_find_inlinefunc(pdie, addr, &die_mem)) + continue; + /* Get source line */ + fname = dwarf_linesrc(line, NULL, NULL); + + ret = handler(fname, lineno, addr, data); + if (ret != 0) + return ret; + } + + /* + * Dwarf lines doesn't include function declarations and inlined + * subroutines. We have to check functions list or given function. 
+ */ + if (pdie != cu_die) + ret = __die_walk_funclines(pdie, handler, data); + else { + struct __line_walk_param param = { + .handler = handler, + .data = data, + .retval = 0, + }; + dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0); + ret = param.retval; + } + + return ret; +} + struct __find_variable_param { const char *name; Dwarf_Addr addr; @@ -1050,43 +1168,26 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) return ret; } -/* Find probe point from its line number */ -static int find_probe_point_by_line(struct probe_finder *pf) +static int probe_point_line_walker(const char *fname, int lineno, + Dwarf_Addr addr, void *data) { - Dwarf_Lines *lines; - Dwarf_Line *line; - size_t nlines, i; - Dwarf_Addr addr; - int lineno; - int ret = 0; + struct probe_finder *pf = data; + int ret; - if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { - pr_warning("No source lines found.\n"); - return -ENOENT; - } + if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0) + return 0; - for (i = 0; i < nlines && ret == 0; i++) { - line = dwarf_onesrcline(lines, i); - if (dwarf_lineno(line, &lineno) != 0 || - lineno != pf->lno) - continue; + pf->addr = addr; + ret = call_probe_finder(NULL, pf); - /* TODO: Get fileno from line, but how? */ - if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0) - continue; - - if (dwarf_lineaddr(line, &addr) != 0) { - pr_warning("Failed to get the address of the line.\n"); - return -ENOENT; - } - pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n", - (int)i, lineno, (uintmax_t)addr); - pf->addr = addr; + /* Continue if no error, because the line will be in inline function */ + return ret < 0 ?: 0; +} - ret = call_probe_finder(NULL, pf); - /* Continuing, because target line might be inlined.
*/ - } - return ret; +/* Find probe point from its line number */ +static int find_probe_point_by_line(struct probe_finder *pf) +{ + return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf); } /* Find lines which match lazy pattern */ @@ -1140,15 +1241,31 @@ out_close: return nlines; } +static int probe_point_lazy_walker(const char *fname, int lineno, + Dwarf_Addr addr, void *data) +{ + struct probe_finder *pf = data; + int ret; + + if (!line_list__has_line(&pf->lcache, lineno) || + strtailcmp(fname, pf->fname) != 0) + return 0; + + pr_debug("Probe line found: line:%d addr:0x%llx\n", + lineno, (unsigned long long)addr); + pf->addr = addr; + ret = call_probe_finder(NULL, pf); + + /* + * Continue if no error, because the lazy pattern will match + * to other lines + */ + return ret < 0 ?: 0; +} + /* Find probe points from lazy pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { - Dwarf_Lines *lines; - Dwarf_Line *line; - size_t nlines, i; - Dwarf_Addr addr; - Dwarf_Die die_mem; - int lineno; int ret = 0; if (list_empty(&pf->lcache)) { @@ -1162,45 +1279,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) return ret; } - if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { - pr_warning("No source lines found.\n"); - return -ENOENT; - } - - for (i = 0; i < nlines && ret >= 0; i++) { - line = dwarf_onesrcline(lines, i); - - if (dwarf_lineno(line, &lineno) != 0 || - !line_list__has_line(&pf->lcache, lineno)) - continue; - - /* TODO: Get fileno from line, but how? */ - if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0) - continue; - - if (dwarf_lineaddr(line, &addr) != 0) { - pr_debug("Failed to get the address of line %d.\n", - lineno); - continue; - } - if (sp_die) { - /* Address filtering 1: does sp_die include addr? */ - if (!dwarf_haspc(sp_die, addr)) - continue; - /* Address filtering 2: No child include addr? 
*/ - if (die_find_inlinefunc(sp_die, addr, &die_mem)) - continue; - } - - pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n", - (int)i, lineno, (unsigned long long)addr); - pf->addr = addr; - - ret = call_probe_finder(sp_die, pf); - /* Continuing, because target line might be inlined. */ - } - /* TODO: deallocate lines, but how? */ - return ret; + return die_walk_lines(sp_die, probe_point_lazy_walker, pf); } /* Callback parameter with return value */ @@ -1644,91 +1723,28 @@ static int line_range_add_line(const char *src, unsigned int lineno, return line_list__add_line(&lr->line_list, lineno); } -/* Search function declaration lines */ -static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data) +static int line_range_walk_cb(const char *fname, int lineno, + Dwarf_Addr addr __used, + void *data) { - struct dwarf_callback_param *param = data; - struct line_finder *lf = param->data; - const char *src; - int lineno; - - src = dwarf_decl_file(sp_die); - if (src && strtailcmp(src, lf->fname) != 0) - return DWARF_CB_OK; + struct line_finder *lf = data; - if (dwarf_decl_line(sp_die, &lineno) != 0 || + if ((strtailcmp(fname, lf->fname) != 0) || (lf->lno_s > lineno || lf->lno_e < lineno)) - return DWARF_CB_OK; + return 0; - param->retval = line_range_add_line(src, lineno, lf->lr); - if (param->retval < 0) - return DWARF_CB_ABORT; - return DWARF_CB_OK; -} + if (line_range_add_line(fname, lineno, lf->lr) < 0) + return -EINVAL; -static int find_line_range_func_decl_lines(struct line_finder *lf) -{ - struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0}; - dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0); - return param.retval; + return 0; } /* Find line range from its line number */ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) { - Dwarf_Lines *lines; - Dwarf_Line *line; - size_t nlines, i; - Dwarf_Addr addr; - int lineno, ret = 0; - const char *src; - Dwarf_Die die_mem; - - line_list__init(&lf->lr->line_list);
- if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) { - pr_warning("No source lines found.\n"); - return -ENOENT; - } - - /* Search probable lines on lines list */ - for (i = 0; i < nlines; i++) { - line = dwarf_onesrcline(lines, i); - if (dwarf_lineno(line, &lineno) != 0 || - (lf->lno_s > lineno || lf->lno_e < lineno)) - continue; - - if (sp_die) { - /* Address filtering 1: does sp_die include addr? */ - if (dwarf_lineaddr(line, &addr) != 0 || - !dwarf_haspc(sp_die, addr)) - continue; - - /* Address filtering 2: No child include addr? */ - if (die_find_inlinefunc(sp_die, addr, &die_mem)) - continue; - } - - /* TODO: Get fileno from line, but how? */ - src = dwarf_linesrc(line, NULL, NULL); - if (strtailcmp(src, lf->fname) != 0) - continue; - - ret = line_range_add_line(src, lineno, lf->lr); - if (ret < 0) - return ret; - } + int ret; - /* - * Dwarf lines doesn't include function declarations. We have to - * check functions list or given function. - */ - if (sp_die) { - src = dwarf_decl_file(sp_die); - if (src && dwarf_decl_line(sp_die, &lineno) == 0 && - (lf->lno_s <= lineno && lf->lno_e >= lineno)) - ret = line_range_add_line(src, lineno, lf->lr); - } else - ret = find_line_range_func_decl_lines(lf); + ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf); /* Update status */ if (ret >= 0) @@ -1758,9 +1774,6 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) struct line_finder *lf = param->data; struct line_range *lr = lf->lr; - pr_debug("find (%llx) %s\n", - (unsigned long long)dwarf_dieoffset(sp_die), - dwarf_diename(sp_die)); if (dwarf_tag(sp_die) == DW_TAG_subprogram && die_compare_name(sp_die, lr->function)) { lf->fname = dwarf_decl_file(sp_die); -- cgit v0.10.2 From 5069ed86be3c2f28bcdf7fae1374ec0c325aafba Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Jan 2011 21:46:05 +0900 Subject: perf probe: Enable to put probe inline function call site Enable to put probe inline function call site. 
This will increase line-based probe-ability. $ ./perf probe -L schedule:48 pre_schedule(rq, prev); 50 if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); put_prev_task(rq, prev); next = pick_next_task(rq); 56 if (likely(prev != next)) { sched_info_switch(prev, next); trace_sched_switch_out(prev, next); perf_event_task_sched_out(prev, next); $ ./perf probe -L schedule:48 48 pre_schedule(rq, prev); 50 if (unlikely(!rq->nr_running)) 51 idle_balance(cpu, rq); 53 put_prev_task(rq, prev); 54 next = pick_next_task(rq); 56 if (likely(prev != next)) { 57 sched_info_switch(prev, next); 58 trace_sched_switch_out(prev, next); 59 perf_event_task_sched_out(prev, next); Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110113124604.22426.48873.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 508c017..69215bf 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -280,6 +280,19 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) return name ? 
(strcmp(tname, name) == 0) : false; } +/* Get callsite line number of inline-function instance */ +static int die_get_call_lineno(Dwarf_Die *in_die) +{ + Dwarf_Attribute attr; + Dwarf_Word ret; + + if (!dwarf_attr(in_die, DW_AT_call_line, &attr)) + return -ENOENT; + + dwarf_formudata(&attr, &ret); + return (int)ret; +} + /* Get type die */ static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) { @@ -463,27 +476,54 @@ typedef int (* line_walk_handler_t) (const char *fname, int lineno, Dwarf_Addr addr, void *data); struct __line_walk_param { + const char *fname; line_walk_handler_t handler; void *data; int retval; }; -/* Walk on decl lines in given DIE */ +static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) +{ + struct __line_walk_param *lw = data; + Dwarf_Addr addr; + int lineno; + + if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { + lineno = die_get_call_lineno(in_die); + if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { + lw->retval = lw->handler(lw->fname, lineno, addr, + lw->data); + if (lw->retval != 0) + return DIE_FIND_CB_FOUND; + } + } + return DIE_FIND_CB_SIBLING; +} + +/* Walk on lines of blocks included in given DIE */ static int __die_walk_funclines(Dwarf_Die *sp_die, line_walk_handler_t handler, void *data) { - const char *fname; + struct __line_walk_param lw = { + .handler = handler, + .data = data, + .retval = 0, + }; + Dwarf_Die die_mem; Dwarf_Addr addr; - int lineno, ret = 0; + int lineno; /* Handle function declaration line */ - fname = dwarf_decl_file(sp_die); - if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && + lw.fname = dwarf_decl_file(sp_die); + if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 && dwarf_entrypc(sp_die, &addr) == 0) { - ret = handler(fname, lineno, addr, data); + lw.retval = handler(lw.fname, lineno, addr, data); + if (lw.retval != 0) + goto done; } - - return ret; + die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem); +done: + return lw.retval; } static int 
__die_walk_culines_cb(Dwarf_Die *sp_die, void *data) -- cgit v0.10.2 From e80711ca8512c8586da0c3e18e2f1caf73c88731 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Jan 2011 21:46:11 +0900 Subject: perf probe: Add --funcs to show available functions in symtab Add --funcs to show available functions in symtab. Originally this feature came from Srikar's uprobes patches ( http://lkml.org/lkml/2010/8/27/244 ) e.g. ... __ablkcipher_walk_complete __absent_pages_in_range __account_scheduler_latency __add_pages __alloc_pages_nodemask __alloc_percpu __alloc_reserved_percpu __alloc_skb __alloc_workqueue_key __any_online_cpu __ata_ehi_push_desc ... This also supports symbols in module, e.g. ... cleanup_module cpuid_maxphyaddr emulate_clts emulate_instruction emulate_int_real emulate_invlpg emulator_get_dr emulator_set_dr emulator_task_switch emulator_write_emulated emulator_write_phys fx_init ... Original-patch-from: Srikar Dronamraju Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110113124611.22426.10835.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu [ committer note: Add missing elf.h for STB_GLOBAL that broke a RHEL4 build ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 86b797a..fcc51fe 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -73,6 +73,10 @@ OPTIONS (Only for --vars) Show external defined variables in addition to local variables. +-F:: +--funcs:: + Show available functions in given module or kernel. + -f:: --force:: Forcibly add events with existing name. 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index add163c..6cf708a 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -52,6 +52,7 @@ static struct { bool show_lines; bool show_vars; bool show_ext_vars; + bool show_funcs; bool mod_events; int nevents; struct perf_probe_event events[MAX_PROBES]; @@ -221,6 +222,8 @@ static const struct option options[] = { OPT__DRY_RUN(&probe_event_dry_run), OPT_INTEGER('\0', "max-probes", &params.max_probe_points, "Set how many probe points can be found for a probe."), + OPT_BOOLEAN('F', "funcs", &params.show_funcs, + "Show potential probe-able functions."), OPT_END() }; @@ -246,7 +249,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) params.max_probe_points = MAX_PROBES; if ((!params.nevents && !params.dellist && !params.list_events && - !params.show_lines)) + !params.show_lines && !params.show_funcs)) usage_with_options(probe_usage, options); /* @@ -267,12 +270,36 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) pr_err(" Error: Don't use --list with --vars.\n"); usage_with_options(probe_usage, options); } + if (params.show_funcs) { + pr_err(" Error: Don't use --list with --funcs.\n"); + usage_with_options(probe_usage, options); + } ret = show_perf_probe_events(); if (ret < 0) pr_err(" Error: Failed to show event list. (%d)\n", ret); return ret; } + if (params.show_funcs) { + if (params.nevents != 0 || params.dellist) { + pr_err(" Error: Don't use --funcs with" + " --add/--del.\n"); + usage_with_options(probe_usage, options); + } + if (params.show_lines) { + pr_err(" Error: Don't use --funcs with --line.\n"); + usage_with_options(probe_usage, options); + } + if (params.show_vars) { + pr_err(" Error: Don't use --funcs with --vars.\n"); + usage_with_options(probe_usage, options); + } + ret = show_available_funcs(params.target_module); + if (ret < 0) + pr_err(" Error: Failed to show functions."
+ " (%d)\n", ret); + return ret; + } #ifdef DWARF_SUPPORT if (params.show_lines) { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6e29d9c..859d377 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -31,6 +31,7 @@ #include #include #include +#include #undef _GNU_SOURCE #include "util.h" @@ -111,7 +112,25 @@ static struct symbol *__find_kernel_function_by_name(const char *name, NULL); } -const char *kernel_get_module_path(const char *module) +static struct map *kernel_get_module_map(const char *module) +{ + struct rb_node *nd; + struct map_groups *grp = &machine.kmaps; + + if (!module) + module = "kernel"; + + for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); + if (strncmp(pos->dso->short_name + 1, module, + pos->dso->short_name_len - 2) == 0) { + return pos; + } + } + return NULL; +} + +static struct dso *kernel_get_module_dso(const char *module) { struct dso *dso; struct map *map; @@ -141,7 +160,13 @@ const char *kernel_get_module_path(const char *module) } } found: - return dso->long_name; + return dso; +} + +const char *kernel_get_module_path(const char *module) +{ + struct dso *dso = kernel_get_module_dso(module); + return (dso) ? dso->long_name : NULL; } #ifdef DWARF_SUPPORT @@ -1913,3 +1938,42 @@ int del_perf_probe_events(struct strlist *dellist) return ret; } +/* + * If a symbol corresponds to a function with global binding return 0. + * For all others return 1. + */ +static int filter_non_global_functions(struct map *map __unused, + struct symbol *sym) +{ + if (sym->binding != STB_GLOBAL) + return 1; + + return 0; +} + +int show_available_funcs(const char *module) +{ + struct map *map; + int ret; + + setup_pager(); + + ret = init_vmlinux(); + if (ret < 0) + return ret; + + map = kernel_get_module_map(module); + if (!map) { + pr_err("Failed to find %s map.\n", (module) ? 
: "kernel"); + return -EINVAL; + } + if (map__load(map, filter_non_global_functions)) { + pr_err("Failed to load map.\n"); + return -EINVAL; + } + if (!dso__sorted_by_name(map->dso, map->type)) + dso__sort_by_name(map->dso, map->type); + + dso__fprintf_symbols_by_name(map->dso, map->type, stdout); + return 0; +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 5accbed..1fb4f18 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -127,6 +127,7 @@ extern int show_line_range(struct line_range *lr, const char *module); extern int show_available_vars(struct perf_probe_event *pevs, int npevs, int max_probe_points, const char *module, bool externs); +extern int show_available_funcs(const char *module); /* Maximum index number of event-name postfix */ -- cgit v0.10.2 From d7065adb9b4f3384c2615f0a3dbdb6c3aae1eb18 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Sun, 16 Jan 2011 17:14:45 +0100 Subject: perf record: auto detect when stdout is a pipe This patch gives the ability to 'perf record' to detect when its stdout has been redirected to a pipe. There's now no more need to add '-o -' switch in this case. However '-o ' option has always precedence, that is if specified and stdout has been connected via a pipe then the output will go into the specified output. 
LKML-Reference: Signed-off-by: Franck Bui-Huu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 45a3689..1346d42 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -48,7 +48,7 @@ static unsigned int user_freq = UINT_MAX; static int freq = 1000; static int output; static int pipe_output = 0; -static const char *output_name = "perf.data"; +static const char *output_name = NULL; static int group = 0; static int realtime_prio = 0; static bool nodelay = false; @@ -497,18 +497,26 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - if (!strcmp(output_name, "-")) - pipe_output = 1; - else if (!stat(output_name, &st) && st.st_size) { - if (write_mode == WRITE_FORCE) { - char oldname[PATH_MAX]; - snprintf(oldname, sizeof(oldname), "%s.old", - output_name); - unlink(oldname); - rename(output_name, oldname); + if (!output_name) { + if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode)) + pipe_output = 1; + else + output_name = "perf.data"; + } + if (output_name) { + if (!strcmp(output_name, "-")) + pipe_output = 1; + else if (!stat(output_name, &st) && st.st_size) { + if (write_mode == WRITE_FORCE) { + char oldname[PATH_MAX]; + snprintf(oldname, sizeof(oldname), "%s.old", + output_name); + unlink(oldname); + rename(output_name, oldname); + } + } else if (write_mode == WRITE_APPEND) { + write_mode = WRITE_FORCE; } - } else if (write_mode == WRITE_APPEND) { - write_mode = WRITE_FORCE; } flags = O_CREAT|O_RDWR; -- cgit v0.10.2 From 17ea1b70a87e28457821318341bead2b45563092 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 17 Jan 2011 14:40:46 -0200 Subject: perf tools: Pass the struct opt to the wildcard parsing routine It is needed because it will call parse_event for each tracepoint name that matches, and we pass the perf_evlist via opt->value. 
Problem introduced in 4503fdd where my assumption about opt being always non NULL made me not look at callers of parse_events outside builtin-*.c. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d3086ce..cf082da 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -446,8 +446,8 @@ parse_single_tracepoint_event(char *sys_name, /* sys + ':' + event + ':' + flags*/ #define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) static enum event_result -parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp, - char *flags) +parse_multiple_tracepoint_event(const struct option *opt, char *sys_name, + const char *evt_exp, char *flags) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; @@ -480,15 +480,16 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp, if (len < 0) return EVT_FAILED; - if (parse_events(NULL, event_opt, 0)) + if (parse_events(opt, event_opt, 0)) return EVT_FAILED; } return EVT_HANDLED_ALL; } -static enum event_result parse_tracepoint_event(const char **strp, - struct perf_event_attr *attr) +static enum event_result +parse_tracepoint_event(const struct option *opt, const char **strp, + struct perf_event_attr *attr) { const char *evt_name; char *flags = NULL, *comma_loc; @@ -527,7 +528,7 @@ static enum event_result parse_tracepoint_event(const char **strp, return EVT_FAILED; if (strpbrk(evt_name, "*?")) { *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */ - return parse_multiple_tracepoint_event(sys_name, evt_name, + return parse_multiple_tracepoint_event(opt, sys_name, evt_name, flags); } else { return parse_single_tracepoint_event(sys_name, evt_name, @@ -737,11 +738,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr) * Symbolic names are 
(almost) exactly matched. */ static enum event_result -parse_event_symbols(const char **str, struct perf_event_attr *attr) +parse_event_symbols(const struct option *opt, const char **str, + struct perf_event_attr *attr) { enum event_result ret; - ret = parse_tracepoint_event(str, attr); + ret = parse_tracepoint_event(opt, str, attr); if (ret != EVT_FAILED) goto modifier; @@ -783,7 +785,7 @@ int parse_events(const struct option *opt, const char *str, int unset __used) for (;;) { memset(&attr, 0, sizeof(attr)); - ret = parse_event_symbols(&str, &attr); + ret = parse_event_symbols(opt, &str, &attr); if (ret == EVT_FAILED) return -1; -- cgit v0.10.2 From fd78260b5376173faeb17127bd63b3c99a8e8bfb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jan 2011 15:15:24 -0200 Subject: perf threads: Move thread_map to separate file To untangle it from struct thread handling, that is tied to symbols, etc. Right now in the python bindings I'm working on I need just a subset of the util/ files, untangling it allows me to do that. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f20bc6f..638e8e1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -426,6 +426,7 @@ LIB_H += util/values.h LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h +LIB_H += util/thread_map.h LIB_H += util/trace-event.h LIB_H += util/probe-finder.h LIB_H += util/probe-event.h @@ -471,6 +472,7 @@ LIB_OBJS += $(OUTPUT)util/map.o LIB_OBJS += $(OUTPUT)util/pstack.o LIB_OBJS += $(OUTPUT)util/session.o LIB_OBJS += $(OUTPUT)util/thread.o +LIB_OBJS += $(OUTPUT)util/thread_map.o LIB_OBJS += $(OUTPUT)util/trace-event-parse.o LIB_OBJS += $(OUTPUT)util/trace-event-read.o LIB_OBJS += $(OUTPUT)util/trace-event-info.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1346d42..d788630 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -24,6 +24,7 @@ #include "util/session.h" #include "util/symbol.h" #include "util/cpumap.h" +#include "util/thread_map.h" #include #include diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e2a2d02..8906adf 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -49,6 +49,7 @@ #include "util/header.h" #include "util/cpumap.h" #include "util/thread.h" +#include "util/thread_map.h" #include #include diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 4fd3453..dc91ee0 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -12,7 +12,7 @@ #include "util/parse-events.h" #include "util/session.h" #include "util/symbol.h" -#include "util/thread.h" +#include "util/thread_map.h" static long page_size; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 31fbaf3..d0b16d9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ 
-26,6 +26,7 @@ #include "util/session.h" #include "util/symbol.h" #include "util/thread.h" +#include "util/thread_map.h" #include "util/util.h" #include #include "util/parse-options.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ee49035..9a6d942 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3,7 +3,7 @@ #include "../perf.h" #include "util.h" #include "cpumap.h" -#include "thread.h" +#include "thread_map.h" #include #include diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 00f4ead..d5d3b22 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -7,61 +7,6 @@ #include "util.h" #include "debug.h" -/* Skip "." and ".." directories */ -static int filter(const struct dirent *dir) -{ - if (dir->d_name[0] == '.') - return 0; - else - return 1; -} - -struct thread_map *thread_map__new_by_pid(pid_t pid) -{ - struct thread_map *threads; - char name[256]; - int items; - struct dirent **namelist = NULL; - int i; - - sprintf(name, "/proc/%d/task", pid); - items = scandir(name, &namelist, filter, NULL); - if (items <= 0) - return NULL; - - threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); - if (threads != NULL) { - for (i = 0; i < items; i++) - threads->map[i] = atoi(namelist[i]->d_name); - threads->nr = items; - } - - for (i=0; imap[0] = tid; - threads->nr = 1; - } - - return threads; -} - -struct thread_map *thread_map__new(pid_t pid, pid_t tid) -{ - if (pid != -1) - return thread_map__new_by_pid(pid); - return thread_map__new_by_tid(tid); -} - static struct thread *thread__new(pid_t pid) { struct thread *self = zalloc(sizeof(*self)); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index d757410..e5f2401 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -18,24 +18,10 @@ struct thread { int comm_len; }; -struct thread_map { - int nr; - int map[]; -}; - struct perf_session; void thread__delete(struct thread *self); -struct thread_map 
*thread_map__new_by_pid(pid_t pid); -struct thread_map *thread_map__new_by_tid(pid_t tid); -struct thread_map *thread_map__new(pid_t pid, pid_t tid); - -static inline void thread_map__delete(struct thread_map *threads) -{ - free(threads); -} - int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c new file mode 100644 index 0000000..a5df131 --- /dev/null +++ b/tools/perf/util/thread_map.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include "thread_map.h" + +/* Skip "." and ".." directories */ +static int filter(const struct dirent *dir) +{ + if (dir->d_name[0] == '.') + return 0; + else + return 1; +} + +struct thread_map *thread_map__new_by_pid(pid_t pid) +{ + struct thread_map *threads; + char name[256]; + int items; + struct dirent **namelist = NULL; + int i; + + sprintf(name, "/proc/%d/task", pid); + items = scandir(name, &namelist, filter, NULL); + if (items <= 0) + return NULL; + + threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); + if (threads != NULL) { + for (i = 0; i < items; i++) + threads->map[i] = atoi(namelist[i]->d_name); + threads->nr = items; + } + + for (i=0; imap[0] = tid; + threads->nr = 1; + } + + return threads; +} + +struct thread_map *thread_map__new(pid_t pid, pid_t tid) +{ + if (pid != -1) + return thread_map__new_by_pid(pid); + return thread_map__new_by_tid(tid); +} + +void thread_map__delete(struct thread_map *threads) +{ + free(threads); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h new file mode 100644 index 0000000..3cb9073 --- /dev/null +++ b/tools/perf/util/thread_map.h @@ -0,0 +1,15 @@ +#ifndef __PERF_THREAD_MAP_H +#define __PERF_THREAD_MAP_H + +#include + +struct thread_map { + int nr; + int map[]; +}; + +struct thread_map *thread_map__new_by_pid(pid_t pid); +struct thread_map 
*thread_map__new_by_tid(pid_t tid); +struct thread_map *thread_map__new(pid_t pid, pid_t tid); +void thread_map__delete(struct thread_map *threads); +#endif /* __PERF_THREAD_MAP_H */ -- cgit v0.10.2 From d0dd74e853a0a6f37e8061d6d50be41c7034c54c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 21 Jan 2011 13:46:41 -0200 Subject: perf tools: Move event__parse_sample to evsel.c To avoid linking more stuff in the python binding I'm working on, future csets will make the sample type be taken from the evsel itself, but for that we need to first have one file per cpu and per sample_type, not a single perf.data file. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index dc91ee0..231e3e2 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -10,7 +10,6 @@ #include "util/evlist.h" #include "util/parse-options.h" #include "util/parse-events.h" -#include "util/session.h" #include "util/symbol.h" #include "util/thread_map.h" @@ -457,7 +456,6 @@ static int test__basic_mmap(void) int err = -1; event_t *event; struct thread_map *threads; - struct perf_session session; struct cpu_map *cpus; struct perf_evlist *evlist; struct perf_event_attr attr = { @@ -521,13 +519,6 @@ static int test__basic_mmap(void) attr.wakeup_events = 1; attr.sample_period = 1; - /* - * FIXME: use evsel->attr.sample_type in event__parse_sample. - * This will nicely remove the requirement that we have - * all the events with the same sample_type. 
- */ - session.sample_type = attr.sample_type; - for (i = 0; i < nsyscalls; ++i) { attr.config = ids[i]; evsels[i] = perf_evsel__new(&attr, i); @@ -567,7 +558,7 @@ static int test__basic_mmap(void) goto out_munmap; } - event__parse_sample(event, &session, &sample); + event__parse_sample(event, attr.sample_type, false, &sample); evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { pr_debug("event with id %" PRIu64 diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d0b16d9..ce2e50c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1106,7 +1106,7 @@ static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) event_t *event; while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) { - event__parse_sample(event, self, &sample); + perf_session__parse_sample(self, event, &sample); if (event->header.type == PERF_RECORD_SAMPLE) event__process_sample(event, &sample, self); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1478ab4..e4db8b8 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -826,128 +826,3 @@ out_filtered: al->filtered = true; return 0; } - -static int event__parse_id_sample(const event_t *event, - struct perf_session *session, - struct sample_data *sample) -{ - const u64 *array; - u64 type; - - sample->cpu = sample->pid = sample->tid = -1; - sample->stream_id = sample->id = sample->time = -1ULL; - - if (!session->sample_id_all) - return 0; - - array = event->sample.array; - array += ((event->header.size - - sizeof(event->header)) / sizeof(u64)) - 1; - type = session->sample_type; - - if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - sample->cpu = *p; - array--; - } - - if (type & PERF_SAMPLE_STREAM_ID) { - sample->stream_id = *array; - array--; - } - - if (type & PERF_SAMPLE_ID) { - sample->id = *array; - array--; - } - - if (type & PERF_SAMPLE_TIME) { - sample->time = *array; - array--; - } - - if (type & PERF_SAMPLE_TID) { - 
u32 *p = (u32 *)array; - sample->pid = p[0]; - sample->tid = p[1]; - } - - return 0; -} - -int event__parse_sample(const event_t *event, struct perf_session *session, - struct sample_data *data) -{ - const u64 *array; - u64 type; - - if (event->header.type != PERF_RECORD_SAMPLE) - return event__parse_id_sample(event, session, data); - - array = event->sample.array; - type = session->sample_type; - - if (type & PERF_SAMPLE_IP) { - data->ip = event->ip.ip; - array++; - } - - if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - data->pid = p[0]; - data->tid = p[1]; - array++; - } - - if (type & PERF_SAMPLE_TIME) { - data->time = *array; - array++; - } - - if (type & PERF_SAMPLE_ADDR) { - data->addr = *array; - array++; - } - - data->id = -1ULL; - if (type & PERF_SAMPLE_ID) { - data->id = *array; - array++; - } - - if (type & PERF_SAMPLE_STREAM_ID) { - data->stream_id = *array; - array++; - } - - if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - data->cpu = *p; - array++; - } else - data->cpu = -1; - - if (type & PERF_SAMPLE_PERIOD) { - data->period = *array; - array++; - } - - if (type & PERF_SAMPLE_READ) { - pr_debug("PERF_SAMPLE_READ is unsuported for now\n"); - return -1; - } - - if (type & PERF_SAMPLE_CALLCHAIN) { - data->callchain = (struct ip_callchain *)array; - array += 1 + data->callchain->nr; - } - - if (type & PERF_SAMPLE_RAW) { - u32 *p = (u32 *)array; - data->raw_size = *p; - p++; - data->raw_data = p; - } - - return 0; -} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2b7e919..d79e4ed 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -169,9 +169,10 @@ struct addr_location; int event__preprocess_sample(const event_t *self, struct perf_session *session, struct addr_location *al, struct sample_data *data, symbol_filter_t filter); -int event__parse_sample(const event_t *event, struct perf_session *session, - struct sample_data *sample); const char *event__get_event_name(unsigned int id); +int 
event__parse_sample(const event_t *event, u64 type, bool sample_id_all, + struct sample_data *sample); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 9a6d942..a85ae12 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -355,3 +355,121 @@ out_unmap: } return -1; } + +static int event__parse_id_sample(const event_t *event, u64 type, + struct sample_data *sample) +{ + const u64 *array = event->sample.array; + + array += ((event->header.size - + sizeof(event->header)) / sizeof(u64)) - 1; + + if (type & PERF_SAMPLE_CPU) { + u32 *p = (u32 *)array; + sample->cpu = *p; + array--; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + sample->stream_id = *array; + array--; + } + + if (type & PERF_SAMPLE_ID) { + sample->id = *array; + array--; + } + + if (type & PERF_SAMPLE_TIME) { + sample->time = *array; + array--; + } + + if (type & PERF_SAMPLE_TID) { + u32 *p = (u32 *)array; + sample->pid = p[0]; + sample->tid = p[1]; + } + + return 0; +} + +int event__parse_sample(const event_t *event, u64 type, bool sample_id_all, + struct sample_data *data) +{ + const u64 *array; + + data->cpu = data->pid = data->tid = -1; + data->stream_id = data->id = data->time = -1ULL; + + if (event->header.type != PERF_RECORD_SAMPLE) { + if (!sample_id_all) + return 0; + return event__parse_id_sample(event, type, data); + } + + array = event->sample.array; + + if (type & PERF_SAMPLE_IP) { + data->ip = event->ip.ip; + array++; + } + + if (type & PERF_SAMPLE_TID) { + u32 *p = (u32 *)array; + data->pid = p[0]; + data->tid = p[1]; + array++; + } + + if (type & PERF_SAMPLE_TIME) { + data->time = *array; + array++; + } + + if (type & PERF_SAMPLE_ADDR) { + data->addr = *array; + array++; + } + + data->id = -1ULL; + if (type & PERF_SAMPLE_ID) { + data->id = *array; + array++; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + data->stream_id = *array; + array++; + } + + if (type & PERF_SAMPLE_CPU) { + u32 *p = (u32 *)array; + data->cpu = *p; + 
array++; + } + + if (type & PERF_SAMPLE_PERIOD) { + data->period = *array; + array++; + } + + if (type & PERF_SAMPLE_READ) { + fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); + return -1; + } + + if (type & PERF_SAMPLE_CALLCHAIN) { + data->callchain = (struct ip_callchain *)array; + array += 1 + data->callchain->nr; + } + + if (type & PERF_SAMPLE_RAW) { + u32 *p = (u32 *)array; + data->raw_size = *p; + p++; + data->raw_data = p; + } + + return 0; +} diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b58a48a..e6a0740 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -496,7 +496,7 @@ static void flush_sample_queue(struct perf_session *s, if (iter->timestamp > limit) break; - event__parse_sample(iter->event, s, &sample); + perf_session__parse_sample(s, iter->event, &sample); perf_session_deliver_event(s, iter->event, &sample, ops, iter->file_offset); @@ -806,7 +806,7 @@ static int perf_session__process_event(struct perf_session *session, /* * For all kernel events we get the sample data */ - event__parse_sample(event, session, &sample); + perf_session__parse_sample(session, event, &sample); /* Preprocess sample records - precheck callchains */ if (perf_session__preprocess_sample(session, event, &sample)) diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index e815468..7823976 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -155,4 +155,13 @@ size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp) { return hists__fprintf_nr_events(&self->hists, fp); } + +static inline int perf_session__parse_sample(struct perf_session *session, + const event_t *event, + struct sample_data *sample) +{ + return event__parse_sample(event, session->sample_type, + session->sample_id_all, sample); +} + #endif /* __PERF_SESSION_H */ -- cgit v0.10.2 From ef1d1af28ca37fdbc2745da040529cd2953c1af5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Jan 2011 
21:41:45 -0200 Subject: perf evsel: Introduce perf_evsel__{in,ex}it Out of the {con,de}structor, as in interpreted language bindings we will need to go back from the wrapper object to the real thing. In that case using container_of will save us from having an extra pointer in the perf_evsel struct. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4b3b84c..df0610e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -6,17 +6,21 @@ #include #include +void perf_evlist__init(struct perf_evlist *evlist) +{ + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); + INIT_LIST_HEAD(&evlist->entries); +} + struct perf_evlist *perf_evlist__new(void) { struct perf_evlist *evlist = zalloc(sizeof(*evlist)); - if (evlist != NULL) { - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); - INIT_LIST_HEAD(&evlist->entries); - } + if (evlist != NULL) + perf_evlist__init(evlist); return evlist; } @@ -33,11 +37,18 @@ static void perf_evlist__purge(struct perf_evlist *evlist) evlist->nr_entries = 0; } -void perf_evlist__delete(struct perf_evlist *evlist) +void perf_evlist__exit(struct perf_evlist *evlist) { - perf_evlist__purge(evlist); free(evlist->mmap); free(evlist->pollfd); + evlist->mmap = NULL; + evlist->pollfd = NULL; +} + +void perf_evlist__delete(struct perf_evlist *evlist) +{ + perf_evlist__purge(evlist); + perf_evlist__exit(evlist); free(evlist); } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 2871206..acbe48e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -24,6 +24,8 @@ struct perf_evlist { struct perf_evsel; struct perf_evlist *perf_evlist__new(void); +void perf_evlist__init(struct perf_evlist *evlist); +void
perf_evlist__exit(struct perf_evlist *evlist); void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a85ae12..76ab553 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -14,15 +14,20 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->id, x, y) +void perf_evsel__init(struct perf_evsel *evsel, + struct perf_event_attr *attr, int idx) +{ + evsel->idx = idx; + evsel->attr = *attr; + INIT_LIST_HEAD(&evsel->node); +} + struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) { struct perf_evsel *evsel = zalloc(sizeof(*evsel)); - if (evsel != NULL) { - evsel->idx = idx; - evsel->attr = *attr; - INIT_LIST_HEAD(&evsel->node); - } + if (evsel != NULL) + perf_evsel__init(evsel, attr, idx); return evsel; } @@ -87,11 +92,16 @@ int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus) return evlist->mmap != NULL ? 
0 : -ENOMEM; } -void perf_evsel__delete(struct perf_evsel *evsel) +void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); xyarray__delete(evsel->fd); xyarray__delete(evsel->id); +} + +void perf_evsel__delete(struct perf_evsel *evsel) +{ + perf_evsel__exit(evsel); free(evsel); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 667ee4e..7962e75 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -52,6 +52,9 @@ struct thread_map; struct perf_evlist; struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); +void perf_evsel__init(struct perf_evsel *evsel, + struct perf_event_attr *attr, int idx); +void perf_evsel__exit(struct perf_evsel *evsel); void perf_evsel__delete(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); -- cgit v0.10.2 From dda99116969142cc41e945a1047a419b937536af Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 21 Jan 2011 15:30:01 -0800 Subject: x86, perf: Change two init functions to static init_hw_perf_events() is called via early_initcall now. x86_pmu_event_init is x86_pmu member function. So we can change them to static. 
Signed-off-by: Yinghai Lu Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <4D3A16F9.109@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d977a2..4d98789 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1389,7 +1389,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -int __init init_hw_perf_events(void) +static int __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1608,7 +1608,7 @@ out: return ret; } -int x86_pmu_event_init(struct perf_event *event) +static int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; -- cgit v0.10.2 From 68baa431ec2f14ba7510d4e79bceb6ceaf0d3b74 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jan 2011 23:15:30 +0900 Subject: perf tools: Add strfilter for general purpose string filter Add strfilter for general purpose string filter. Every filter rule is described by a glob matching pattern, optionally with a '!' prefix which means Logical NOT. A strfilter consists of those filter rules connected with '&' and '|'. A set of rules can be folded by using '(' and ')'. It also accepts spaces around rules and those operators. Format: <rule> ::= <glob-exp> | "!" <rule> | <rule> <op> <rule> | "(" <rule> ")" <op> ::= "&" | "|" e.g.: "(add* | del*) & *timer" filter rules pass strings which start with add or del and end with timer. This will be used by perf probe --filter. Changes in V2: - Fix to check result of strdup() and strfilter__alloc(). - Encapsulate and simplify interfaces like regex(3).
Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110120141530.25915.12673.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 638e8e1..eedcf95 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -417,6 +417,7 @@ LIB_H += util/help.h LIB_H += util/session.h LIB_H += util/strbuf.h LIB_H += util/strlist.h +LIB_H += util/strfilter.h LIB_H += util/svghelper.h LIB_H += util/run-command.h LIB_H += util/sigchain.h @@ -458,6 +459,7 @@ LIB_OBJS += $(OUTPUT)util/quote.o LIB_OBJS += $(OUTPUT)util/strbuf.o LIB_OBJS += $(OUTPUT)util/string.o LIB_OBJS += $(OUTPUT)util/strlist.o +LIB_OBJS += $(OUTPUT)util/strfilter.o LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c new file mode 100644 index 0000000..4064b7d --- /dev/null +++ b/tools/perf/util/strfilter.c @@ -0,0 +1,200 @@ +#include +#include "util.h" +#include "string.h" +#include "strfilter.h" + +/* Operators */ +static const char *OP_and = "&"; /* Logical AND */ +static const char *OP_or = "|"; /* Logical OR */ +static const char *OP_not = "!"; /* Logical NOT */ + +#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!') +#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')') + +static void strfilter_node__delete(struct strfilter_node *self) +{ + if (self) { + if (self->p && !is_operator(*self->p)) + free((char *)self->p); + strfilter_node__delete(self->l); + strfilter_node__delete(self->r); + free(self); + } +} + +void strfilter__delete(struct strfilter *self) +{ + if (self) { + strfilter_node__delete(self->root); + free(self); + } +} + +static const char *get_token(const char *s, const char **e) +{ + const char *p; + + while (isspace(*s)) 
/* Skip spaces */ + s++; + + if (*s == '\0') { + p = s; + goto end; + } + + p = s + 1; + if (!is_separator(*s)) { + /* End search */ +retry: + while (*p && !is_separator(*p) && !isspace(*p)) + p++; + /* Escape and special case: '!' is also used in glob pattern */ + if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) { + p++; + goto retry; + } + } +end: + *e = p; + return s; +} + +static struct strfilter_node *strfilter_node__alloc(const char *op, + struct strfilter_node *l, + struct strfilter_node *r) +{ + struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node)); + + if (ret) { + ret->p = op; + ret->l = l; + ret->r = r; + } + + return ret; +} + +static struct strfilter_node *strfilter_node__new(const char *s, + const char **ep) +{ + struct strfilter_node root, *cur, *last_op; + const char *e; + + if (!s) + return NULL; + + memset(&root, 0, sizeof(root)); + last_op = cur = &root; + + s = get_token(s, &e); + while (*s != '\0' && *s != ')') { + switch (*s) { + case '&': /* Exchg last OP->r with AND */ + if (!cur->r || !last_op->r) + goto error; + cur = strfilter_node__alloc(OP_and, last_op->r, NULL); + if (!cur) + goto nomem; + last_op->r = cur; + last_op = cur; + break; + case '|': /* Exchg the root with OR */ + if (!cur->r || !root.r) + goto error; + cur = strfilter_node__alloc(OP_or, root.r, NULL); + if (!cur) + goto nomem; + root.r = cur; + last_op = cur; + break; + case '!': /* Add NOT as a leaf node */ + if (cur->r) + goto error; + cur->r = strfilter_node__alloc(OP_not, NULL, NULL); + if (!cur->r) + goto nomem; + cur = cur->r; + break; + case '(': /* Recursively parses inside the parenthesis */ + if (cur->r) + goto error; + cur->r = strfilter_node__new(s + 1, &s); + if (!s) + goto nomem; + if (!cur->r || *s != ')') + goto error; + e = s + 1; + break; + default: + if (cur->r) + goto error; + cur->r = strfilter_node__alloc(NULL, NULL, NULL); + if (!cur->r) + goto nomem; + cur->r->p = strndup(s, e - s); + if (!cur->r->p) + goto nomem; + } + s = 
get_token(e, &e); + } + if (!cur->r) + goto error; + *ep = s; + return root.r; +nomem: + s = NULL; +error: + *ep = s; + strfilter_node__delete(root.r); + return NULL; +} + +/* + * Parse filter rule and return new strfilter. + * Return NULL if fail, and *ep == NULL if memory allocation failed. + */ +struct strfilter *strfilter__new(const char *rules, const char **err) +{ + struct strfilter *ret = zalloc(sizeof(struct strfilter)); + const char *ep = NULL; + + if (ret) + ret->root = strfilter_node__new(rules, &ep); + + if (!ret || !ret->root || *ep != '\0') { + if (err) + *err = ep; + strfilter__delete(ret); + ret = NULL; + } + + return ret; +} + +static bool strfilter_node__compare(struct strfilter_node *self, + const char *str) +{ + if (!self || !self->p) + return false; + + switch (*self->p) { + case '|': /* OR */ + return strfilter_node__compare(self->l, str) || + strfilter_node__compare(self->r, str); + case '&': /* AND */ + return strfilter_node__compare(self->l, str) && + strfilter_node__compare(self->r, str); + case '!': /* NOT */ + return !strfilter_node__compare(self->r, str); + default: + return strglobmatch(str, self->p); + } +} + +/* Return true if STR matches the filter rules */ +bool strfilter__compare(struct strfilter *self, const char *str) +{ + if (!self) + return false; + return strfilter_node__compare(self->root, str); +} diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h new file mode 100644 index 0000000..00f58a7 --- /dev/null +++ b/tools/perf/util/strfilter.h @@ -0,0 +1,48 @@ +#ifndef __PERF_STRFILTER_H +#define __PERF_STRFILTER_H +/* General purpose glob matching filter */ + +#include +#include + +/* A node of string filter */ +struct strfilter_node { + struct strfilter_node *l; /* Tree left branche (for &,|) */ + struct strfilter_node *r; /* Tree right branche (for !,&,|) */ + const char *p; /* Operator or rule */ +}; + +/* String filter */ +struct strfilter { + struct strfilter_node *root; +}; + +/** + * strfilter__new - 
Create a new string filter + * @rules: Filter rule, which is a combination of glob expressions. + * @err: Pointer which points an error detected on @rules + * + * Parse @rules and return new strfilter. Return NULL if an error detected. + * In that case, *@err will indicate where it is detected, and *@err is NULL + * if a memory allocation is failed. + */ +struct strfilter *strfilter__new(const char *rules, const char **err); + +/** + * strfilter__compare - compare given string and a string filter + * @self: String filter + * @str: target string + * + * Compare @str and @self. Return true if the str match the rule + */ +bool strfilter__compare(struct strfilter *self, const char *str); + +/** + * strfilter__delete - delete a string filter + * @self: String filter to delete + * + * Delete @self. + */ +void strfilter__delete(struct strfilter *self); + +#endif -- cgit v0.10.2 From bd09d7b5efeb13965b6725b4a3e9944908bca9d2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jan 2011 23:15:39 +0900 Subject: perf probe: Add variable filter support Add filter support for the available variable list. Default filter is "!__k???tab_*&!__crc_*" for filtering out automatically generated symbols. The format of a filter rule is "[!]GLOBPATTERN", so you can use wild cards. If the filter rule starts with '!', matched variables are filtered out. e.g.: # perf probe -V schedule --externs --filter=cpu* Available variables at schedule @<schedule+0> cpumask_var_t cpu_callout_mask cpumask_var_t cpu_core_map cpumask_var_t cpu_isolated_map cpumask_var_t cpu_sibling_map int cpu_number long unsigned int* cpu_bit_bitmap ...
Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Chase Douglas Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110120141539.25915.43401.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu [ committer note: Removed the elf.h include as it was fixed up in e80711c] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index fcc51fe..32fb18f 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -77,6 +77,12 @@ OPTIONS --funcs:: Show available functions in given module or kernel. +--filter=FILTER:: + (Only for --vars) Set filter for variables. FILTER is a combination of + glob pattern, see FILTER PATTERN for details. + Default FILTER is "!__k???tab_* & !__crc_*". + If several filters are specified, only the last filter is valid. + -f:: --force:: Forcibly add events with existing name. @@ -139,6 +145,14 @@ e.g. This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.) +FILTER PATTERN +-------------- + The filter pattern is a glob matching pattern(s) to filter variables. + In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")". + +e.g. + With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar". + With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out. 
EXAMPLES -------- diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 6cf708a..abb423e 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -36,6 +36,7 @@ #include "builtin.h" #include "util/util.h" #include "util/strlist.h" +#include "util/strfilter.h" #include "util/symbol.h" #include "util/debug.h" #include "util/debugfs.h" @@ -43,6 +44,7 @@ #include "util/probe-finder.h" #include "util/probe-event.h" +#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define MAX_PATH_LEN 256 /* Session management structure */ @@ -60,6 +62,7 @@ static struct { struct line_range line_range; const char *target_module; int max_probe_points; + struct strfilter *filter; } params; /* Parse an event definition. Note that any error must die. */ @@ -156,6 +159,27 @@ static int opt_show_vars(const struct option *opt __used, return ret; } + +static int opt_set_filter(const struct option *opt __used, + const char *str, int unset __used) +{ + const char *err; + + if (str) { + pr_debug2("Set filter: %s\n", str); + if (params.filter) + strfilter__delete(params.filter); + params.filter = strfilter__new(str, &err); + if (!params.filter) { + pr_err("Filter parse error at %ld.\n", err - str + 1); + pr_err("Source: \"%s\"\n", str); + pr_err(" %*c\n", (int)(err - str + 1), '^'); + return -EINVAL; + } + } + + return 0; +} #endif static const char * const probe_usage[] = { @@ -212,6 +236,10 @@ static const struct option options[] = { "Show accessible variables on PROBEDEF", opt_show_vars), OPT_BOOLEAN('\0', "externs", ¶ms.show_ext_vars, "Show external variables too (with --vars only)"), + OPT_CALLBACK('\0', "filter", NULL, + "[!]FILTER", "Set a variable filter (with --vars only)\n" + "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\")", + opt_set_filter), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, @@ -324,10 +352,16 @@ int cmd_probe(int argc, const char **argv, const 
char *prefix __used) " --add/--del.\n"); usage_with_options(probe_usage, options); } + if (!params.filter) + params.filter = strfilter__new(DEFAULT_VAR_FILTER, + NULL); + ret = show_available_vars(params.events, params.nevents, params.max_probe_points, params.target_module, + params.filter, params.show_ext_vars); + strfilter__delete(params.filter); if (ret < 0) pr_err(" Error: Failed to show vars. (%d)\n", ret); return ret; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 859d377..077e051 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -451,12 +451,14 @@ end: } static int show_available_vars_at(int fd, struct perf_probe_event *pev, - int max_vls, bool externs) + int max_vls, struct strfilter *_filter, + bool externs) { char *buf; - int ret, i; + int ret, i, nvars; struct str_node *node; struct variable_list *vls = NULL, *vl; + const char *var; buf = synthesize_perf_probe_point(&pev->point); if (!buf) @@ -464,36 +466,45 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev, pr_debug("Searching variables at %s\n", buf); ret = find_available_vars_at(fd, pev, &vls, max_vls, externs); - if (ret > 0) { - /* Some variables were found */ - fprintf(stdout, "Available variables at %s\n", buf); - for (i = 0; i < ret; i++) { - vl = &vls[i]; - /* - * A probe point might be converted to - * several trace points. - */ - fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol, - vl->point.offset); - free(vl->point.symbol); - if (vl->vars) { - strlist__for_each(node, vl->vars) + if (ret <= 0) { + pr_err("Failed to find variables at %s (%d)\n", buf, ret); + goto end; + } + /* Some variables are found */ + fprintf(stdout, "Available variables at %s\n", buf); + for (i = 0; i < ret; i++) { + vl = &vls[i]; + /* + * A probe point might be converted to + * several trace points. 
+ */ + fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol, + vl->point.offset); + free(vl->point.symbol); + nvars = 0; + if (vl->vars) { + strlist__for_each(node, vl->vars) { + var = strchr(node->s, '\t') + 1; + if (strfilter__compare(_filter, var)) { fprintf(stdout, "\t\t%s\n", node->s); - strlist__delete(vl->vars); - } else - fprintf(stdout, "(No variables)\n"); + nvars++; + } + } + strlist__delete(vl->vars); } - free(vls); - } else - pr_err("Failed to find variables at %s (%d)\n", buf, ret); - + if (nvars == 0) + fprintf(stdout, "\t\t(No matched variables)\n"); + } + free(vls); +end: free(buf); return ret; } /* Show available variables on given probe point */ int show_available_vars(struct perf_probe_event *pevs, int npevs, - int max_vls, const char *module, bool externs) + int max_vls, const char *module, + struct strfilter *_filter, bool externs) { int i, fd, ret = 0; @@ -510,7 +521,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, setup_pager(); for (i = 0; i < npevs && ret >= 0; i++) - ret = show_available_vars_at(fd, &pevs[i], max_vls, externs); + ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter, + externs); close(fd); return ret; @@ -556,7 +568,9 @@ int show_line_range(struct line_range *lr __unused, const char *module __unused) int show_available_vars(struct perf_probe_event *pevs __unused, int npevs __unused, int max_vls __unused, - const char *module __unused, bool externs __unused) + const char *module __unused, + struct strfilter *filter __unused, + bool externs __unused) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 1fb4f18..4e80b2b 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -3,6 +3,7 @@ #include #include "strlist.h" +#include "strfilter.h" extern bool probe_event_dry_run; @@ -126,7 +127,7 @@ extern int show_perf_probe_events(void); extern int show_line_range(struct line_range 
*lr, const char *module); extern int show_available_vars(struct perf_probe_event *pevs, int npevs, int max_probe_points, const char *module, - bool externs); + struct strfilter *filter, bool externs); extern int show_available_funcs(const char *module); -- cgit v0.10.2 From 3c42258c9a4db70133fa6946a275b62a16792bb5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Jan 2011 23:15:45 +0900 Subject: perf probe: Add filters support for available functions Add filters support for available function list. Default filter is "!_*" for filtering out local-purpose symbols. e.g.: # perf probe --filter="add*" -F add_disk add_disk_randomness add_input_randomness add_interrupt_randomness add_memory add_page_to_unevictable_list add_page_wait_queue ... Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Chase Douglas Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110120141545.25915.85930.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 32fb18f..81c3220 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -78,10 +78,11 @@ OPTIONS Show available functions in given module or kernel. --filter=FILTER:: - (Only for --vars) Set filter for variables. FILTER is a combination of - glob pattern, see FILTER PATTERN for details. - Default FILTER is "!__k???tab_* & !__crc_*". - If several filters are specified, only the last filter is valid. + (Only for --vars and --funcs) Set filter. FILTER is a combination of glob + pattern, see FILTER PATTERN for detail. + Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*" + for --funcs. + If several filters are specified, only the last filter is used. 
-f:: --force:: diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index abb423e..fcde003 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -45,6 +45,7 @@ #include "util/probe-event.h" #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" +#define DEFAULT_FUNC_FILTER "!_*" #define MAX_PATH_LEN 256 /* Session management structure */ @@ -159,6 +160,7 @@ static int opt_show_vars(const struct option *opt __used, return ret; } +#endif static int opt_set_filter(const struct option *opt __used, const char *str, int unset __used) @@ -180,7 +182,6 @@ static int opt_set_filter(const struct option *opt __used, return 0; } -#endif static const char * const probe_usage[] = { "perf probe [] 'PROBEDEF' ['PROBEDEF' ...]", @@ -236,10 +237,6 @@ static const struct option options[] = { "Show accessible variables on PROBEDEF", opt_show_vars), OPT_BOOLEAN('\0', "externs", &params.show_ext_vars, "Show external variables too (with --vars only)"), - OPT_CALLBACK('\0', "filter", NULL, - "[!]FILTER", "Set a variable filter (with --vars only)\n" - "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\")", - opt_set_filter), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, @@ -252,6 +249,11 @@ static const struct option options[] = { "Set how many probe points can be found for a probe."), OPT_BOOLEAN('F', "funcs", &params.show_funcs, "Show potential probe-able functions."), + OPT_CALLBACK('\0', "filter", NULL, + "[!]FILTER", "Set a filter (with --vars/funcs only)\n" + "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n" + "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)", + opt_set_filter), OPT_END() }; @@ -322,7 +324,12 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) pr_err(" Error: Don't use --funcs with --vars.\n"); usage_with_options(probe_usage, options); } - ret = show_available_funcs(params.target_module); + if (!params.filter) + params.filter =
strfilter__new(DEFAULT_FUNC_FILTER, + NULL); + ret = show_available_funcs(params.target_module, + params.filter); + strfilter__delete(params.filter); if (ret < 0) pr_err(" Error: Failed to show functions." " (%d)\n", ret); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 077e051..9d237e3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1951,21 +1951,23 @@ int del_perf_probe_events(struct strlist *dellist) return ret; } +/* TODO: don't use a global variable for filter ... */ +static struct strfilter *available_func_filter; /* - * If a symbol corresponds to a function with global binding return 0. - * For all others return 1. + * If a symbol corresponds to a function with global binding and + * matches filter return 0. For all others return 1. */ -static int filter_non_global_functions(struct map *map __unused, - struct symbol *sym) +static int filter_available_functions(struct map *map __unused, + struct symbol *sym) { - if (sym->binding != STB_GLOBAL) - return 1; - - return 0; + if (sym->binding == STB_GLOBAL && + strfilter__compare(available_func_filter, sym->name)) + return 0; + return 1; } -int show_available_funcs(const char *module) +int show_available_funcs(const char *module, struct strfilter *_filter) { struct map *map; int ret; @@ -1981,7 +1983,8 @@ int show_available_funcs(const char *module) pr_err("Failed to find %s map.\n", (module) ? 
: "kernel"); return -EINVAL; } - if (map__load(map, filter_non_global_functions)) { + available_func_filter = _filter; + if (map__load(map, filter_available_functions)) { pr_err("Failed to load map.\n"); return -EINVAL; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 4e80b2b..3434fc9 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -128,7 +128,7 @@ extern int show_line_range(struct line_range *lr, const char *module); extern int show_available_vars(struct perf_probe_event *pevs, int npevs, int max_probe_points, const char *module, struct strfilter *filter, bool externs); -extern int show_available_funcs(const char *module); +extern int show_available_funcs(const char *module, struct strfilter *filter); /* Maximum index number of event-name postfix */ -- cgit v0.10.2 From 54489c189b1a0c10eaf21c6d2c5916b50442c871 Mon Sep 17 00:00:00 2001 From: Han Pingtian Date: Tue, 25 Jan 2011 07:39:00 +0800 Subject: perf test: Fix return values checking Fixing some cut'n'paste mistakes. 
LKML-Reference: <20110124233900.GA3443@epc900.nay.redhat.com> Signed-off-by: Han Pingtian [ committer note: I had already removed the CPU_ALLOC calls ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 231e3e2..738830d 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -495,8 +495,8 @@ static int test__basic_mmap(void) } cpus = cpu_map__new(NULL); - if (threads == NULL) { - pr_debug("thread_map__new\n"); + if (cpus == NULL) { + pr_debug("cpu_map__new\n"); goto out_free_threads; } @@ -510,7 +510,7 @@ static int test__basic_mmap(void) } evlist = perf_evlist__new(); - if (threads == NULL) { + if (evlist == NULL) { pr_debug("perf_evlist__new\n"); goto out_free_cpus; } -- cgit v0.10.2 From dc82009aac6ee6e423b48de43a251745c62ab012 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 28 Jan 2011 14:49:19 -0200 Subject: perf record: No need to check for overwrites As we open the mmap with (PROT_READ | PROT_WRITE), signalling the kernel with perf_mmap__write_tail() when consuming data, so the kernel will not overwrite. Suggested-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d788630..caf9279 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -115,27 +115,11 @@ static void mmap_read(struct perf_mmap *md) unsigned char *data = md->base + page_size; unsigned long size; void *buf; - int diff; - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. 
- */ - diff = head - old; - if (diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data\n"); - /* - * head points to a known good entry, start there. - */ - old = head; - } + if (old == head) + return; - if (old != head) - samples++; + samples++; size = head - old; -- cgit v0.10.2 From ef2bf6d043ac9bd4a6f38d862af407154a4754d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 29 Jan 2011 09:04:40 -0200 Subject: perf events: Account PERF_RECORD_LOST events in event__process Right now this function is only used by perf top, that uses PROT_READ only, i.e. overwrite mode, so no PERF_RECORD_LOST events are generated, but don't forget those events. The patch that moved this out of perf top was made so that this routine could be used by 'perf probe' in the uprobes patchset, so perhaps there they need to check for LOST events and warn the user, as will be done in the following patches that will switch 'perf top' to non overwrite mode (mmap with PROT_READ|PROT_WRITE). Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e4db8b8..b9d4ac8 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -646,6 +646,8 @@ int event__process(event_t *event, struct sample_data *sample, case PERF_RECORD_EXIT: event__process_task(event, sample, session); break; + case PERF_RECORD_LOST: + event__process_lost(event, sample, session); default: break; } -- cgit v0.10.2 From 7bb41152b9be7e31f10d8919bce5034135525d9d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 29 Jan 2011 09:08:13 -0200 Subject: perf evlist: Support non overwrite mode in perf_evlist__read_on_cpu I.e. 
stash the overwrite mode in struct perf_evlist and act accordingly in perf_evlist__read_on_cpu, not checking for overwrites and touching the tail after consuming one event, like perf record does, for instance. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index df0610e..b498eec 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -116,24 +116,25 @@ event_t *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) unsigned int old = md->prev; unsigned char *data = md->base + page_size; event_t *event = NULL; - int diff; - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. - */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); + if (evlist->overwrite) { /* - * head points to a known good entry, start there. + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and mess up the samples under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. */ - old = head; + int diff = head - old; + if (diff > md->mask / 2 || diff < 0) { + fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); + + /* + * head points to a known good entry, start there. 
+ */ + old = head; + } } if (old != head) { @@ -166,5 +167,9 @@ event_t *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) } md->prev = old; + + if (!evlist->overwrite) + perf_mmap__write_tail(md, old); + return event; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index acbe48e..2706ae4 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -16,6 +16,7 @@ struct perf_evlist { int nr_entries; int nr_fds; int mmap_len; + bool overwrite; event_t event_copy; struct perf_mmap *mmap; struct pollfd *pollfd; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76ab553..f98b3e5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -327,6 +327,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0) return -ENOMEM; + evlist->overwrite = overwrite; evlist->mmap_len = (pages + 1) * page_size; first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node); -- cgit v0.10.2 From 93fc64f14472ae24fd640bf3834a178f59142842 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 29 Jan 2011 12:08:00 -0200 Subject: perf top: Switch to non overwrite mode Just like 'perf record'. Warn the user when PERF_RECORD_LOST events happen. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ce2e50c..7f92ab7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -464,7 +464,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) rb_insert_color(&se->rb_node, tree); } -static void print_sym_table(void) +static void print_sym_table(struct perf_session *session) { int printed = 0, j; struct perf_evsel *counter; @@ -513,7 +513,6 @@ static void print_sym_table(void) puts(CONSOLE_CLEAR); - printf("%-*.*s\n", win_width, win_width, graph_dotted_line); if (!perf_guest) { printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%" " exact: %4.1f%% [", @@ -578,6 +577,12 @@ static void print_sym_table(void) printf("%-*.*s\n", win_width, win_width, graph_dotted_line); + if (session->hists.stats.total_lost != 0) { + color_fprintf(stdout, PERF_COLOR_RED, "WARNING:"); + printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n", + session->hists.stats.total_lost); + } + if (sym_filter_entry) { show_details(sym_filter_entry); return; @@ -919,7 +924,7 @@ repeat: getc(stdin); do { - print_sym_table(); + print_sym_table(session); } while (!poll(&stdin_poll, 1, delay_msecs) == 1); c = getc(stdin); @@ -1176,7 +1181,7 @@ try_again: } } - if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, true) < 0) + if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, false) < 0) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } -- cgit v0.10.2 From 8d50e5b4171a69cf48ca94a1e7c14033d0b4771d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 29 Jan 2011 13:02:00 -0200 Subject: perf tools: Rename 'struct sample_data' to 'struct perf_sample' Making the namespace more uniform. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 8879463..ef36751 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -58,7 +58,7 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) return hist_entry__inc_addr_samples(he, al->addr); } -static int process_sample_event(event_t *event, struct sample_data *sample, +static int process_sample_event(event_t *event, struct perf_sample *sample, struct perf_session *session) { struct addr_location al; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 3153e49..0822149 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -31,7 +31,7 @@ static int hists__add_entry(struct hists *self, } static int diff__process_sample_event(event_t *event, - struct sample_data *sample, + struct perf_sample *sample, struct perf_session *session) { struct addr_location al; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 0c78ffa..4c9388c 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -36,13 +36,13 @@ static int event__repipe_synth(event_t *event, return 0; } -static int event__repipe(event_t *event, struct sample_data *sample __used, +static int event__repipe(event_t *event, struct perf_sample *sample __used, struct perf_session *session) { return event__repipe_synth(event, session); } -static int event__repipe_mmap(event_t *self, struct sample_data *sample, +static int event__repipe_mmap(event_t *self, struct perf_sample *sample, struct perf_session *session) { int err; @@ -53,7 +53,7 @@ static int event__repipe_mmap(event_t *self, struct sample_data *sample, return err; } -static int event__repipe_task(event_t *self, struct sample_data *sample, +static int 
event__repipe_task(event_t *self, struct perf_sample *sample, struct perf_session *session) { int err; @@ -119,7 +119,7 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) return 0; } -static int event__inject_buildid(event_t *event, struct sample_data *sample, +static int event__inject_buildid(event_t *event, struct perf_sample *sample, struct perf_session *session) { struct addr_location al; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index d97256d..3c1cdcf 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -304,7 +304,7 @@ process_raw_event(event_t *raw_event __used, void *data, } } -static int process_sample_event(event_t *event, struct sample_data *sample, +static int process_sample_event(event_t *event, struct perf_sample *sample, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, event->ip.pid); diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2b36def..c3f5127 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -834,7 +834,7 @@ static void dump_info(void) die("Unknown type of information\n"); } -static int process_sample_event(event_t *self, struct sample_data *sample, +static int process_sample_event(event_t *self, struct perf_sample *sample, struct perf_session *s) { struct thread *thread = perf_session__findnew(s, sample->tid); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index caf9279..5d3e4b3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -101,7 +101,7 @@ static void write_output(void *buf, size_t size) } static int process_synthesized_event(event_t *event, - struct sample_data *sample __used, + struct perf_sample *sample __used, struct perf_session *self __used) { write_output(event, event->header.size); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f6a4349..bbbadcc 100644 --- a/tools/perf/builtin-report.c +++ 
b/tools/perf/builtin-report.c @@ -77,9 +77,9 @@ static struct hists *perf_session__hists_findnew(struct perf_session *self, return new; } -static int perf_session__add_hist_entry(struct perf_session *self, +static int perf_session__add_hist_entry(struct perf_session *session, struct addr_location *al, - struct sample_data *data) + struct perf_sample *sample) { struct symbol *parent = NULL; int err = 0; @@ -87,28 +87,28 @@ static int perf_session__add_hist_entry(struct perf_session *self, struct hists *hists; struct perf_event_attr *attr; - if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) { - err = perf_session__resolve_callchain(self, al->thread, - data->callchain, &parent); + if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { + err = perf_session__resolve_callchain(session, al->thread, + sample->callchain, &parent); if (err) return err; } - attr = perf_header__find_attr(data->id, &self->header); + attr = perf_header__find_attr(sample->id, &session->header); if (attr) - hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config); + hists = perf_session__hists_findnew(session, sample->id, attr->type, attr->config); else - hists = perf_session__hists_findnew(self, data->id, 0, 0); + hists = perf_session__hists_findnew(session, sample->id, 0, 0); if (hists == NULL) return -ENOMEM; - he = __hists__add_entry(hists, al, parent, data->period); + he = __hists__add_entry(hists, al, parent, sample->period); if (he == NULL) return -ENOMEM; if (symbol_conf.use_callchain) { - err = callchain_append(he->callchain, &self->callchain_cursor, - data->period); + err = callchain_append(he->callchain, &session->callchain_cursor, + sample->period); if (err) return err; } @@ -124,32 +124,32 @@ static int perf_session__add_hist_entry(struct perf_session *self, } static int add_event_total(struct perf_session *session, - struct sample_data *data, + struct perf_sample *sample, struct perf_event_attr *attr) { struct hists 
*hists; if (attr) - hists = perf_session__hists_findnew(session, data->id, + hists = perf_session__hists_findnew(session, sample->id, attr->type, attr->config); else - hists = perf_session__hists_findnew(session, data->id, 0, 0); + hists = perf_session__hists_findnew(session, sample->id, 0, 0); if (!hists) return -ENOMEM; - hists->stats.total_period += data->period; + hists->stats.total_period += sample->period; /* * FIXME: add_event_total should be moved from here to * perf_session__process_event so that the proper hist is passed to * the event_op methods. */ hists__inc_nr_events(hists, PERF_RECORD_SAMPLE); - session->hists.stats.total_period += data->period; + session->hists.stats.total_period += sample->period; return 0; } -static int process_sample_event(event_t *event, struct sample_data *sample, +static int process_sample_event(event_t *event, struct perf_sample *sample, struct perf_session *session) { struct addr_location al; @@ -179,7 +179,7 @@ static int process_sample_event(event_t *event, struct sample_data *sample, return 0; } -static int process_read_event(event_t *event, struct sample_data *sample __used, +static int process_read_event(event_t *event, struct perf_sample *sample __used, struct perf_session *session __used) { struct perf_event_attr *attr; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 29acb89..ff993c8 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1607,7 +1607,7 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session, process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); } -static int process_sample_event(event_t *event, struct sample_data *sample, +static int process_sample_event(event_t *event, struct perf_sample *sample, struct perf_session *session) { struct thread *thread; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b766c2a..5c4c809 100644 --- a/tools/perf/builtin-script.c +++ 
b/tools/perf/builtin-script.c @@ -63,7 +63,7 @@ static int cleanup_scripting(void) static char const *input_name = "perf.data"; -static int process_sample_event(event_t *event, struct sample_data *sample, +static int process_sample_event(event_t *event, struct perf_sample *sample, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, event->ip.pid); diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 738830d..df62433 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -550,7 +550,7 @@ static int test__basic_mmap(void) } while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) { - struct sample_data sample; + struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) { pr_debug("unexpected %s event\n", diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 746cf03..01cf0c3 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -276,21 +276,21 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(event_t *event, struct sample_data *sample __used, +static int process_comm_event(event_t *event, struct perf_sample *sample __used, struct perf_session *session __used) { pid_set_comm(event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(event_t *event, struct sample_data *sample __used, +static int process_fork_event(event_t *event, struct perf_sample *sample __used, struct perf_session *session __used) { pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(event_t *event, struct sample_data *sample __used, +static int process_exit_event(event_t *event, struct perf_sample *sample __used, struct perf_session *session __used) { pid_exit(event->fork.pid, event->fork.time); @@ -487,7 +487,7 @@ static void sched_switch(int cpu, u64 timestamp, 
struct trace_entry *te) static int process_sample_event(event_t *event __used, - struct sample_data *sample, + struct perf_sample *sample, struct perf_session *session) { struct trace_entry *te; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7f92ab7..d923127 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -997,7 +997,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) } static void event__process_sample(const event_t *self, - struct sample_data *sample, + struct perf_sample *sample, struct perf_session *session) { u64 ip = self->ip.ip; @@ -1107,7 +1107,7 @@ static void event__process_sample(const event_t *self, static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) { - struct sample_data sample; + struct perf_sample sample; event_t *event; while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) { diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index deffb8c..b184a7f 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -15,7 +15,7 @@ #include "debug.h" static int build_id__mark_dso_hit(event_t *event, - struct sample_data *sample __used, + struct perf_sample *sample __used, struct perf_session *session) { struct addr_location al; @@ -37,7 +37,7 @@ static int build_id__mark_dso_hit(event_t *event, return 0; } -static int event__exit_del_thread(event_t *self, struct sample_data *sample __used, +static int event__exit_del_thread(event_t *self, struct perf_sample *sample __used, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, self->fork.tid); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index b9d4ac8..5c886fb 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -34,7 +34,7 @@ const char *event__get_event_name(unsigned int id) return event__name[id]; } -static struct sample_data synth_sample = { +static struct perf_sample synth_sample = { .pid = -1, .tid = -1, 
.time = -1, @@ -440,7 +440,7 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm, return 0; } -int event__process_comm(event_t *self, struct sample_data *sample __used, +int event__process_comm(event_t *self, struct perf_sample *sample __used, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, self->comm.tid); @@ -456,7 +456,7 @@ int event__process_comm(event_t *self, struct sample_data *sample __used, return 0; } -int event__process_lost(event_t *self, struct sample_data *sample __used, +int event__process_lost(event_t *self, struct perf_sample *sample __used, struct perf_session *session) { dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", @@ -567,7 +567,7 @@ out_problem: return -1; } -int event__process_mmap(event_t *self, struct sample_data *sample __used, +int event__process_mmap(event_t *self, struct perf_sample *sample __used, struct perf_session *session) { struct machine *machine; @@ -609,7 +609,7 @@ out_problem: return 0; } -int event__process_task(event_t *self, struct sample_data *sample __used, +int event__process_task(event_t *self, struct perf_sample *sample __used, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, self->fork.tid); @@ -632,7 +632,7 @@ int event__process_task(event_t *self, struct sample_data *sample __used, return 0; } -int event__process(event_t *event, struct sample_data *sample, +int event__process(event_t *event, struct perf_sample *sample, struct perf_session *session) { switch (event->header.type) { @@ -757,7 +757,7 @@ static void dso__calc_col_width(struct dso *self, struct hists *hists) } int event__preprocess_sample(const event_t *self, struct perf_session *session, - struct addr_location *al, struct sample_data *data, + struct addr_location *al, struct perf_sample *sample, symbol_filter_t filter) { u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; @@ -788,7 +788,7 @@ int event__preprocess_sample(const event_t 
*self, struct perf_session *session, al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : ""); al->sym = NULL; - al->cpu = data->cpu; + al->cpu = sample->cpu; if (al->map) { if (symbol_conf.dso_list && diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d79e4ed..84fd71f 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -61,7 +61,7 @@ struct sample_event { u64 array[]; }; -struct sample_data { +struct perf_sample { u64 ip; u32 pid, tid; u64 time; @@ -138,7 +138,7 @@ struct perf_session; typedef int (*event__handler_synth_t)(event_t *event, struct perf_session *session); -typedef int (*event__handler_t)(event_t *event, struct sample_data *sample, +typedef int (*event__handler_t)(event_t *event, struct perf_sample *sample, struct perf_session *session); int event__synthesize_thread(pid_t pid, event__handler_t process, @@ -154,25 +154,25 @@ int event__synthesize_modules(event__handler_t process, struct perf_session *session, struct machine *machine); -int event__process_comm(event_t *self, struct sample_data *sample, +int event__process_comm(event_t *event, struct perf_sample *sample, struct perf_session *session); -int event__process_lost(event_t *self, struct sample_data *sample, +int event__process_lost(event_t *event, struct perf_sample *sample, struct perf_session *session); -int event__process_mmap(event_t *self, struct sample_data *sample, +int event__process_mmap(event_t *event, struct perf_sample *sample, struct perf_session *session); -int event__process_task(event_t *self, struct sample_data *sample, +int event__process_task(event_t *event, struct perf_sample *sample, struct perf_session *session); -int event__process(event_t *event, struct sample_data *sample, +int event__process(event_t *event, struct perf_sample *sample, struct perf_session *session); struct addr_location; int event__preprocess_sample(const event_t *self, struct perf_session *session, - struct addr_location *al, struct sample_data 
*data, + struct addr_location *al, struct perf_sample *sample, symbol_filter_t filter); const char *event__get_event_name(unsigned int id); int event__parse_sample(const event_t *event, u64 type, bool sample_id_all, - struct sample_data *sample); + struct perf_sample *sample); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f98b3e5..a134885 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -368,7 +368,7 @@ out_unmap: } static int event__parse_id_sample(const event_t *event, u64 type, - struct sample_data *sample) + struct perf_sample *sample) { const u64 *array = event->sample.array; @@ -406,7 +406,7 @@ static int event__parse_id_sample(const event_t *event, u64 type, } int event__parse_sample(const event_t *event, u64 type, bool sample_id_all, - struct sample_data *data) + struct perf_sample *data) { const u64 *array; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e6a0740..ee0b611 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -67,7 +67,7 @@ out_close: static void perf_session__id_header_size(struct perf_session *session) { - struct sample_data *data; + struct perf_sample *data; u64 sample_type = session->sample_type; u16 size = 0; @@ -299,7 +299,7 @@ static int process_event_synth_stub(event_t *event __used, } static int process_event_stub(event_t *event __used, - struct sample_data *sample __used, + struct perf_sample *sample __used, struct perf_session *session __used) { dump_printf(": unhandled!\n"); @@ -475,7 +475,7 @@ static void perf_session_free_sample_buffers(struct perf_session *session) static int perf_session_deliver_event(struct perf_session *session, event_t *event, - struct sample_data *sample, + struct perf_sample *sample, struct perf_event_ops *ops, u64 file_offset); @@ -485,7 +485,7 @@ static void flush_sample_queue(struct perf_session *s, struct ordered_samples *os = &s->ordered_samples; struct list_head *head = 
&os->samples; struct sample_queue *tmp, *iter; - struct sample_data sample; + struct perf_sample sample; u64 limit = os->next_flush; u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; @@ -610,11 +610,11 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s) #define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) static int perf_session_queue_event(struct perf_session *s, event_t *event, - struct sample_data *data, u64 file_offset) + struct perf_sample *sample, u64 file_offset) { struct ordered_samples *os = &s->ordered_samples; struct list_head *sc = &os->sample_cache; - u64 timestamp = data->time; + u64 timestamp = sample->time; struct sample_queue *new; if (!timestamp || timestamp == ~0ULL) @@ -650,7 +650,7 @@ static int perf_session_queue_event(struct perf_session *s, event_t *event, return 0; } -static void callchain__printf(struct sample_data *sample) +static void callchain__printf(struct perf_sample *sample) { unsigned int i; @@ -663,7 +663,7 @@ static void callchain__printf(struct sample_data *sample) static void perf_session__print_tstamp(struct perf_session *session, event_t *event, - struct sample_data *sample) + struct perf_sample *sample) { if (event->header.type != PERF_RECORD_SAMPLE && !session->sample_id_all) { @@ -679,7 +679,7 @@ static void perf_session__print_tstamp(struct perf_session *session, } static void dump_event(struct perf_session *session, event_t *event, - u64 file_offset, struct sample_data *sample) + u64 file_offset, struct perf_sample *sample) { if (!dump_trace) return; @@ -697,7 +697,7 @@ static void dump_event(struct perf_session *session, event_t *event, } static void dump_sample(struct perf_session *session, event_t *event, - struct sample_data *sample) + struct perf_sample *sample) { if (!dump_trace) return; @@ -712,7 +712,7 @@ static void dump_sample(struct perf_session *session, event_t *event, static int perf_session_deliver_event(struct perf_session *session, event_t 
*event, - struct sample_data *sample, + struct perf_sample *sample, struct perf_event_ops *ops, u64 file_offset) { @@ -745,7 +745,7 @@ static int perf_session_deliver_event(struct perf_session *session, } static int perf_session__preprocess_sample(struct perf_session *session, - event_t *event, struct sample_data *sample) + event_t *event, struct perf_sample *sample) { if (event->header.type != PERF_RECORD_SAMPLE || !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) @@ -789,7 +789,7 @@ static int perf_session__process_event(struct perf_session *session, struct perf_event_ops *ops, u64 file_offset) { - struct sample_data sample; + struct perf_sample sample; int ret; if (session->header.needs_swap && event__swap_ops[event->header.type]) diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 7823976..365bf53 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -57,7 +57,7 @@ struct perf_session { struct perf_event_ops; -typedef int (*event_op)(event_t *self, struct sample_data *sample, +typedef int (*event_op)(event_t *self, struct perf_sample *sample, struct perf_session *session); typedef int (*event_synth_op)(event_t *self, struct perf_session *session); typedef int (*event_op2)(event_t *self, struct perf_session *session, @@ -158,7 +158,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp) static inline int perf_session__parse_sample(struct perf_session *session, const event_t *event, - struct sample_data *sample) + struct perf_sample *sample) { return event__parse_sample(event, session->sample_type, session->sample_id_all, sample); -- cgit v0.10.2 From 8115d60c323dd9931b95221c0a392aeddc1d6ef3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 29 Jan 2011 14:01:45 -0200 Subject: perf tools: Kill event_t typedef, use 'union perf_event' instead And move the event_t methods to the perf_event__ too. No code changes, just namespace consistency. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index ef36751..7006786 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -58,12 +58,13 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) return hist_entry__inc_addr_samples(he, al->addr); } -static int process_sample_event(event_t *event, struct perf_sample *sample, +static int process_sample_event(union perf_event *event, + struct perf_sample *sample, struct perf_session *session) { struct addr_location al; - if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -372,9 +373,9 @@ find_next: static struct perf_event_ops event_ops = { .sample = process_sample_event, - .mmap = event__process_mmap, - .comm = event__process_comm, - .fork = event__process_task, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .fork = perf_event__process_task, .ordered_samples = true, .ordering_requires_timestamps = true, }; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 0822149..6b7d911 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -30,13 +30,13 @@ static int hists__add_entry(struct hists *self, return -ENOMEM; } -static int diff__process_sample_event(event_t *event, +static int diff__process_sample_event(union perf_event *event, struct perf_sample *sample, struct perf_session *session) { struct addr_location al; - if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { pr_warning("problem processing %d event, 
skipping it.\n", event->header.type); return -1; @@ -56,11 +56,11 @@ static int diff__process_sample_event(event_t *event, static struct perf_event_ops event_ops = { .sample = diff__process_sample_event, - .mmap = event__process_mmap, - .comm = event__process_comm, - .exit = event__process_task, - .fork = event__process_task, - .lost = event__process_lost, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .lost = perf_event__process_lost, .ordered_samples = true, .ordering_requires_timestamps = true, }; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 4c9388c..e29f04e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -16,8 +16,8 @@ static char const *input_name = "-"; static bool inject_build_ids; -static int event__repipe_synth(event_t *event, - struct perf_session *session __used) +static int perf_event__repipe_synth(union perf_event *event, + struct perf_session *session __used) { uint32_t size; void *buf = event; @@ -36,41 +36,44 @@ static int event__repipe_synth(event_t *event, return 0; } -static int event__repipe(event_t *event, struct perf_sample *sample __used, - struct perf_session *session) +static int perf_event__repipe(union perf_event *event, + struct perf_sample *sample __used, + struct perf_session *session) { - return event__repipe_synth(event, session); + return perf_event__repipe_synth(event, session); } -static int event__repipe_mmap(event_t *self, struct perf_sample *sample, - struct perf_session *session) +static int perf_event__repipe_mmap(union perf_event *event, + struct perf_sample *sample, + struct perf_session *session) { int err; - err = event__process_mmap(self, sample, session); - event__repipe(self, sample, session); + err = perf_event__process_mmap(event, sample, session); + perf_event__repipe(event, sample, session); return err; } -static int event__repipe_task(event_t *self, struct 
perf_sample *sample, - struct perf_session *session) +static int perf_event__repipe_task(union perf_event *event, + struct perf_sample *sample, + struct perf_session *session) { int err; - err = event__process_task(self, sample, session); - event__repipe(self, sample, session); + err = perf_event__process_task(event, sample, session); + perf_event__repipe(event, sample, session); return err; } -static int event__repipe_tracing_data(event_t *self, - struct perf_session *session) +static int perf_event__repipe_tracing_data(union perf_event *event, + struct perf_session *session) { int err; - event__repipe_synth(self, session); - err = event__process_tracing_data(self, session); + perf_event__repipe_synth(event, session); + err = perf_event__process_tracing_data(event, session); return err; } @@ -109,8 +112,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) if (self->kernel) misc = PERF_RECORD_MISC_KERNEL; - err = event__synthesize_build_id(self, misc, event__repipe, - machine, session); + err = perf_event__synthesize_build_id(self, misc, perf_event__repipe, + machine, session); if (err) { pr_err("Can't synthesize build_id event for %s\n", self->long_name); return -1; @@ -119,8 +122,9 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session) return 0; } -static int event__inject_buildid(event_t *event, struct perf_sample *sample, - struct perf_session *session) +static int perf_event__inject_buildid(union perf_event *event, + struct perf_sample *sample, + struct perf_session *session) { struct addr_location al; struct thread *thread; @@ -155,24 +159,24 @@ static int event__inject_buildid(event_t *event, struct perf_sample *sample, } repipe: - event__repipe(event, sample, session); + perf_event__repipe(event, sample, session); return 0; } struct perf_event_ops inject_ops = { - .sample = event__repipe, - .mmap = event__repipe, - .comm = event__repipe, - .fork = event__repipe, - .exit = event__repipe, - .lost = 
event__repipe, - .read = event__repipe, - .throttle = event__repipe, - .unthrottle = event__repipe, - .attr = event__repipe_synth, - .event_type = event__repipe_synth, - .tracing_data = event__repipe_synth, - .build_id = event__repipe_synth, + .sample = perf_event__repipe, + .mmap = perf_event__repipe, + .comm = perf_event__repipe, + .fork = perf_event__repipe, + .exit = perf_event__repipe, + .lost = perf_event__repipe, + .read = perf_event__repipe, + .throttle = perf_event__repipe, + .unthrottle = perf_event__repipe, + .attr = perf_event__repipe_synth, + .event_type = perf_event__repipe_synth, + .tracing_data = perf_event__repipe_synth, + .build_id = perf_event__repipe_synth, }; extern volatile int session_done; @@ -190,10 +194,10 @@ static int __cmd_inject(void) signal(SIGINT, sig_handler); if (inject_build_ids) { - inject_ops.sample = event__inject_buildid; - inject_ops.mmap = event__repipe_mmap; - inject_ops.fork = event__repipe_task; - inject_ops.tracing_data = event__repipe_tracing_data; + inject_ops.sample = perf_event__inject_buildid; + inject_ops.mmap = perf_event__repipe_mmap; + inject_ops.fork = perf_event__repipe_task; + inject_ops.tracing_data = perf_event__repipe_tracing_data; } session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 3c1cdcf..7f618f4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -275,9 +275,8 @@ static void process_free_event(void *data, s_alloc->alloc_cpu = -1; } -static void -process_raw_event(event_t *raw_event __used, void *data, - int cpu, u64 timestamp, struct thread *thread) +static void process_raw_event(union perf_event *raw_event __used, void *data, + int cpu, u64 timestamp, struct thread *thread) { struct event *event; int type; @@ -304,7 +303,8 @@ process_raw_event(event_t *raw_event __used, void *data, } } -static int process_sample_event(event_t *event, struct perf_sample *sample, +static int 
process_sample_event(union perf_event *event, + struct perf_sample *sample, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, event->ip.pid); @@ -325,7 +325,7 @@ static int process_sample_event(event_t *event, struct perf_sample *sample, static struct perf_event_ops event_ops = { .sample = process_sample_event, - .comm = event__process_comm, + .comm = perf_event__process_comm, .ordered_samples = true, }; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index c3f5127..e00d938 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -834,14 +834,14 @@ static void dump_info(void) die("Unknown type of information\n"); } -static int process_sample_event(event_t *self, struct perf_sample *sample, +static int process_sample_event(union perf_event *event, struct perf_sample *sample, struct perf_session *s) { struct thread *thread = perf_session__findnew(s, sample->tid); if (thread == NULL) { pr_debug("problem processing %d event, skipping it.\n", - self->header.type); + event->header.type); return -1; } @@ -852,7 +852,7 @@ static int process_sample_event(event_t *self, struct perf_sample *sample, static struct perf_event_ops eops = { .sample = process_sample_event, - .comm = event__process_comm, + .comm = perf_event__process_comm, .ordered_samples = true, }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5d3e4b3..edc3555 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -100,7 +100,7 @@ static void write_output(void *buf, size_t size) } } -static int process_synthesized_event(event_t *event, +static int process_synthesized_event(union perf_event *event, struct perf_sample *sample __used, struct perf_session *self __used) { @@ -404,7 +404,7 @@ static void atexit_header(void) } } -static void event__synthesize_guest_os(struct machine *machine, void *data) +static void perf_event__synthesize_guest_os(struct machine *machine, void *data) { int err; 
struct perf_session *psession = data; @@ -420,8 +420,8 @@ static void event__synthesize_guest_os(struct machine *machine, void *data) *method is used to avoid symbol missing when the first addr is *in module instead of in guest kernel. */ - err = event__synthesize_modules(process_synthesized_event, - psession, machine); + err = perf_event__synthesize_modules(process_synthesized_event, + psession, machine); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -430,11 +430,12 @@ static void event__synthesize_guest_os(struct machine *machine, void *data) * We use _stext for guest kernel because guest kernel's /proc/kallsyms * have no _text sometimes. */ - err = event__synthesize_kernel_mmap(process_synthesized_event, - psession, machine, "_text"); + err = perf_event__synthesize_kernel_mmap(process_synthesized_event, + psession, machine, "_text"); if (err < 0) - err = event__synthesize_kernel_mmap(process_synthesized_event, - psession, machine, "_stext"); + err = perf_event__synthesize_kernel_mmap(process_synthesized_event, + psession, machine, + "_stext"); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -617,16 +618,16 @@ static int __cmd_record(int argc, const char **argv) perf_session__set_sample_id_all(session, sample_id_all_avail); if (pipe_output) { - err = event__synthesize_attrs(&session->header, - process_synthesized_event, - session); + err = perf_event__synthesize_attrs(&session->header, + process_synthesized_event, + session); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); return err; } - err = event__synthesize_event_types(process_synthesized_event, - session); + err = perf_event__synthesize_event_types(process_synthesized_event, + session); if (err < 0) { pr_err("Couldn't synthesize event_types.\n"); return err; @@ -641,9 +642,9 @@ static int __cmd_record(int argc, const char **argv) * return this more properly and also * 
propagate errors that now are calling die() */ - err = event__synthesize_tracing_data(output, evsel_list, - process_synthesized_event, - session); + err = perf_event__synthesize_tracing_data(output, evsel_list, + process_synthesized_event, + session); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); return err; @@ -658,31 +659,34 @@ static int __cmd_record(int argc, const char **argv) return -1; } - err = event__synthesize_kernel_mmap(process_synthesized_event, - session, machine, "_text"); + err = perf_event__synthesize_kernel_mmap(process_synthesized_event, + session, machine, "_text"); if (err < 0) - err = event__synthesize_kernel_mmap(process_synthesized_event, - session, machine, "_stext"); + err = perf_event__synthesize_kernel_mmap(process_synthesized_event, + session, machine, "_stext"); if (err < 0) pr_err("Couldn't record kernel reference relocation symbol\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" "Check /proc/kallsyms permission or run as root.\n"); - err = event__synthesize_modules(process_synthesized_event, - session, machine); + err = perf_event__synthesize_modules(process_synthesized_event, + session, machine); if (err < 0) pr_err("Couldn't record kernel module information.\n" "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" "Check /proc/modules permission or run as root.\n"); if (perf_guest) - perf_session__process_machines(session, event__synthesize_guest_os); + perf_session__process_machines(session, + perf_event__synthesize_guest_os); if (!system_wide) - event__synthesize_thread(target_tid, process_synthesized_event, - session); + perf_event__synthesize_thread(target_tid, + process_synthesized_event, + session); else - event__synthesize_threads(process_synthesized_event, session); + perf_event__synthesize_threads(process_synthesized_event, + session); if (realtime_prio) { struct sched_param param; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bbbadcc..a6a4e54 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -149,13 +149,14 @@ static int add_event_total(struct perf_session *session, return 0; } -static int process_sample_event(event_t *event, struct perf_sample *sample, +static int process_sample_event(union perf_event *event, + struct perf_sample *sample, struct perf_session *session) { struct addr_location al; struct perf_event_attr *attr; - if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -179,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_sample *sample, return 0; } -static int process_read_event(event_t *event, struct perf_sample *sample __used, +static int process_read_event(union perf_event *event, + struct perf_sample *sample __used, struct perf_session *session __used) { struct perf_event_attr *attr; @@ -232,17 +234,17 @@ static int perf_session__setup_sample_type(struct perf_session *self) } static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .mmap = event__process_mmap, - .comm = event__process_comm, - .exit = event__process_task, - .fork = 
event__process_task, - .lost = event__process_lost, - .read = process_read_event, - .attr = event__process_attr, - .event_type = event__process_event_type, - .tracing_data = event__process_tracing_data, - .build_id = event__process_build_id, + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .lost = perf_event__process_lost, + .read = process_read_event, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, .ordered_samples = true, .ordering_requires_timestamps = true, }; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ff993c8..ae26211 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1580,9 +1580,9 @@ process_sched_migrate_task_event(void *data, struct perf_session *session, event, cpu, timestamp, thread); } -static void -process_raw_event(event_t *raw_event __used, struct perf_session *session, - void *data, int cpu, u64 timestamp, struct thread *thread) +static void process_raw_event(union perf_event *raw_event __used, + struct perf_session *session, void *data, int cpu, + u64 timestamp, struct thread *thread) { struct event *event; int type; @@ -1607,7 +1607,8 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session, process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); } -static int process_sample_event(event_t *event, struct perf_sample *sample, +static int process_sample_event(union perf_event *event, + struct perf_sample *sample, struct perf_session *session) { struct thread *thread; @@ -1635,9 +1636,9 @@ static int process_sample_event(event_t *event, struct perf_sample *sample, static struct perf_event_ops event_ops = { .sample = process_sample_event, - .comm = event__process_comm, - .lost = 
event__process_lost, - .fork = event__process_task, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, .ordered_samples = true, }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5c4c809..5f40df6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -63,7 +63,8 @@ static int cleanup_scripting(void) static char const *input_name = "perf.data"; -static int process_sample_event(event_t *event, struct perf_sample *sample, +static int process_sample_event(union perf_event *event, + struct perf_sample *sample, struct perf_session *session) { struct thread *thread = perf_session__findnew(session, event->ip.pid); @@ -100,14 +101,14 @@ static int process_sample_event(event_t *event, struct perf_sample *sample, } static struct perf_event_ops event_ops = { - .sample = process_sample_event, - .comm = event__process_comm, - .attr = event__process_attr, - .event_type = event__process_event_type, - .tracing_data = event__process_tracing_data, - .build_id = event__process_build_id, - .ordering_requires_timestamps = true, + .sample = process_sample_event, + .comm = perf_event__process_comm, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, .ordered_samples = true, + .ordering_requires_timestamps = true, }; extern volatile int session_done; diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index df62433..845b9bd 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -454,7 +454,7 @@ out_thread_map_delete: static int test__basic_mmap(void) { int err = -1; - event_t *event; + union perf_event *event; struct thread_map *threads; struct cpu_map *cpus; struct perf_evlist *evlist; @@ -554,11 +554,11 @@ static int test__basic_mmap(void) if (event->header.type != PERF_RECORD_SAMPLE) { pr_debug("unexpected %s event\n", - 
event__get_event_name(event->header.type)); + perf_event__name(event->header.type)); goto out_munmap; } - event__parse_sample(event, attr.sample_type, false, &sample); + perf_event__parse_sample(event, attr.sample_type, false, &sample); evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { pr_debug("event with id %" PRIu64 diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 01cf0c3..0801275 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -276,21 +276,24 @@ static int cpus_cstate_state[MAX_CPUS]; static u64 cpus_pstate_start_times[MAX_CPUS]; static u64 cpus_pstate_state[MAX_CPUS]; -static int process_comm_event(event_t *event, struct perf_sample *sample __used, +static int process_comm_event(union perf_event *event, + struct perf_sample *sample __used, struct perf_session *session __used) { pid_set_comm(event->comm.tid, event->comm.comm); return 0; } -static int process_fork_event(event_t *event, struct perf_sample *sample __used, +static int process_fork_event(union perf_event *event, + struct perf_sample *sample __used, struct perf_session *session __used) { pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); return 0; } -static int process_exit_event(event_t *event, struct perf_sample *sample __used, +static int process_exit_event(union perf_event *event, + struct perf_sample *sample __used, struct perf_session *session __used) { pid_exit(event->fork.pid, event->fork.time); @@ -486,7 +489,7 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te) } -static int process_sample_event(event_t *event __used, +static int process_sample_event(union perf_event *event __used, struct perf_sample *sample, struct perf_session *session) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d923127..2f4d1f2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -401,7 +401,7 @@ static void show_details(struct sym_entry *syme) } 
/* - * Symbols will be added here in event__process_sample and will get out + * Symbols will be added here in perf_event__process_sample and will get out * after decayed. */ static LIST_HEAD(active_symbols); @@ -996,15 +996,15 @@ static int symbol_filter(struct map *map, struct symbol *sym) return 0; } -static void event__process_sample(const event_t *self, - struct perf_sample *sample, - struct perf_session *session) +static void perf_event__process_sample(const union perf_event *event, + struct perf_sample *sample, + struct perf_session *session) { - u64 ip = self->ip.ip; + u64 ip = event->ip.ip; struct sym_entry *syme; struct addr_location al; struct machine *machine; - u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; ++samples; @@ -1023,7 +1023,7 @@ static void event__process_sample(const event_t *self, break; case PERF_RECORD_MISC_GUEST_KERNEL: ++guest_kernel_samples; - machine = perf_session__find_machine(session, self->ip.pid); + machine = perf_session__find_machine(session, event->ip.pid); break; case PERF_RECORD_MISC_GUEST_USER: ++guest_us_samples; @@ -1038,15 +1038,15 @@ static void event__process_sample(const event_t *self, if (!machine && perf_guest) { pr_err("Can't find guest [%d]'s kernel information\n", - self->ip.pid); + event->ip.pid); return; } - if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) + if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) exact_samples++; - if (event__preprocess_sample(self, session, &al, sample, - symbol_filter) < 0 || + if (perf_event__preprocess_sample(event, session, &al, sample, + symbol_filter) < 0 || al.filtered) return; @@ -1108,15 +1108,15 @@ static void event__process_sample(const event_t *self, static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) { struct perf_sample sample; - event_t *event; + union perf_event *event; while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) { 
perf_session__parse_sample(self, event, &sample); if (event->header.type == PERF_RECORD_SAMPLE) - event__process_sample(event, &sample, self); + perf_event__process_sample(event, &sample, self); else - event__process(event, &sample, self); + perf_event__process(event, &sample, self); } } @@ -1199,9 +1199,10 @@ static int __cmd_top(void) return -ENOMEM; if (target_tid != -1) - event__synthesize_thread(target_tid, event__process, session); + perf_event__synthesize_thread(target_tid, perf_event__process, + session); else - event__synthesize_threads(event__process, session); + perf_event__synthesize_threads(perf_event__process, session); start_counters(evsel_list); first = list_entry(evsel_list->entries.next, struct perf_evsel, node); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index b184a7f..31f934a 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -14,7 +14,7 @@ #include #include "debug.h" -static int build_id__mark_dso_hit(event_t *event, +static int build_id__mark_dso_hit(union perf_event *event, struct perf_sample *sample __used, struct perf_session *session) { @@ -37,13 +37,14 @@ static int build_id__mark_dso_hit(event_t *event, return 0; } -static int event__exit_del_thread(event_t *self, struct perf_sample *sample __used, - struct perf_session *session) +static int perf_event__exit_del_thread(union perf_event *event, + struct perf_sample *sample __used, + struct perf_session *session) { - struct thread *thread = perf_session__findnew(session, self->fork.tid); + struct thread *thread = perf_session__findnew(session, event->fork.tid); - dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, - self->fork.ppid, self->fork.ptid); + dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); if (thread) { rb_erase(&thread->rb_node, &session->threads); @@ -56,9 +57,9 @@ static int event__exit_del_thread(event_t *self, struct perf_sample *sample __us struct 
perf_event_ops build_id__mark_dso_hit_ops = { .sample = build_id__mark_dso_hit, - .mmap = event__process_mmap, - .fork = event__process_task, - .exit = event__exit_del_thread, + .mmap = perf_event__process_mmap, + .fork = perf_event__process_task, + .exit = perf_event__exit_del_thread, }; char *dso__build_id_filename(struct dso *self, char *bf, size_t size) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index f8c66d1..9f7106a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -18,7 +18,8 @@ #include "util.h" #include "callchain.h" -bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) +bool ip_callchain__valid(struct ip_callchain *chain, + const union perf_event *event) { unsigned int chain_size = event->header.size; chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 6713725..1a79df9 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -95,8 +95,8 @@ int callchain_append(struct callchain_root *root, int callchain_merge(struct callchain_cursor *cursor, struct callchain_root *dst, struct callchain_root *src); -bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); - +bool ip_callchain__valid(struct ip_callchain *chain, + const union perf_event *event); /* * Initialize a cursor before adding entries inside, but keep * the previously allocated entries as a cache. diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 01bbe8e..d4536a9 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -57,7 +57,7 @@ void ui__warning(const char *format, ...) 
} #endif -void trace_event(event_t *event) +void trace_event(union perf_event *event) { unsigned char *raw_event = (void *)event; const char *color = PERF_COLOR_BLUE; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index ca35fd6..93516cf4 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -9,7 +9,7 @@ extern int verbose; extern bool quiet, dump_trace; int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); -void trace_event(event_t *event); +void trace_event(union perf_event *event); struct ui_progress; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 5c886fb..731265f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -7,7 +7,7 @@ #include "strlist.h" #include "thread.h" -static const char *event__name[] = { +static const char *perf_event__names[] = { [0] = "TOTAL", [PERF_RECORD_MMAP] = "MMAP", [PERF_RECORD_LOST] = "LOST", @@ -25,13 +25,13 @@ static const char *event__name[] = { [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", }; -const char *event__get_event_name(unsigned int id) +const char *perf_event__name(unsigned int id) { - if (id >= ARRAY_SIZE(event__name)) + if (id >= ARRAY_SIZE(perf_event__names)) return "INVALID"; - if (!event__name[id]) + if (!perf_event__names[id]) return "UNKNOWN"; - return event__name[id]; + return perf_event__names[id]; } static struct perf_sample synth_sample = { @@ -43,9 +43,9 @@ static struct perf_sample synth_sample = { .period = 1, }; -static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full, - event__handler_t process, - struct perf_session *session) +static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid, + int full, perf_event__handler_t process, + struct perf_session *session) { char filename[PATH_MAX]; char bf[BUFSIZ]; @@ -126,9 +126,10 @@ out: return tgid; } -static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid, - event__handler_t process, - struct perf_session 
*session) +static int perf_event__synthesize_mmap_events(union perf_event *event, + pid_t pid, pid_t tgid, + perf_event__handler_t process, + struct perf_session *session) { char filename[PATH_MAX]; FILE *fp; @@ -199,14 +200,14 @@ static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid, return 0; } -int event__synthesize_modules(event__handler_t process, - struct perf_session *session, - struct machine *machine) +int perf_event__synthesize_modules(perf_event__handler_t process, + struct perf_session *session, + struct machine *machine) { struct rb_node *nd; struct map_groups *kmaps = &machine->kmaps; - event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); - + union perf_event *event = zalloc((sizeof(event->mmap) + + session->id_hdr_size)); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); @@ -251,22 +252,23 @@ int event__synthesize_modules(event__handler_t process, return 0; } -static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event, - pid_t pid, event__handler_t process, +static int __event__synthesize_thread(union perf_event *comm_event, + union perf_event *mmap_event, + pid_t pid, perf_event__handler_t process, struct perf_session *session) { - pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process, + pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process, session); if (tgid == -1) return -1; - return event__synthesize_mmap_events(mmap_event, pid, tgid, + return perf_event__synthesize_mmap_events(mmap_event, pid, tgid, process, session); } -int event__synthesize_thread(pid_t pid, event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_thread(pid_t pid, perf_event__handler_t process, + struct perf_session *session) { - event_t *comm_event, *mmap_event; + union perf_event *comm_event, *mmap_event; int err = -1; comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); @@ -286,12 +288,12 @@ 
out: return err; } -int event__synthesize_threads(event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_threads(perf_event__handler_t process, + struct perf_session *session) { DIR *proc; struct dirent dirent, *next; - event_t *comm_event, *mmap_event; + union perf_event *comm_event, *mmap_event; int err = -1; comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); @@ -349,10 +351,10 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } -int event__synthesize_kernel_mmap(event__handler_t process, - struct perf_session *session, - struct machine *machine, - const char *symbol_name) +int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, + struct perf_session *session, + struct machine *machine, + const char *symbol_name) { size_t size; const char *filename, *mmap_name; @@ -366,8 +368,8 @@ int event__synthesize_kernel_mmap(event__handler_t process, * kernels. */ struct process_symbol_args args = { .name = symbol_name, }; - event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); - + union perf_event *event = zalloc((sizeof(event->mmap) + + session->id_hdr_size)); if (event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); @@ -440,14 +442,15 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm, return 0; } -int event__process_comm(event_t *self, struct perf_sample *sample __used, - struct perf_session *session) +int perf_event__process_comm(union perf_event *event, + struct perf_sample *sample __used, + struct perf_session *session) { - struct thread *thread = perf_session__findnew(session, self->comm.tid); + struct thread *thread = perf_session__findnew(session, event->comm.tid); - dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid); + dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid); - if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm, + if (thread == NULL || 
thread__set_comm_adjust(thread, event->comm.comm, &session->hists)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; @@ -456,19 +459,21 @@ int event__process_comm(event_t *self, struct perf_sample *sample __used, return 0; } -int event__process_lost(event_t *self, struct perf_sample *sample __used, - struct perf_session *session) +int perf_event__process_lost(union perf_event *event, + struct perf_sample *sample __used, + struct perf_session *session) { dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", - self->lost.id, self->lost.lost); - session->hists.stats.total_lost += self->lost.lost; + event->lost.id, event->lost.lost); + session->hists.stats.total_lost += event->lost.lost; return 0; } -static void event_set_kernel_mmap_len(struct map **maps, event_t *self) +static void perf_event__set_kernel_mmap_len(union perf_event *event, + struct map **maps) { - maps[MAP__FUNCTION]->start = self->mmap.start; - maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len; + maps[MAP__FUNCTION]->start = event->mmap.start; + maps[MAP__FUNCTION]->end = event->mmap.start + event->mmap.len; /* * Be a bit paranoid here, some perf.data file came with * a zero sized synthesized MMAP event for the kernel. 
@@ -477,8 +482,8 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self) maps[MAP__FUNCTION]->end = ~0ULL; } -static int event__process_kernel_mmap(event_t *self, - struct perf_session *session) +static int perf_event__process_kernel_mmap(union perf_event *event, + struct perf_session *session) { struct map *map; char kmmap_prefix[PATH_MAX]; @@ -486,9 +491,9 @@ static int event__process_kernel_mmap(event_t *self, enum dso_kernel_type kernel_type; bool is_kernel_mmap; - machine = perf_session__findnew_machine(session, self->mmap.pid); + machine = perf_session__findnew_machine(session, event->mmap.pid); if (!machine) { - pr_err("Can't find id %d's machine\n", self->mmap.pid); + pr_err("Can't find id %d's machine\n", event->mmap.pid); goto out_problem; } @@ -498,17 +503,17 @@ static int event__process_kernel_mmap(event_t *self, else kernel_type = DSO_TYPE_GUEST_KERNEL; - is_kernel_mmap = memcmp(self->mmap.filename, + is_kernel_mmap = memcmp(event->mmap.filename, kmmap_prefix, strlen(kmmap_prefix)) == 0; - if (self->mmap.filename[0] == '/' || - (!is_kernel_mmap && self->mmap.filename[0] == '[')) { + if (event->mmap.filename[0] == '/' || + (!is_kernel_mmap && event->mmap.filename[0] == '[')) { char short_module_name[1024]; char *name, *dot; - if (self->mmap.filename[0] == '/') { - name = strrchr(self->mmap.filename, '/'); + if (event->mmap.filename[0] == '/') { + name = strrchr(event->mmap.filename, '/'); if (name == NULL) goto out_problem; @@ -520,10 +525,10 @@ static int event__process_kernel_mmap(event_t *self, "[%.*s]", (int)(dot - name), name); strxfrchar(short_module_name, '-', '_'); } else - strcpy(short_module_name, self->mmap.filename); + strcpy(short_module_name, event->mmap.filename); - map = machine__new_module(machine, self->mmap.start, - self->mmap.filename); + map = machine__new_module(machine, event->mmap.start, + event->mmap.filename); if (map == NULL) goto out_problem; @@ -533,9 +538,9 @@ static int event__process_kernel_mmap(event_t 
*self, map->dso->short_name = name; map->dso->sname_alloc = 1; - map->end = map->start + self->mmap.len; + map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { - const char *symbol_name = (self->mmap.filename + + const char *symbol_name = (event->mmap.filename + strlen(kmmap_prefix)); /* * Should be there already, from the build-id table in @@ -550,10 +555,10 @@ static int event__process_kernel_mmap(event_t *self, if (__machine__create_kernel_maps(machine, kernel) < 0) goto out_problem; - event_set_kernel_mmap_len(machine->vmlinux_maps, self); + perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps); perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, symbol_name, - self->mmap.pgoff); + event->mmap.pgoff); if (machine__is_default_guest(machine)) { /* * preload dso of guest kernel and modules @@ -567,22 +572,23 @@ out_problem: return -1; } -int event__process_mmap(event_t *self, struct perf_sample *sample __used, - struct perf_session *session) +int perf_event__process_mmap(union perf_event *event, + struct perf_sample *sample __used, + struct perf_session *session) { struct machine *machine; struct thread *thread; struct map *map; - u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; int ret = 0; dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", - self->mmap.pid, self->mmap.tid, self->mmap.start, - self->mmap.len, self->mmap.pgoff, self->mmap.filename); + event->mmap.pid, event->mmap.tid, event->mmap.start, + event->mmap.len, event->mmap.pgoff, event->mmap.filename); if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || cpumode == PERF_RECORD_MISC_KERNEL) { - ret = event__process_kernel_mmap(self, session); + ret = perf_event__process_kernel_mmap(event, session); if (ret < 0) goto out_problem; return 0; @@ -591,12 +597,12 @@ int event__process_mmap(event_t *self, struct perf_sample *sample __used, machine = 
perf_session__find_host_machine(session); if (machine == NULL) goto out_problem; - thread = perf_session__findnew(session, self->mmap.pid); + thread = perf_session__findnew(session, event->mmap.pid); if (thread == NULL) goto out_problem; - map = map__new(&machine->user_dsos, self->mmap.start, - self->mmap.len, self->mmap.pgoff, - self->mmap.pid, self->mmap.filename, + map = map__new(&machine->user_dsos, event->mmap.start, + event->mmap.len, event->mmap.pgoff, + event->mmap.pid, event->mmap.filename, MAP__FUNCTION); if (map == NULL) goto out_problem; @@ -609,16 +615,17 @@ out_problem: return 0; } -int event__process_task(event_t *self, struct perf_sample *sample __used, - struct perf_session *session) +int perf_event__process_task(union perf_event *event, + struct perf_sample *sample __used, + struct perf_session *session) { - struct thread *thread = perf_session__findnew(session, self->fork.tid); - struct thread *parent = perf_session__findnew(session, self->fork.ptid); + struct thread *thread = perf_session__findnew(session, event->fork.tid); + struct thread *parent = perf_session__findnew(session, event->fork.ptid); - dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, - self->fork.ppid, self->fork.ptid); + dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); - if (self->header.type == PERF_RECORD_EXIT) { + if (event->header.type == PERF_RECORD_EXIT) { perf_session__remove_thread(session, thread); return 0; } @@ -632,22 +639,22 @@ int event__process_task(event_t *self, struct perf_sample *sample __used, return 0; } -int event__process(event_t *event, struct perf_sample *sample, - struct perf_session *session) +int perf_event__process(union perf_event *event, struct perf_sample *sample, + struct perf_session *session) { switch (event->header.type) { case PERF_RECORD_COMM: - event__process_comm(event, sample, session); + perf_event__process_comm(event, sample, session); break; case PERF_RECORD_MMAP: 
- event__process_mmap(event, sample, session); + perf_event__process_mmap(event, sample, session); break; case PERF_RECORD_FORK: case PERF_RECORD_EXIT: - event__process_task(event, sample, session); + perf_event__process_task(event, sample, session); break; case PERF_RECORD_LOST: - event__process_lost(event, sample, session); + perf_event__process_lost(event, sample, session); default: break; } @@ -756,12 +763,14 @@ static void dso__calc_col_width(struct dso *self, struct hists *hists) self->slen_calculated = 1; } -int event__preprocess_sample(const event_t *self, struct perf_session *session, - struct addr_location *al, struct perf_sample *sample, - symbol_filter_t filter) +int perf_event__preprocess_sample(const union perf_event *event, + struct perf_session *session, + struct addr_location *al, + struct perf_sample *sample, + symbol_filter_t filter) { - u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - struct thread *thread = perf_session__findnew(session, self->ip.pid); + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread = perf_session__findnew(session, event->ip.pid); if (thread == NULL) return -1; @@ -783,7 +792,7 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session, machine__create_kernel_maps(&session->host_machine); thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, - self->ip.pid, self->ip.ip, al); + event->ip.pid, event->ip.ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? 
"[hypervisor]" : ""); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 84fd71f..eecb422 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -117,7 +117,7 @@ struct tracing_data_event { u32 size; }; -typedef union event_union { +union perf_event { struct perf_event_header header; struct ip_event ip; struct mmap_event mmap; @@ -130,49 +130,52 @@ typedef union event_union { struct event_type_event event_type; struct tracing_data_event tracing_data; struct build_id_event build_id; -} event_t; +}; -void event__print_totals(void); +void perf_event__print_totals(void); struct perf_session; -typedef int (*event__handler_synth_t)(event_t *event, +typedef int (*perf_event__handler_synth_t)(union perf_event *event, + struct perf_session *session); +typedef int (*perf_event__handler_t)(union perf_event *event, + struct perf_sample *sample, struct perf_session *session); -typedef int (*event__handler_t)(event_t *event, struct perf_sample *sample, - struct perf_session *session); -int event__synthesize_thread(pid_t pid, event__handler_t process, +int perf_event__synthesize_thread(pid_t pid, perf_event__handler_t process, + struct perf_session *session); +int perf_event__synthesize_threads(perf_event__handler_t process, + struct perf_session *session); +int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, + struct perf_session *session, + struct machine *machine, + const char *symbol_name); + +int perf_event__synthesize_modules(perf_event__handler_t process, + struct perf_session *session, + struct machine *machine); + +int perf_event__process_comm(union perf_event *event, struct perf_sample *sample, struct perf_session *session); -int event__synthesize_threads(event__handler_t process, - struct perf_session *session); -int event__synthesize_kernel_mmap(event__handler_t process, - struct perf_session *session, - struct machine *machine, - const char *symbol_name); - -int event__synthesize_modules(event__handler_t process, - 
struct perf_session *session, - struct machine *machine); - -int event__process_comm(event_t *event, struct perf_sample *sample, - struct perf_session *session); -int event__process_lost(event_t *event, struct perf_sample *sample, - struct perf_session *session); -int event__process_mmap(event_t *event, struct perf_sample *sample, - struct perf_session *session); -int event__process_task(event_t *event, struct perf_sample *sample, +int perf_event__process_lost(union perf_event *event, struct perf_sample *sample, + struct perf_session *session); +int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample, + struct perf_session *session); +int perf_event__process_task(union perf_event *event, struct perf_sample *sample, + struct perf_session *session); +int perf_event__process(union perf_event *event, struct perf_sample *sample, struct perf_session *session); -int event__process(event_t *event, struct perf_sample *sample, - struct perf_session *session); struct addr_location; -int event__preprocess_sample(const event_t *self, struct perf_session *session, - struct addr_location *al, struct perf_sample *sample, - symbol_filter_t filter); +int perf_event__preprocess_sample(const union perf_event *self, + struct perf_session *session, + struct addr_location *al, + struct perf_sample *sample, + symbol_filter_t filter); -const char *event__get_event_name(unsigned int id); +const char *perf_event__name(unsigned int id); -int event__parse_sample(const event_t *event, u64 type, bool sample_id_all, - struct perf_sample *sample); +int perf_event__parse_sample(const union perf_event *event, u64 type, + bool sample_id_all, struct perf_sample *sample); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b498eec..917fc18 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -107,7 +107,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } -event_t 
*perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) +union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) { /* XXX Move this to perf.c, making it generally available */ unsigned int page_size = sysconf(_SC_PAGE_SIZE); @@ -115,7 +115,7 @@ event_t *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; - event_t *event = NULL; + union perf_event *event = NULL; if (evlist->overwrite) { /* @@ -140,7 +140,7 @@ event_t *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) if (old != head) { size_t size; - event = (event_t *)&data[old & md->mask]; + event = (union perf_event *)&data[old & md->mask]; size = event->header.size; /* diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 2706ae4..022ae40 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -17,7 +17,7 @@ struct perf_evlist { int nr_fds; int mmap_len; bool overwrite; - event_t event_copy; + union perf_event event_copy; struct perf_mmap *mmap; struct pollfd *pollfd; }; @@ -37,6 +37,6 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); -event_t *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu); +union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a134885..fddeb08 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -367,8 +367,8 @@ out_unmap: return -1; } -static int event__parse_id_sample(const event_t *event, u64 type, - struct perf_sample *sample) +static int perf_event__parse_id_sample(const union perf_event *event, u64 type, + struct perf_sample *sample) { const u64 *array = event->sample.array; @@ -405,8 +405,8 @@ static int event__parse_id_sample(const 
event_t *event, u64 type, return 0; } -int event__parse_sample(const event_t *event, u64 type, bool sample_id_all, - struct perf_sample *data) +int perf_event__parse_sample(const union perf_event *event, u64 type, + bool sample_id_all, struct perf_sample *data) { const u64 *array; @@ -416,7 +416,7 @@ int event__parse_sample(const event_t *event, u64 type, bool sample_id_all, if (event->header.type != PERF_RECORD_SAMPLE) { if (!sample_id_all) return 0; - return event__parse_id_sample(event, type, data); + return perf_event__parse_id_sample(event, type, data); } array = event->sample.array; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f0138d4..c0de5ec 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1000,11 +1000,11 @@ perf_header__find_attr(u64 id, struct perf_header *header) return NULL; } -int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, - event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, + perf_event__handler_t process, + struct perf_session *session) { - event_t *ev; + union perf_event *ev; size_t size; int err; @@ -1031,8 +1031,9 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, return err; } -int event__synthesize_attrs(struct perf_header *self, event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_attrs(struct perf_header *self, + perf_event__handler_t process, + struct perf_session *session) { struct perf_header_attr *attr; int i, err = 0; @@ -1040,8 +1041,8 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process, for (i = 0; i < self->attrs; i++) { attr = self->attr[i]; - err = event__synthesize_attr(&attr->attr, attr->ids, attr->id, - process, session); + err = perf_event__synthesize_attr(&attr->attr, attr->ids, + attr->id, process, session); if (err) { pr_debug("failed to create perf header attribute\n"); 
return err; @@ -1051,21 +1052,22 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process, return err; } -int event__process_attr(event_t *self, struct perf_session *session) +int perf_event__process_attr(union perf_event *event, + struct perf_session *session) { struct perf_header_attr *attr; unsigned int i, ids, n_ids; - attr = perf_header_attr__new(&self->attr.attr); + attr = perf_header_attr__new(&event->attr.attr); if (attr == NULL) return -ENOMEM; - ids = self->header.size; - ids -= (void *)&self->attr.id - (void *)self; + ids = event->header.size; + ids -= (void *)&event->attr.id - (void *)event; n_ids = ids / sizeof(u64); for (i = 0; i < n_ids; i++) { - if (perf_header_attr__add_id(attr, self->attr.id[i]) < 0) { + if (perf_header_attr__add_id(attr, event->attr.id[i]) < 0) { perf_header_attr__delete(attr); return -ENOMEM; } @@ -1081,11 +1083,11 @@ int event__process_attr(event_t *self, struct perf_session *session) return 0; } -int event__synthesize_event_type(u64 event_id, char *name, - event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_event_type(u64 event_id, char *name, + perf_event__handler_t process, + struct perf_session *session) { - event_t ev; + union perf_event ev; size_t size = 0; int err = 0; @@ -1106,8 +1108,8 @@ int event__synthesize_event_type(u64 event_id, char *name, return err; } -int event__synthesize_event_types(event__handler_t process, - struct perf_session *session) +int perf_event__synthesize_event_types(perf_event__handler_t process, + struct perf_session *session) { struct perf_trace_event_type *type; int i, err = 0; @@ -1115,8 +1117,9 @@ int event__synthesize_event_types(event__handler_t process, for (i = 0; i < event_count; i++) { type = &events[i]; - err = event__synthesize_event_type(type->event_id, type->name, - process, session); + err = perf_event__synthesize_event_type(type->event_id, + type->name, process, + session); if (err) { pr_debug("failed to create perf 
header event type\n"); return err; @@ -1126,21 +1129,21 @@ int event__synthesize_event_types(event__handler_t process, return err; } -int event__process_event_type(event_t *self, - struct perf_session *session __unused) +int perf_event__process_event_type(union perf_event *event, + struct perf_session *session __unused) { - if (perf_header__push_event(self->event_type.event_type.event_id, - self->event_type.event_type.name) < 0) + if (perf_header__push_event(event->event_type.event_type.event_id, + event->event_type.event_type.name) < 0) return -ENOMEM; return 0; } -int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, - event__handler_t process, +int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, + perf_event__handler_t process, struct perf_session *session __unused) { - event_t ev; + union perf_event ev; ssize_t size = 0, aligned_size = 0, padding; int err = 0; @@ -1163,10 +1166,10 @@ int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, return aligned_size; } -int event__process_tracing_data(event_t *self, - struct perf_session *session) +int perf_event__process_tracing_data(union perf_event *event, + struct perf_session *session) { - ssize_t size_read, padding, size = self->tracing_data.size; + ssize_t size_read, padding, size = event->tracing_data.size; off_t offset = lseek(session->fd, 0, SEEK_CUR); char buf[BUFSIZ]; @@ -1192,12 +1195,12 @@ int event__process_tracing_data(event_t *self, return size_read + padding; } -int event__synthesize_build_id(struct dso *pos, u16 misc, - event__handler_t process, - struct machine *machine, - struct perf_session *session) +int perf_event__synthesize_build_id(struct dso *pos, u16 misc, + perf_event__handler_t process, + struct machine *machine, + struct perf_session *session) { - event_t ev; + union perf_event ev; size_t len; int err = 0; @@ -1220,11 +1223,11 @@ int event__synthesize_build_id(struct dso *pos, u16 misc, return err; } -int 
event__process_build_id(event_t *self, - struct perf_session *session) +int perf_event__process_build_id(union perf_event *event, + struct perf_session *session) { - __event_process_build_id(&self->build_id, - self->build_id.filename, + __event_process_build_id(&event->build_id, + event->build_id.filename, session); return 0; } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 65afd7f..f042ceb 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -100,32 +100,32 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms); int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); -int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, - event__handler_t process, - struct perf_session *session); -int event__synthesize_attrs(struct perf_header *self, - event__handler_t process, - struct perf_session *session); -int event__process_attr(event_t *self, struct perf_session *session); - -int event__synthesize_event_type(u64 event_id, char *name, - event__handler_t process, +int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, + perf_event__handler_t process, + struct perf_session *session); +int perf_event__synthesize_attrs(struct perf_header *self, + perf_event__handler_t process, struct perf_session *session); -int event__synthesize_event_types(event__handler_t process, - struct perf_session *session); -int event__process_event_type(event_t *self, - struct perf_session *session); - -int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, - event__handler_t process, +int perf_event__process_attr(union perf_event *event, struct perf_session *session); + +int perf_event__synthesize_event_type(u64 event_id, char *name, + perf_event__handler_t process, + struct perf_session *session); +int perf_event__synthesize_event_types(perf_event__handler_t process, + struct perf_session *session); +int 
perf_event__process_event_type(union perf_event *event, struct perf_session *session); -int event__process_tracing_data(event_t *self, - struct perf_session *session); - -int event__synthesize_build_id(struct dso *pos, u16 misc, - event__handler_t process, - struct machine *machine, - struct perf_session *session); -int event__process_build_id(event_t *self, struct perf_session *session); +int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, + perf_event__handler_t process, + struct perf_session *session); +int perf_event__process_tracing_data(union perf_event *event, + struct perf_session *session); + +int perf_event__synthesize_build_id(struct dso *pos, u16 misc, + perf_event__handler_t process, + struct machine *machine, + struct perf_session *session); +int perf_event__process_build_id(union perf_event *event, + struct perf_session *session); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 02ed318..9588780 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1182,7 +1182,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) size_t ret = 0; for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { - const char *name = event__get_event_name(i); + const char *name = perf_event__name(i); if (!strcmp(name, "UNKNOWN")) continue; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ee0b611..a3a871f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -165,7 +165,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, } else if (mode == O_WRONLY) { /* * In O_RDONLY mode this will be performed when reading the - * kernel MMAP event, in event__process_mmap(). + * kernel MMAP event, in perf_event__process_mmap(). 
*/ if (perf_session__create_kernel_maps(self) < 0) goto out_delete; @@ -291,14 +291,14 @@ int perf_session__resolve_callchain(struct perf_session *self, return 0; } -static int process_event_synth_stub(event_t *event __used, +static int process_event_synth_stub(union perf_event *event __used, struct perf_session *session __used) { dump_printf(": unhandled!\n"); return 0; } -static int process_event_stub(event_t *event __used, +static int process_event_stub(union perf_event *event __used, struct perf_sample *sample __used, struct perf_session *session __used) { @@ -306,7 +306,7 @@ static int process_event_stub(event_t *event __used, return 0; } -static int process_finished_round_stub(event_t *event __used, +static int process_finished_round_stub(union perf_event *event __used, struct perf_session *session __used, struct perf_event_ops *ops __used) { @@ -314,7 +314,7 @@ static int process_finished_round_stub(event_t *event __used, return 0; } -static int process_finished_round(event_t *event, +static int process_finished_round(union perf_event *event, struct perf_session *session, struct perf_event_ops *ops); @@ -331,7 +331,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) if (handler->exit == NULL) handler->exit = process_event_stub; if (handler->lost == NULL) - handler->lost = event__process_lost; + handler->lost = perf_event__process_lost; if (handler->read == NULL) handler->read = process_event_stub; if (handler->throttle == NULL) @@ -365,98 +365,98 @@ void mem_bswap_64(void *src, int byte_size) } } -static void event__all64_swap(event_t *self) +static void perf_event__all64_swap(union perf_event *event) { - struct perf_event_header *hdr = &self->header; - mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr)); + struct perf_event_header *hdr = &event->header; + mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); } -static void event__comm_swap(event_t *self) +static void perf_event__comm_swap(union perf_event *event) { - 
self->comm.pid = bswap_32(self->comm.pid); - self->comm.tid = bswap_32(self->comm.tid); + event->comm.pid = bswap_32(event->comm.pid); + event->comm.tid = bswap_32(event->comm.tid); } -static void event__mmap_swap(event_t *self) +static void perf_event__mmap_swap(union perf_event *event) { - self->mmap.pid = bswap_32(self->mmap.pid); - self->mmap.tid = bswap_32(self->mmap.tid); - self->mmap.start = bswap_64(self->mmap.start); - self->mmap.len = bswap_64(self->mmap.len); - self->mmap.pgoff = bswap_64(self->mmap.pgoff); + event->mmap.pid = bswap_32(event->mmap.pid); + event->mmap.tid = bswap_32(event->mmap.tid); + event->mmap.start = bswap_64(event->mmap.start); + event->mmap.len = bswap_64(event->mmap.len); + event->mmap.pgoff = bswap_64(event->mmap.pgoff); } -static void event__task_swap(event_t *self) +static void perf_event__task_swap(union perf_event *event) { - self->fork.pid = bswap_32(self->fork.pid); - self->fork.tid = bswap_32(self->fork.tid); - self->fork.ppid = bswap_32(self->fork.ppid); - self->fork.ptid = bswap_32(self->fork.ptid); - self->fork.time = bswap_64(self->fork.time); + event->fork.pid = bswap_32(event->fork.pid); + event->fork.tid = bswap_32(event->fork.tid); + event->fork.ppid = bswap_32(event->fork.ppid); + event->fork.ptid = bswap_32(event->fork.ptid); + event->fork.time = bswap_64(event->fork.time); } -static void event__read_swap(event_t *self) +static void perf_event__read_swap(union perf_event *event) { - self->read.pid = bswap_32(self->read.pid); - self->read.tid = bswap_32(self->read.tid); - self->read.value = bswap_64(self->read.value); - self->read.time_enabled = bswap_64(self->read.time_enabled); - self->read.time_running = bswap_64(self->read.time_running); - self->read.id = bswap_64(self->read.id); + event->read.pid = bswap_32(event->read.pid); + event->read.tid = bswap_32(event->read.tid); + event->read.value = bswap_64(event->read.value); + event->read.time_enabled = bswap_64(event->read.time_enabled); + 
event->read.time_running = bswap_64(event->read.time_running); + event->read.id = bswap_64(event->read.id); } -static void event__attr_swap(event_t *self) +static void perf_event__attr_swap(union perf_event *event) { size_t size; - self->attr.attr.type = bswap_32(self->attr.attr.type); - self->attr.attr.size = bswap_32(self->attr.attr.size); - self->attr.attr.config = bswap_64(self->attr.attr.config); - self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period); - self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type); - self->attr.attr.read_format = bswap_64(self->attr.attr.read_format); - self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events); - self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type); - self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr); - self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len); - - size = self->header.size; - size -= (void *)&self->attr.id - (void *)self; - mem_bswap_64(self->attr.id, size); + event->attr.attr.type = bswap_32(event->attr.attr.type); + event->attr.attr.size = bswap_32(event->attr.attr.size); + event->attr.attr.config = bswap_64(event->attr.attr.config); + event->attr.attr.sample_period = bswap_64(event->attr.attr.sample_period); + event->attr.attr.sample_type = bswap_64(event->attr.attr.sample_type); + event->attr.attr.read_format = bswap_64(event->attr.attr.read_format); + event->attr.attr.wakeup_events = bswap_32(event->attr.attr.wakeup_events); + event->attr.attr.bp_type = bswap_32(event->attr.attr.bp_type); + event->attr.attr.bp_addr = bswap_64(event->attr.attr.bp_addr); + event->attr.attr.bp_len = bswap_64(event->attr.attr.bp_len); + + size = event->header.size; + size -= (void *)&event->attr.id - (void *)event; + mem_bswap_64(event->attr.id, size); } -static void event__event_type_swap(event_t *self) +static void perf_event__event_type_swap(union perf_event *event) { - self->event_type.event_type.event_id = - 
bswap_64(self->event_type.event_type.event_id); + event->event_type.event_type.event_id = + bswap_64(event->event_type.event_type.event_id); } -static void event__tracing_data_swap(event_t *self) +static void perf_event__tracing_data_swap(union perf_event *event) { - self->tracing_data.size = bswap_32(self->tracing_data.size); + event->tracing_data.size = bswap_32(event->tracing_data.size); } -typedef void (*event__swap_op)(event_t *self); - -static event__swap_op event__swap_ops[] = { - [PERF_RECORD_MMAP] = event__mmap_swap, - [PERF_RECORD_COMM] = event__comm_swap, - [PERF_RECORD_FORK] = event__task_swap, - [PERF_RECORD_EXIT] = event__task_swap, - [PERF_RECORD_LOST] = event__all64_swap, - [PERF_RECORD_READ] = event__read_swap, - [PERF_RECORD_SAMPLE] = event__all64_swap, - [PERF_RECORD_HEADER_ATTR] = event__attr_swap, - [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap, - [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap, - [PERF_RECORD_HEADER_BUILD_ID] = NULL, - [PERF_RECORD_HEADER_MAX] = NULL, +typedef void (*perf_event__swap_op)(union perf_event *event); + +static perf_event__swap_op perf_event__swap_ops[] = { + [PERF_RECORD_MMAP] = perf_event__mmap_swap, + [PERF_RECORD_COMM] = perf_event__comm_swap, + [PERF_RECORD_FORK] = perf_event__task_swap, + [PERF_RECORD_EXIT] = perf_event__task_swap, + [PERF_RECORD_LOST] = perf_event__all64_swap, + [PERF_RECORD_READ] = perf_event__read_swap, + [PERF_RECORD_SAMPLE] = perf_event__all64_swap, + [PERF_RECORD_HEADER_ATTR] = perf_event__attr_swap, + [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, + [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, + [PERF_RECORD_HEADER_BUILD_ID] = NULL, + [PERF_RECORD_HEADER_MAX] = NULL, }; struct sample_queue { u64 timestamp; u64 file_offset; - event_t *event; + union perf_event *event; struct list_head list; }; @@ -474,7 +474,7 @@ static void perf_session_free_sample_buffers(struct perf_session *session) } static int 
perf_session_deliver_event(struct perf_session *session, - event_t *event, + union perf_event *event, struct perf_sample *sample, struct perf_event_ops *ops, u64 file_offset); @@ -552,7 +552,7 @@ static void flush_sample_queue(struct perf_session *s, * Flush every events below timestamp 7 * etc... */ -static int process_finished_round(event_t *event __used, +static int process_finished_round(union perf_event *event __used, struct perf_session *session, struct perf_event_ops *ops) { @@ -609,7 +609,7 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s) #define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) -static int perf_session_queue_event(struct perf_session *s, event_t *event, +static int perf_session_queue_event(struct perf_session *s, union perf_event *event, struct perf_sample *sample, u64 file_offset) { struct ordered_samples *os = &s->ordered_samples; @@ -662,7 +662,7 @@ static void callchain__printf(struct perf_sample *sample) } static void perf_session__print_tstamp(struct perf_session *session, - event_t *event, + union perf_event *event, struct perf_sample *sample) { if (event->header.type != PERF_RECORD_SAMPLE && @@ -678,7 +678,7 @@ static void perf_session__print_tstamp(struct perf_session *session, printf("%" PRIu64 " ", sample->time); } -static void dump_event(struct perf_session *session, event_t *event, +static void dump_event(struct perf_session *session, union perf_event *event, u64 file_offset, struct perf_sample *sample) { if (!dump_trace) @@ -693,10 +693,10 @@ static void dump_event(struct perf_session *session, event_t *event, perf_session__print_tstamp(session, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, - event->header.size, event__get_event_name(event->header.type)); + event->header.size, perf_event__name(event->header.type)); } -static void dump_sample(struct perf_session *session, event_t *event, +static void dump_sample(struct perf_session *session, union perf_event 
*event, struct perf_sample *sample) { if (!dump_trace) @@ -711,7 +711,7 @@ static void dump_sample(struct perf_session *session, event_t *event, } static int perf_session_deliver_event(struct perf_session *session, - event_t *event, + union perf_event *event, struct perf_sample *sample, struct perf_event_ops *ops, u64 file_offset) @@ -745,7 +745,7 @@ static int perf_session_deliver_event(struct perf_session *session, } static int perf_session__preprocess_sample(struct perf_session *session, - event_t *event, struct perf_sample *sample) + union perf_event *event, struct perf_sample *sample) { if (event->header.type != PERF_RECORD_SAMPLE || !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) @@ -760,7 +760,7 @@ static int perf_session__preprocess_sample(struct perf_session *session, return 0; } -static int perf_session__process_user_event(struct perf_session *session, event_t *event, +static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, struct perf_event_ops *ops, u64 file_offset) { dump_event(session, event, file_offset, NULL); @@ -785,15 +785,16 @@ static int perf_session__process_user_event(struct perf_session *session, event_ } static int perf_session__process_event(struct perf_session *session, - event_t *event, + union perf_event *event, struct perf_event_ops *ops, u64 file_offset) { struct perf_sample sample; int ret; - if (session->header.needs_swap && event__swap_ops[event->header.type]) - event__swap_ops[event->header.type](event); + if (session->header.needs_swap && + perf_event__swap_ops[event->header.type]) + perf_event__swap_ops[event->header.type](event); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; @@ -845,7 +846,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se static void perf_session__warn_about_errors(const struct perf_session *session, const struct perf_event_ops *ops) { - if (ops->lost == event__process_lost && + if (ops->lost == 
perf_event__process_lost && session->hists.stats.total_lost != 0) { ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64 "!\n\nCheck IO/CPU overload!\n\n", @@ -877,7 +878,7 @@ volatile int session_done; static int __perf_session__process_pipe_events(struct perf_session *self, struct perf_event_ops *ops) { - event_t event; + union perf_event event; uint32_t size; int skip = 0; u64 head; @@ -958,7 +959,7 @@ int __perf_session__process_events(struct perf_session *session, struct ui_progress *progress; size_t page_size, mmap_size; char *buf, *mmaps[8]; - event_t *event; + union perf_event *event; uint32_t size; perf_event_ops__fill_defaults(ops); @@ -1003,7 +1004,7 @@ remap: file_pos = file_offset + head; more: - event = (event_t *)(buf + head); + event = (union perf_event *)(buf + head); if (session->header.needs_swap) perf_event_header__bswap(&event->header); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 365bf53..977b3a1 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -57,10 +57,11 @@ struct perf_session { struct perf_event_ops; -typedef int (*event_op)(event_t *self, struct perf_sample *sample, +typedef int (*event_op)(union perf_event *self, struct perf_sample *sample, struct perf_session *session); -typedef int (*event_synth_op)(event_t *self, struct perf_session *session); -typedef int (*event_op2)(event_t *self, struct perf_session *session, +typedef int (*event_synth_op)(union perf_event *self, + struct perf_session *session); +typedef int (*event_op2)(union perf_event *self, struct perf_session *session, struct perf_event_ops *ops); struct perf_event_ops { @@ -157,11 +158,11 @@ size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp) } static inline int perf_session__parse_sample(struct perf_session *session, - const event_t *event, + const union perf_event *event, struct perf_sample *sample) { - return event__parse_sample(event, session->sample_type, - session->sample_id_all, 
sample); + return perf_event__parse_sample(event, session->sample_type, + session->sample_id_all, sample); } #endif /* __PERF_SESSION_H */ -- cgit v0.10.2 From 877108e42b1b9ba64857c4030cf356ecc120fd18 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 29 Jan 2011 15:44:29 -0200 Subject: perf tools: Initial python binding First clarifying that this kind of binding is not a replacement or an equivalent to the 'perf script' way of using python with perf. The 'perf script' way is to process events and look at a given script for some python function that matches the events to pass each event for processing. This is a python module, i.e. everything is driven from the python script, that merely uses "import perf" or "from perf import". perf script is focused on tracepoints, this binding is focused on profiling as an initial target. More work is needed to make available tracepoint specific variables as event variables accessible via this binding. There is one example of such usage model, in tools/perf/python/twatch.py, a tool to watch "cycles" events together with task (fork, exit) and comm perf events. 
For now, due to me not being able to grok how python distutils cope with building C extensions outside the sources dir the install target just builds it, I'm using it as: [root@emilia linux]# export PYTHONPATH=~acme/git/build/perf/lib.linux-x86_64-2.6/ [root@emilia linux]# tools/perf/python/twatch.py cpu: 4, pid: 30126, tid: 30126 { type: mmap, pid: 30126, tid: 30126, start: 0x4, length: 0x82e9ca03, offset: 0, filename: } cpu: 6, pid: 47, tid: 47 { type: mmap, pid: 47, tid: 47, start: 0x6, length: 0xbef87c36, offset: 0, filename: } cpu: 1, pid: 0, tid: 0 { type: mmap, pid: 0, tid: 0, start: 0x1, length: 0x775d1904, offset: 0, filename: } cpu: 7, pid: 0, tid: 0 { type: mmap, pid: 0, tid: 0, start: 0x7, length: 0xc750aeb6, offset: 0, filename: } cpu: 5, pid: 2255, tid: 2255 { type: mmap, pid: 2255, tid: 2255, start: 0x5, length: 0x76669635, offset: 0, filename: } cpu: 0, pid: 0, tid: 0 { type: mmap, pid: 0, tid: 0, start: 0, length: 0x6422ef6b, offset: 0, filename: } cpu: 2, pid: 2255, tid: 2255 { type: mmap, pid: 2255, tid: 2255, start: 0x2, length: 0xe078757a, offset: 0, filename: } cpu: 1, pid: 5769, tid: 5769 { type: fork, pid: 30127, ppid: 5769, tid: 30127, ptid: 5769, time: 103893991270534} cpu: 6, pid: 30127, tid: 30127 { type: comm, pid: 30127, tid: 30127, comm: ls } cpu: 6, pid: 30127, tid: 30127 { type: exit, pid: 30127, ppid: 30127, tid: 30127, ptid: 30127, time: 103893993273024} The first 8 mmap events in this 8 way machine are a mystery that is still being investigated. More of the tools/perf/util/ APIs will be exposed via this python binding as the need arises. For now the focus is on creating events and processing them, symbol resolution is an obvious next step, with tracepoint variables as a close second step. 
Cc: Clark Williams Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index eedcf95..36ff73c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -315,6 +315,7 @@ COMPAT_CFLAGS = COMPAT_OBJS = LIB_H = LIB_OBJS = +PYRF_OBJS = SCRIPT_PERL = SCRIPT_SH = TEST_PROGRAMS = @@ -324,6 +325,9 @@ SCRIPT_SH += perf-archive.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) +pyrf: $(PYRF_OBJS) + python util/setup.py build --build-base='$(OUTPUT)' + # # No Perl scripts right now: # @@ -349,7 +353,7 @@ PROGRAMS += $(OUTPUT)perf # # what 'all' will build and 'install' will install, in perfexecdir -ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) +ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) pyrf # what 'all' will build but not install in perfexecdir OTHER_PROGRAMS = $(OUTPUT)perf$X @@ -520,6 +524,20 @@ BUILTIN_OBJS += $(OUTPUT)builtin-inject.o PERFLIBS = $(LIB_FILE) +# Files needed for the python binding, perf.so +# pyrf is just an internal name needed for all those wrappers. +# This has to be in sync with what is in the 'sources' variable in +# tools/perf/util/setup.py + +PYRF_OBJS += $(OUTPUT)util/cpumap.o +PYRF_OBJS += $(OUTPUT)util/ctype.o +PYRF_OBJS += $(OUTPUT)util/evlist.o +PYRF_OBJS += $(OUTPUT)util/evsel.o +PYRF_OBJS += $(OUTPUT)util/python.o +PYRF_OBJS += $(OUTPUT)util/thread_map.o +PYRF_OBJS += $(OUTPUT)util/util.o +PYRF_OBJS += $(OUTPUT)util/xyarray.o + # # Platform specific tweaks # diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py new file mode 100755 index 0000000..5e9f3b7 --- /dev/null +++ b/tools/perf/python/twatch.py @@ -0,0 +1,41 @@ +#! 
/usr/bin/python +# -*- python -*- +# -*- coding: utf-8 -*- +# twatch - Experimental use of the perf python interface +# Copyright (C) 2011 Arnaldo Carvalho de Melo +# +# This application is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2. +# +# This application is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +import perf + +def main(): + cpus = perf.cpu_map() + threads = perf.thread_map() + evsel = perf.evsel(task = 1, comm = 1, mmap = 0, + wakeup_events = 1, sample_period = 1, + sample_id_all = 1, + sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) + evsel.open(cpus = cpus, threads = threads); + evlist = perf.evlist() + evlist.add(evsel) + evlist.mmap(cpus = cpus, threads = threads) + while True: + evlist.poll(timeout = -1) + for cpu in cpus: + event = evlist.read_on_cpu(cpu) + if not event: + continue + print "cpu: %2d, pid: %4d, tid: %4d" % (event.sample_cpu, + event.sample_pid, + event.sample_tid), + print event + +if __name__ == '__main__': + main() diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c new file mode 100644 index 0000000..88d4789 --- /dev/null +++ b/tools/perf/util/python.c @@ -0,0 +1,888 @@ +#include +#include +#include +#include +#include "evlist.h" +#include "evsel.h" +#include "event.h" +#include "cpumap.h" +#include "thread_map.h" + +struct throttle_event { + struct perf_event_header header; + u64 time; + u64 id; + u64 stream_id; +}; + +#define member_def(type, member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_event, event) + offsetof(struct type, member), \ + 0, help } + +#define sample_member_def(name, member, ptype, help) \ + { #name, ptype, \ + offsetof(struct pyrf_event, 
sample) + offsetof(struct perf_sample, member), \ + 0, help } + +struct pyrf_event { + PyObject_HEAD + struct perf_sample sample; + union perf_event event; +}; + +#define T_ULONG_LONG T_ULONG + +#define sample_members \ + sample_member_def(sample_ip, ip, T_ULONG_LONG, "event type"), \ + sample_member_def(sample_pid, pid, T_INT, "event pid"), \ + sample_member_def(sample_tid, tid, T_INT, "event tid"), \ + sample_member_def(sample_time, time, T_ULONG_LONG, "event timestamp"), \ + sample_member_def(sample_addr, addr, T_ULONG_LONG, "event addr"), \ + sample_member_def(sample_id, id, T_ULONG_LONG, "event id"), \ + sample_member_def(sample_stream_id, stream_id, T_ULONG_LONG, "event stream id"), \ + sample_member_def(sample_period, period, T_ULONG_LONG, "event period"), \ + sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"), + +static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); + +static PyMemberDef pyrf_mmap_event__members[] = { + sample_members + member_def(perf_event_header, type, T_UINT, "event type"), + member_def(mmap_event, pid, T_UINT, "event pid"), + member_def(mmap_event, tid, T_UINT, "event tid"), + member_def(mmap_event, start, T_ULONG_LONG, "start of the map"), + member_def(mmap_event, len, T_ULONG_LONG, "map length"), + member_def(mmap_event, pgoff, T_ULONG_LONG, "page offset"), + member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"), + { NULL, }, +}; + +static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) +{ + PyObject *ret; + char *s; + + if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", " + "length: %#" PRIx64 ", offset: %#" PRIx64 ", " + "filename: %s }", + pevent->event.mmap.pid, pevent->event.mmap.tid, + pevent->event.mmap.start, pevent->event.mmap.len, + pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) { + ret = PyErr_NoMemory(); + } else { + ret = PyString_FromString(s); + free(s); + } + return ret; +} + +static PyTypeObject pyrf_mmap_event__type = { + 
PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.mmap_event", + .tp_basicsize = sizeof(struct pyrf_event), + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_mmap_event__doc, + .tp_members = pyrf_mmap_event__members, + .tp_repr = (reprfunc)pyrf_mmap_event__repr, +}; + +static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object."); + +static PyMemberDef pyrf_task_event__members[] = { + sample_members + member_def(perf_event_header, type, T_UINT, "event type"), + member_def(fork_event, pid, T_UINT, "event pid"), + member_def(fork_event, ppid, T_UINT, "event ppid"), + member_def(fork_event, tid, T_UINT, "event tid"), + member_def(fork_event, ptid, T_UINT, "event ptid"), + member_def(fork_event, time, T_ULONG_LONG, "timestamp"), + { NULL, }, +}; + +static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) +{ + return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " + "ptid: %u, time: %" PRIu64 "}", + pevent->event.header.type == PERF_RECORD_FORK ? 
"fork" : "exit", + pevent->event.fork.pid, + pevent->event.fork.ppid, + pevent->event.fork.tid, + pevent->event.fork.ptid, + pevent->event.fork.time); +} + +static PyTypeObject pyrf_task_event__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.task_event", + .tp_basicsize = sizeof(struct pyrf_event), + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_task_event__doc, + .tp_members = pyrf_task_event__members, + .tp_repr = (reprfunc)pyrf_task_event__repr, +}; + +static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object."); + +static PyMemberDef pyrf_comm_event__members[] = { + sample_members + member_def(perf_event_header, type, T_UINT, "event type"), + member_def(comm_event, pid, T_UINT, "event pid"), + member_def(comm_event, tid, T_UINT, "event tid"), + member_def(comm_event, comm, T_STRING_INPLACE, "process name"), + { NULL, }, +}; + +static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) +{ + return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", + pevent->event.comm.pid, + pevent->event.comm.tid, + pevent->event.comm.comm); +} + +static PyTypeObject pyrf_comm_event__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.comm_event", + .tp_basicsize = sizeof(struct pyrf_event), + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_comm_event__doc, + .tp_members = pyrf_comm_event__members, + .tp_repr = (reprfunc)pyrf_comm_event__repr, +}; + +static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object."); + +static PyMemberDef pyrf_throttle_event__members[] = { + sample_members + member_def(perf_event_header, type, T_UINT, "event type"), + member_def(throttle_event, time, T_ULONG_LONG, "timestamp"), + member_def(throttle_event, id, T_ULONG_LONG, "event id"), + member_def(throttle_event, stream_id, T_ULONG_LONG, "event stream id"), + { NULL, }, +}; + +static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) +{ + struct throttle_event *te = 
(struct throttle_event *)(&pevent->event.header + 1); + + return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64 + ", stream_id: %" PRIu64 " }", + pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un", + te->time, te->id, te->stream_id); +} + +static PyTypeObject pyrf_throttle_event__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.throttle_event", + .tp_basicsize = sizeof(struct pyrf_event), + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_throttle_event__doc, + .tp_members = pyrf_throttle_event__members, + .tp_repr = (reprfunc)pyrf_throttle_event__repr, +}; + +static int pyrf_event__setup_types(void) +{ + int err; + pyrf_mmap_event__type.tp_new = + pyrf_task_event__type.tp_new = + pyrf_comm_event__type.tp_new = + pyrf_throttle_event__type.tp_new = PyType_GenericNew; + err = PyType_Ready(&pyrf_mmap_event__type); + if (err < 0) + goto out; + err = PyType_Ready(&pyrf_task_event__type); + if (err < 0) + goto out; + err = PyType_Ready(&pyrf_comm_event__type); + if (err < 0) + goto out; + err = PyType_Ready(&pyrf_throttle_event__type); + if (err < 0) + goto out; +out: + return err; +} + +static PyTypeObject *pyrf_event__type[] = { + [PERF_RECORD_MMAP] = &pyrf_mmap_event__type, + [PERF_RECORD_LOST] = &pyrf_mmap_event__type, + [PERF_RECORD_COMM] = &pyrf_comm_event__type, + [PERF_RECORD_EXIT] = &pyrf_task_event__type, + [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type, + [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type, + [PERF_RECORD_FORK] = &pyrf_task_event__type, + [PERF_RECORD_READ] = &pyrf_mmap_event__type, + [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type, +}; + +static PyObject *pyrf_event__new(union perf_event *event) +{ + struct pyrf_event *pevent; + PyTypeObject *ptype; + + if (event->header.type < PERF_RECORD_MMAP || + event->header.type > PERF_RECORD_SAMPLE) + return NULL; + + ptype = pyrf_event__type[event->header.type]; + pevent = PyObject_New(struct pyrf_event, ptype); + if (pevent != 
NULL) + memcpy(&pevent->event, event, event->header.size); + return (PyObject *)pevent; +} + +struct pyrf_cpu_map { + PyObject_HEAD + + struct cpu_map *cpus; +}; + +static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, + PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "cpustr", NULL, NULL, }; + char *cpustr = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s", + kwlist, &cpustr)) + return -1; + + pcpus->cpus = cpu_map__new(cpustr); + if (pcpus->cpus == NULL) + return -1; + return 0; +} + +static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) +{ + cpu_map__delete(pcpus->cpus); + pcpus->ob_type->tp_free((PyObject*)pcpus); +} + +static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) +{ + struct pyrf_cpu_map *pcpus = (void *)obj; + + return pcpus->cpus->nr; +} + +static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) +{ + struct pyrf_cpu_map *pcpus = (void *)obj; + + if (i >= pcpus->cpus->nr) + return NULL; + + return Py_BuildValue("i", pcpus->cpus->map[i]); +} + +static PySequenceMethods pyrf_cpu_map__sequence_methods = { + .sq_length = pyrf_cpu_map__length, + .sq_item = pyrf_cpu_map__item, +}; + +static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object."); + +static PyTypeObject pyrf_cpu_map__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.cpu_map", + .tp_basicsize = sizeof(struct pyrf_cpu_map), + .tp_dealloc = (destructor)pyrf_cpu_map__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_cpu_map__doc, + .tp_as_sequence = &pyrf_cpu_map__sequence_methods, + .tp_init = (initproc)pyrf_cpu_map__init, +}; + +static int pyrf_cpu_map__setup_types(void) +{ + pyrf_cpu_map__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_cpu_map__type); +} + +struct pyrf_thread_map { + PyObject_HEAD + + struct thread_map *threads; +}; + +static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, + PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = { "pid", "tid", NULL, 
NULL, }; + int pid = -1, tid = -1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", + kwlist, &pid, &tid)) + return -1; + + pthreads->threads = thread_map__new(pid, tid); + if (pthreads->threads == NULL) + return -1; + return 0; +} + +static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) +{ + thread_map__delete(pthreads->threads); + pthreads->ob_type->tp_free((PyObject*)pthreads); +} + +static Py_ssize_t pyrf_thread_map__length(PyObject *obj) +{ + struct pyrf_thread_map *pthreads = (void *)obj; + + return pthreads->threads->nr; +} + +static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) +{ + struct pyrf_thread_map *pthreads = (void *)obj; + + if (i >= pthreads->threads->nr) + return NULL; + + return Py_BuildValue("i", pthreads->threads->map[i]); +} + +static PySequenceMethods pyrf_thread_map__sequence_methods = { + .sq_length = pyrf_thread_map__length, + .sq_item = pyrf_thread_map__item, +}; + +static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object."); + +static PyTypeObject pyrf_thread_map__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.thread_map", + .tp_basicsize = sizeof(struct pyrf_thread_map), + .tp_dealloc = (destructor)pyrf_thread_map__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_thread_map__doc, + .tp_as_sequence = &pyrf_thread_map__sequence_methods, + .tp_init = (initproc)pyrf_thread_map__init, +}; + +static int pyrf_thread_map__setup_types(void) +{ + pyrf_thread_map__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_thread_map__type); +} + +struct pyrf_evsel { + PyObject_HEAD + + struct perf_evsel evsel; +}; + +static int pyrf_evsel__init(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID, + }; + static char *kwlist[] = { + "type", + "config", + "sample_freq", + "sample_period", 
+ "sample_type", + "read_format", + "disabled", + "inherit", + "pinned", + "exclusive", + "exclude_user", + "exclude_kernel", + "exclude_hv", + "exclude_idle", + "mmap", + "comm", + "freq", + "inherit_stat", + "enable_on_exec", + "task", + "watermark", + "precise_ip", + "mmap_data", + "sample_id_all", + "wakeup_events", + "bp_type", + "bp_addr", + "bp_len", NULL, NULL, }; + u64 sample_period = 0; + u32 disabled = 0, + inherit = 0, + pinned = 0, + exclusive = 0, + exclude_user = 0, + exclude_kernel = 0, + exclude_hv = 0, + exclude_idle = 0, + mmap = 0, + comm = 0, + freq = 1, + inherit_stat = 0, + enable_on_exec = 0, + task = 0, + watermark = 0, + precise_ip = 0, + mmap_data = 0, + sample_id_all = 1; + int idx = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|iKiKKiiiiiiiiiiiiiiiiiiiiiKK", kwlist, + &attr.type, &attr.config, &attr.sample_freq, + &sample_period, &attr.sample_type, + &attr.read_format, &disabled, &inherit, + &pinned, &exclusive, &exclude_user, + &exclude_kernel, &exclude_hv, &exclude_idle, + &mmap, &comm, &freq, &inherit_stat, + &enable_on_exec, &task, &watermark, + &precise_ip, &mmap_data, &sample_id_all, + &attr.wakeup_events, &attr.bp_type, + &attr.bp_addr, &attr.bp_len, &idx)) + return -1; + + /* union... 
*/ + if (sample_period != 0) { + if (attr.sample_freq != 0) + return -1; /* FIXME: throw right exception */ + attr.sample_period = sample_period; + } + + /* Bitfields */ + attr.disabled = disabled; + attr.inherit = inherit; + attr.pinned = pinned; + attr.exclusive = exclusive; + attr.exclude_user = exclude_user; + attr.exclude_kernel = exclude_kernel; + attr.exclude_hv = exclude_hv; + attr.exclude_idle = exclude_idle; + attr.mmap = mmap; + attr.comm = comm; + attr.freq = freq; + attr.inherit_stat = inherit_stat; + attr.enable_on_exec = enable_on_exec; + attr.task = task; + attr.watermark = watermark; + attr.precise_ip = precise_ip; + attr.mmap_data = mmap_data; + attr.sample_id_all = sample_id_all; + + perf_evsel__init(&pevsel->evsel, &attr, idx); + return 0; +} + +static void pyrf_evsel__delete(struct pyrf_evsel *pevsel) +{ + perf_evsel__exit(&pevsel->evsel); + pevsel->ob_type->tp_free((PyObject*)pevsel); +} + +static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct perf_evsel *evsel = &pevsel->evsel; + struct cpu_map *cpus = NULL; + struct thread_map *threads = NULL; + PyObject *pcpus = NULL, *pthreads = NULL; + int group = 0, overwrite = 0; + static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, + &pcpus, &pthreads, &group, &overwrite)) + return NULL; + + if (pthreads != NULL) + threads = ((struct pyrf_thread_map *)pthreads)->threads; + + if (pcpus != NULL) + cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; + + if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + +static PyMethodDef pyrf_evsel__methods[] = { + { + .ml_name = "open", + .ml_meth = (PyCFunction)pyrf_evsel__open, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("open the event selector file descriptor table.") + }, + { NULL, } 
+}; + +static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); + +static PyTypeObject pyrf_evsel__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.evsel", + .tp_basicsize = sizeof(struct pyrf_evsel), + .tp_dealloc = (destructor)pyrf_evsel__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_evsel__doc, + .tp_methods = pyrf_evsel__methods, + .tp_init = (initproc)pyrf_evsel__init, +}; + +static int pyrf_evsel__setup_types(void) +{ + pyrf_evsel__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_evsel__type); +} + +struct pyrf_evlist { + PyObject_HEAD + + struct perf_evlist evlist; +}; + +static int pyrf_evlist__init(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + perf_evlist__init(&pevlist->evlist); + return 0; +} + +static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) +{ + perf_evlist__exit(&pevlist->evlist); + pevlist->ob_type->tp_free((PyObject*)pevlist); +} + +static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + struct perf_evlist *evlist = &pevlist->evlist; + PyObject *pcpus = NULL, *pthreads = NULL; + struct cpu_map *cpus = NULL; + struct thread_map *threads = NULL; + static char *kwlist[] = {"cpus", "threads", "pages", "overwrite", + NULL, NULL}; + int pages = 128, overwrite = false; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|ii", kwlist, + &pcpus, &pthreads, &pages, &overwrite)) + return NULL; + + threads = ((struct pyrf_thread_map *)pthreads)->threads; + cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; + + if (perf_evlist__mmap(evlist, cpus, threads, pages, overwrite) < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + struct perf_evlist *evlist = &pevlist->evlist; + static char *kwlist[] = {"timeout", NULL, NULL}; + int timeout = -1, n; 
+ + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) + return NULL; + + n = poll(evlist->pollfd, evlist->nr_fds, timeout); + if (n < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + + return Py_BuildValue("i", n); +} + +static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + struct perf_evlist *evlist = &pevlist->evlist; + PyObject *list = PyList_New(0); + int i; + + for (i = 0; i < evlist->nr_fds; ++i) { + PyObject *file; + FILE *fp = fdopen(evlist->pollfd[i].fd, "r"); + + if (fp == NULL) + goto free_list; + + file = PyFile_FromFile(fp, "perf", "r", NULL); + if (file == NULL) + goto free_list; + + if (PyList_Append(list, file) != 0) { + Py_DECREF(file); + goto free_list; + } + + Py_DECREF(file); + } + + return list; +free_list: + return PyErr_NoMemory(); +} + + +static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + struct perf_evlist *evlist = &pevlist->evlist; + PyObject *pevsel; + struct perf_evsel *evsel; + + if (!PyArg_ParseTuple(args, "O", &pevsel)) + return NULL; + + Py_INCREF(pevsel); + evsel = &((struct pyrf_evsel *)pevsel)->evsel; + evsel->idx = evlist->nr_entries; + perf_evlist__add(evlist, evsel); + + return Py_BuildValue("i", evlist->nr_entries); +} + +static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + struct perf_evlist *evlist = &pevlist->evlist; + union perf_event *event; + int sample_id_all = 1, cpu; + static char *kwlist[] = {"sample_id_all", NULL, NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, + &cpu, &sample_id_all)) + return NULL; + + event = perf_evlist__read_on_cpu(evlist, cpu); + if (event != NULL) { + struct perf_evsel *first; + PyObject *pyevent = pyrf_event__new(event); + struct pyrf_event *pevent = (struct pyrf_event *)pyevent; + + if (pyevent == NULL) + return PyErr_NoMemory(); + + first = 
list_entry(evlist->entries.next, struct perf_evsel, node); + perf_event__parse_sample(event, first->attr.sample_type, sample_id_all, + &pevent->sample); + return pyevent; + } + + Py_INCREF(Py_None); + return Py_None; +} + +static PyMethodDef pyrf_evlist__methods[] = { + { + .ml_name = "mmap", + .ml_meth = (PyCFunction)pyrf_evlist__mmap, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("mmap the file descriptor table.") + }, + { + .ml_name = "poll", + .ml_meth = (PyCFunction)pyrf_evlist__poll, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("poll the file descriptor table.") + }, + { + .ml_name = "get_pollfd", + .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("get the poll file descriptor table.") + }, + { + .ml_name = "add", + .ml_meth = (PyCFunction)pyrf_evlist__add, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("adds an event selector to the list.") + }, + { + .ml_name = "read_on_cpu", + .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("reads an event.") + }, + { NULL, } +}; + +static Py_ssize_t pyrf_evlist__length(PyObject *obj) +{ + struct pyrf_evlist *pevlist = (void *)obj; + + return pevlist->evlist.nr_entries; +} + +static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i) +{ + struct pyrf_evlist *pevlist = (void *)obj; + struct perf_evsel *pos; + + if (i >= pevlist->evlist.nr_entries) + return NULL; + + list_for_each_entry(pos, &pevlist->evlist.entries, node) + if (i-- == 0) + break; + + return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel)); +} + +static PySequenceMethods pyrf_evlist__sequence_methods = { + .sq_length = pyrf_evlist__length, + .sq_item = pyrf_evlist__item, +}; + +static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object."); + +static PyTypeObject pyrf_evlist__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = 
"perf.evlist", + .tp_basicsize = sizeof(struct pyrf_evlist), + .tp_dealloc = (destructor)pyrf_evlist__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_as_sequence = &pyrf_evlist__sequence_methods, + .tp_doc = pyrf_evlist__doc, + .tp_methods = pyrf_evlist__methods, + .tp_init = (initproc)pyrf_evlist__init, +}; + +static int pyrf_evlist__setup_types(void) +{ + pyrf_evlist__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_evlist__type); +} + +static struct { + const char *name; + int value; +} perf__constants[] = { + { "TYPE_HARDWARE", PERF_TYPE_HARDWARE }, + { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE }, + { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT }, + { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE }, + { "TYPE_RAW", PERF_TYPE_RAW }, + { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT }, + + { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES }, + { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS }, + { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES }, + { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES }, + { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, + { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, + { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, + { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, + { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, + { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB }, + { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB }, + { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU }, + { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ }, + { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE }, + { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH }, + { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, + { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, + + { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, + { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK 
}, + { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, + { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES }, + { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS }, + { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN }, + { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ }, + { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS }, + { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS }, + + { "SAMPLE_IP", PERF_SAMPLE_IP }, + { "SAMPLE_TID", PERF_SAMPLE_TID }, + { "SAMPLE_TIME", PERF_SAMPLE_TIME }, + { "SAMPLE_ADDR", PERF_SAMPLE_ADDR }, + { "SAMPLE_READ", PERF_SAMPLE_READ }, + { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN }, + { "SAMPLE_ID", PERF_SAMPLE_ID }, + { "SAMPLE_CPU", PERF_SAMPLE_CPU }, + { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD }, + { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID }, + { "SAMPLE_RAW", PERF_SAMPLE_RAW }, + + { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED }, + { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING }, + { "FORMAT_ID", PERF_FORMAT_ID }, + { "FORMAT_GROUP", PERF_FORMAT_GROUP }, + + { "RECORD_MMAP", PERF_RECORD_MMAP }, + { "RECORD_LOST", PERF_RECORD_LOST }, + { "RECORD_COMM", PERF_RECORD_COMM }, + { "RECORD_EXIT", PERF_RECORD_EXIT }, + { "RECORD_THROTTLE", PERF_RECORD_THROTTLE }, + { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE }, + { "RECORD_FORK", PERF_RECORD_FORK }, + { "RECORD_READ", PERF_RECORD_READ }, + { "RECORD_SAMPLE", PERF_RECORD_SAMPLE }, + { NULL, }, +}; + +static PyMethodDef perf__methods[] = { + { NULL, NULL } +}; + +PyMODINIT_FUNC initperf(void) +{ + PyObject *obj; + int i; + PyObject *dict, *module = Py_InitModule("perf", perf__methods); + + if (module == NULL || + pyrf_event__setup_types() < 0 || + pyrf_evlist__setup_types() < 0 || + pyrf_evsel__setup_types() < 0 || + pyrf_thread_map__setup_types() < 0 || + pyrf_cpu_map__setup_types() < 0) + return; + + Py_INCREF(&pyrf_evlist__type); + PyModule_AddObject(module, "evlist", 
(PyObject*)&pyrf_evlist__type); + + Py_INCREF(&pyrf_evsel__type); + PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type); + + Py_INCREF(&pyrf_thread_map__type); + PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type); + + Py_INCREF(&pyrf_cpu_map__type); + PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type); + + dict = PyModule_GetDict(module); + if (dict == NULL) + goto error; + + for (i = 0; perf__constants[i].name != NULL; i++) { + obj = PyInt_FromLong(perf__constants[i].value); + if (obj == NULL) + goto error; + PyDict_SetItemString(dict, perf__constants[i].name, obj); + Py_DECREF(obj); + } + +error: + if (PyErr_Occurred()) + PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); +} diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py new file mode 100644 index 0000000..496d7f4 --- /dev/null +++ b/tools/perf/util/setup.py @@ -0,0 +1,18 @@ +#!/usr/bin/python2 + +from distutils.core import setup, Extension + +perf = Extension('perf', + sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', + 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', + 'util/util.c', 'util/xyarray.c'], + include_dirs = ['util/include']) + +setup(name='perf', + version='0.1', + description='Interface with the Linux profiling infrastructure', + author='Arnaldo Carvalho de Melo', + author_email='acme@redhat.com', + license='GPLv2', + url='http://perf.wiki.kernel.org', + ext_modules=[perf]) -- cgit v0.10.2 From f8a9530939ed87b9a1b1a038b90e355098b679a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 30 Jan 2011 10:46:46 -0200 Subject: perf evlist: Move evlist methods to evlist.c They were on evsel.c because they came from refactoring existing evsel methods, so, to make reviewing the changes easier, I kept them there; now it's a plain move. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 917fc18..dcd5932 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1,11 +1,26 @@ +/* + * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Parts came from builtin-{top,stat,record}.c, see those files for further + * copyright notes. + * + * Released under the GPL v2. (and only v2, not any later version) + */ #include +#include "cpumap.h" +#include "thread_map.h" #include "evlist.h" #include "evsel.h" #include "util.h" +#include + #include #include +#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) +#define SID(e, x, y) xyarray__entry(e->id, x, y) + void perf_evlist__init(struct perf_evlist *evlist) { int i; @@ -88,6 +103,30 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) evlist->nr_fds++; } +static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, + int cpu, int thread, int fd) +{ + struct perf_sample_id *sid; + u64 read_data[4] = { 0, }; + int hash, id_idx = 1; /* The first entry is the counter value */ + + if (!(evsel->attr.read_format & PERF_FORMAT_ID) || + read(fd, &read_data, sizeof(read_data)) == -1) + return -1; + + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + ++id_idx; + if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + ++id_idx; + + sid = SID(evsel, cpu, thread); + sid->id = read_data[id_idx]; + sid->evsel = evsel; + hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); + hlist_add_head(&sid->node, &evlist->heads[hash]); + return 0; +} + struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) { struct hlist_head *head; @@ -173,3 +212,106 @@ union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) return event; } + +void 
perf_evlist__munmap(struct perf_evlist *evlist, int ncpus) +{ + int cpu; + + for (cpu = 0; cpu < ncpus; cpu++) { + if (evlist->mmap[cpu].base != NULL) { + munmap(evlist->mmap[cpu].base, evlist->mmap_len); + evlist->mmap[cpu].base = NULL; + } + } +} + +int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus) +{ + evlist->mmap = zalloc(ncpus * sizeof(struct perf_mmap)); + return evlist->mmap != NULL ? 0 : -ENOMEM; +} + +static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, + int mask, int fd) +{ + evlist->mmap[cpu].prev = 0; + evlist->mmap[cpu].mask = mask; + evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot, + MAP_SHARED, fd, 0); + if (evlist->mmap[cpu].base == MAP_FAILED) + return -1; + + perf_evlist__add_pollfd(evlist, fd); + return 0; +} + +/** perf_evlist__mmap - Create per cpu maps to receive events + * + * @evlist - list of events + * @cpus - cpu map being monitored + * @threads - threads map being monitored + * @pages - map length in pages + * @overwrite - overwrite older events? + * + * If overwrite is false the user needs to signal event consumption using: + * + * struct perf_mmap *m = &evlist->mmap[cpu]; + * unsigned int head = perf_mmap__read_head(m); + * + * perf_mmap__write_tail(m, head) + */ +int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads, int pages, bool overwrite) +{ + unsigned int page_size = sysconf(_SC_PAGE_SIZE); + int mask = pages * page_size - 1, cpu; + struct perf_evsel *first_evsel, *evsel; + int thread, prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE); + + if (evlist->mmap == NULL && + perf_evlist__alloc_mmap(evlist, cpus->nr) < 0) + return -ENOMEM; + + if (evlist->pollfd == NULL && + perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0) + return -ENOMEM; + + evlist->overwrite = overwrite; + evlist->mmap_len = (pages + 1) * page_size; + first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + + list_for_each_entry(evsel, &evlist->entries, node) { + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + evsel->id == NULL && + perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) + return -ENOMEM; + + for (cpu = 0; cpu < cpus->nr; cpu++) { + for (thread = 0; thread < threads->nr; thread++) { + int fd = FD(evsel, cpu, thread); + + if (evsel->idx || thread) { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, + FD(first_evsel, cpu, 0)) != 0) + goto out_unmap; + } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0) + goto out_unmap; + + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0) + goto out_unmap; + } + } + } + + return 0; + +out_unmap: + for (cpu = 0; cpu < cpus->nr; cpu++) { + if (evlist->mmap[cpu].base != NULL) { + munmap(evlist->mmap[cpu].base, evlist->mmap_len); + evlist->mmap[cpu].base = NULL; + } + } + return -1; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 022ae40..85aca6e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -6,6 +6,8 @@ #include "event.h" struct pollfd; +struct thread_map; +struct cpu_map; #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) @@ -39,4 +41,9 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu); +int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus); +int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads, int pages, 
bool overwrite); +void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index fddeb08..2720bc1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1,18 +1,19 @@ +/* + * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Parts came from builtin-{top,stat,record}.c, see those files for further + * copyright notes. + * + * Released under the GPL v2. (and only v2, not any later version) + */ + #include "evsel.h" #include "evlist.h" -#include "../perf.h" #include "util.h" #include "cpumap.h" #include "thread_map.h" -#include -#include - -#include -#include - #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -#define SID(e, x, y) xyarray__entry(e->id, x, y) void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) @@ -74,24 +75,6 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) } } -void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus) -{ - int cpu; - - for (cpu = 0; cpu < ncpus; cpu++) { - if (evlist->mmap[cpu].base != NULL) { - munmap(evlist->mmap[cpu].base, evlist->mmap_len); - evlist->mmap[cpu].base = NULL; - } - } -} - -int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus) -{ - evlist->mmap = zalloc(ncpus * sizeof(struct perf_mmap)); - return evlist->mmap != NULL ? 
0 : -ENOMEM; -} - void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); @@ -258,115 +241,6 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit); } -static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, - int mask, int fd) -{ - evlist->mmap[cpu].prev = 0; - evlist->mmap[cpu].mask = mask; - evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot, - MAP_SHARED, fd, 0); - if (evlist->mmap[cpu].base == MAP_FAILED) - return -1; - - perf_evlist__add_pollfd(evlist, fd); - return 0; -} - -static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) -{ - struct perf_sample_id *sid; - u64 read_data[4] = { 0, }; - int hash, id_idx = 1; /* The first entry is the counter value */ - - if (!(evsel->attr.read_format & PERF_FORMAT_ID) || - read(fd, &read_data, sizeof(read_data)) == -1) - return -1; - - if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - ++id_idx; - if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - ++id_idx; - - sid = SID(evsel, cpu, thread); - sid->id = read_data[id_idx]; - sid->evsel = evsel; - hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); - hlist_add_head(&sid->node, &evlist->heads[hash]); - return 0; -} - -/** perf_evlist__mmap - Create per cpu maps to receive events - * - * @evlist - list of events - * @cpus - cpu map being monitored - * @threads - threads map being monitored - * @pages - map length in pages - * @overwrite - overwrite older events? 
- * - * If overwrite is false the user needs to signal event consuption using: - * - * struct perf_mmap *m = &evlist->mmap[cpu]; - * unsigned int head = perf_mmap__read_head(m); - * - * perf_mmap__write_tail(m, head) - */ -int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, - struct thread_map *threads, int pages, bool overwrite) -{ - unsigned int page_size = sysconf(_SC_PAGE_SIZE); - int mask = pages * page_size - 1, cpu; - struct perf_evsel *first_evsel, *evsel; - int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); - - if (evlist->mmap == NULL && - perf_evlist__alloc_mmap(evlist, cpus->nr) < 0) - return -ENOMEM; - - if (evlist->pollfd == NULL && - perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0) - return -ENOMEM; - - evlist->overwrite = overwrite; - evlist->mmap_len = (pages + 1) * page_size; - first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node); - - list_for_each_entry(evsel, &evlist->entries, node) { - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - evsel->id == NULL && - perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) - return -ENOMEM; - - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - int fd = FD(evsel, cpu, thread); - - if (evsel->idx || thread) { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, - FD(first_evsel, cpu, 0)) != 0) - goto out_unmap; - } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0) - goto out_unmap; - - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0) - goto out_unmap; - } - } - } - - return 0; - -out_unmap: - for (cpu = 0; cpu < cpus->nr; cpu++) { - if (evlist->mmap[cpu].base != NULL) { - munmap(evlist->mmap[cpu].base, evlist->mmap_len); - evlist->mmap[cpu].base = NULL; - } - } - return -1; -} - static int perf_event__parse_id_sample(const union perf_event *event, u64 type, struct perf_sample *sample) { diff --git a/tools/perf/util/evsel.h 
b/tools/perf/util/evsel.h index 7962e75..eecdc3a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -60,7 +60,6 @@ void perf_evsel__delete(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); -int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus); void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); @@ -71,9 +70,6 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads, bool group, bool inherit); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads, bool group, bool inherit); -int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, - struct thread_map *threads, int pages, bool overwrite); -void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ -- cgit v0.10.2 From 7e2ed097538c57ff5268e9a6bced7c0b885809c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 30 Jan 2011 11:59:43 -0200 Subject: perf evlist: Store pointer to the cpu and thread maps So that we don't have to pass them around to the several methods that need them, simplifying usage. There is one case where we don't have the thread/cpu maps in advance: the parsing routines used by top, stat and record, where we have to wait until all options are parsed to know whether a cpu or thread list was passed before creating those maps. 
For that case consolidate the cpu and thread map creation via perf_evlist__create_maps() out of the code in top and record, while also providing a perf_evlist__set_maps() for cases where multiple evlists share maps or for when maps that represent CPU sockets, for instance, get crafted out of topology information or subsets of threads in a particular application are to be monitored, providing more granularity in specifying which cpus and threads to monitor. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index edc3555..07f8d6d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -42,7 +42,6 @@ static u64 user_interval = ULLONG_MAX; static u64 default_interval = 0; static u64 sample_type; -static struct cpu_map *cpus; static unsigned int page_size; static unsigned int mmap_pages = 128; static unsigned int user_freq = UINT_MAX; @@ -58,7 +57,6 @@ static bool sample_id_all_avail = true; static bool system_wide = false; static pid_t target_pid = -1; static pid_t target_tid = -1; -static struct thread_map *threads; static pid_t child_pid = -1; static bool no_inherit = false; static enum write_mode_t write_mode = WRITE_FORCE; @@ -189,7 +187,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu) int thread_index; int ret; - for (thread_index = 0; thread_index < threads->nr; thread_index++) { + for (thread_index = 0; thread_index < evsel_list->threads->nr; thread_index++) { h_attr = get_header_attr(attr, evsel->idx); if (h_attr == NULL) die("nomem\n"); @@ -317,7 +315,8 @@ static void open_counters(struct perf_evlist *evlist) retry_sample_id: attr->sample_id_all = sample_id_all_avail ? 
1 : 0; try_again: - if (perf_evsel__open(pos, cpus, threads, group, !no_inherit) < 0) { + if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group, + !no_inherit) < 0) { int err = errno; if (err == EPERM || err == EACCES) @@ -368,10 +367,10 @@ try_again: } } - if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, false) < 0) + if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); - for (cpu = 0; cpu < cpus->nr; ++cpu) { + for (cpu = 0; cpu < evsel_list->cpus->nr; ++cpu) { list_for_each_entry(pos, &evlist->entries, node) create_counter(pos, cpu); } @@ -450,7 +449,7 @@ static void mmap_read_all(void) { int i; - for (i = 0; i < cpus->nr; i++) { + for (i = 0; i < evsel_list->cpus->nr; i++) { if (evsel_list->mmap[i].base) mmap_read(&evsel_list->mmap[i]); } @@ -584,7 +583,7 @@ static int __cmd_record(int argc, const char **argv) } if (!system_wide && target_tid == -1 && target_pid == -1) - threads->map[0] = child_pid; + evsel_list->threads->map[0] = child_pid; close(child_ready_pipe[1]); close(go_pipe[0]); @@ -718,12 +717,12 @@ static int __cmd_record(int argc, const char **argv) } if (done) { - for (i = 0; i < cpus->nr; i++) { + for (i = 0; i < evsel_list->cpus->nr; i++) { struct perf_evsel *pos; list_for_each_entry(pos, &evsel_list->entries, node) { for (thread = 0; - thread < threads->nr; + thread < evsel_list->threads->nr; thread++) ioctl(FD(pos, i, thread), PERF_EVENT_IOC_DISABLE); @@ -816,7 +815,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) int err = -ENOMEM; struct perf_evsel *pos; - evsel_list = perf_evlist__new(); + evsel_list = perf_evlist__new(NULL, NULL); if (evsel_list == NULL) return -ENOMEM; @@ -850,28 +849,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) if (target_pid != -1) target_tid = target_pid; - threads = thread_map__new(target_pid, target_tid); - if (threads == NULL) { - pr_err("Problems finding threads of monitor\n"); - 
usage_with_options(record_usage, record_options); - } - - if (target_tid != -1) - cpus = cpu_map__dummy_new(); - else - cpus = cpu_map__new(cpu_list); - - if (cpus == NULL) + if (perf_evlist__create_maps(evsel_list, target_pid, + target_tid, cpu_list) < 0) usage_with_options(record_usage, record_options); list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) + if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, + evsel_list->threads->nr) < 0) goto out_free_fd; if (perf_header__push_event(pos->attr.config, event_name(pos))) goto out_free_fd; } - if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0) + if (perf_evlist__alloc_pollfd(evsel_list) < 0) goto out_free_fd; if (user_interval != ULLONG_MAX) @@ -893,10 +883,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) } err = __cmd_record(argc, argv); - out_free_fd: - thread_map__delete(threads); - threads = NULL; + perf_evlist__delete_maps(evsel_list); out_symbol_exit: symbol__exit(); return err; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8906adf..e0f9575 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -76,7 +76,6 @@ static struct perf_event_attr default_attrs[] = { struct perf_evlist *evsel_list; static bool system_wide = false; -static struct cpu_map *cpus; static int run_idx = 0; static int run_count = 1; @@ -85,7 +84,6 @@ static bool scale = true; static bool no_aggr = false; static pid_t target_pid = -1; static pid_t target_tid = -1; -static struct thread_map *threads; static pid_t child_pid = -1; static bool null_run = false; static bool big_num = true; @@ -170,7 +168,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) - return perf_evsel__open_per_cpu(evsel, cpus, false, false); + return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false); attr->inherit = !no_inherit; if (target_pid 
== -1 && target_tid == -1) { @@ -178,7 +176,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - return perf_evsel__open_per_thread(evsel, threads, false, false); + return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false); } /* @@ -203,7 +201,8 @@ static int read_counter_aggr(struct perf_evsel *counter) u64 *count = counter->counts->aggr.values; int i; - if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0) + if (__perf_evsel__read(counter, evsel_list->cpus->nr, + evsel_list->threads->nr, scale) < 0) return -1; for (i = 0; i < 3; i++) @@ -236,7 +235,7 @@ static int read_counter(struct perf_evsel *counter) u64 *count; int cpu; - for (cpu = 0; cpu < cpus->nr; cpu++) { + for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) return -1; @@ -301,7 +300,7 @@ static int run_perf_stat(int argc __used, const char **argv) } if (target_tid == -1 && target_pid == -1 && !system_wide) - threads->map[0] = child_pid; + evsel_list->threads->map[0] = child_pid; /* * Wait for the child to be ready to exec. @@ -353,12 +352,13 @@ static int run_perf_stat(int argc __used, const char **argv) if (no_aggr) { list_for_each_entry(counter, &evsel_list->entries, node) { read_counter(counter); - perf_evsel__close_fd(counter, cpus->nr, 1); + perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1); } } else { list_for_each_entry(counter, &evsel_list->entries, node) { read_counter_aggr(counter); - perf_evsel__close_fd(counter, cpus->nr, threads->nr); + perf_evsel__close_fd(counter, evsel_list->cpus->nr, + evsel_list->threads->nr); } } @@ -386,7 +386,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 
0 : -4, - cpus->map[cpu], csv_sep); + evsel_list->cpus->map[cpu], csv_sep); fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); @@ -414,7 +414,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 0 : -4, - cpus->map[cpu], csv_sep); + evsel_list->cpus->map[cpu], csv_sep); else cpu = 0; @@ -500,14 +500,14 @@ static void print_counter(struct perf_evsel *counter) u64 ena, run, val; int cpu; - for (cpu = 0; cpu < cpus->nr; cpu++) { + for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) { val = counter->counts->cpu[cpu].val; ena = counter->counts->cpu[cpu].ena; run = counter->counts->cpu[cpu].run; if (run == 0 || ena == 0) { fprintf(stderr, "CPU%*d%s%*s%s%-24s", csv_output ? 0 : -4, - cpus->map[cpu], csv_sep, + evsel_list->cpus->map[cpu], csv_sep, csv_output ? 0 : 18, "", csv_sep, event_name(counter)); @@ -652,7 +652,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) setlocale(LC_ALL, ""); - evsel_list = perf_evlist__new(); + evsel_list = perf_evlist__new(NULL, NULL); if (evsel_list == NULL) return -ENOMEM; @@ -701,18 +701,18 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (target_pid != -1) target_tid = target_pid; - threads = thread_map__new(target_pid, target_tid); - if (threads == NULL) { + evsel_list->threads = thread_map__new(target_pid, target_tid); + if (evsel_list->threads == NULL) { pr_err("Problems finding threads of monitor\n"); usage_with_options(stat_usage, options); } if (system_wide) - cpus = cpu_map__new(cpu_list); + evsel_list->cpus = cpu_map__new(cpu_list); else - cpus = cpu_map__dummy_new(); + evsel_list->cpus = cpu_map__dummy_new(); - if (cpus == NULL) { + if (evsel_list->cpus == NULL) { perror("failed to parse CPUs map"); usage_with_options(stat_usage, options); return -1; @@ -720,8 +720,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) list_for_each_entry(pos, &evsel_list->entries, node) { 
if (perf_evsel__alloc_stat_priv(pos) < 0 || - perf_evsel__alloc_counts(pos, cpus->nr) < 0 || - perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 || + perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0) goto out_free_fd; } @@ -750,7 +750,6 @@ out_free_fd: perf_evsel__free_stat_priv(pos); perf_evlist__delete(evsel_list); out: - thread_map__delete(threads); - threads = NULL; + perf_evlist__delete_maps(evsel_list); return status; } diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 845b9bd..1b2106c 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -509,7 +509,7 @@ static int test__basic_mmap(void) goto out_free_cpus; } - evlist = perf_evlist__new(); + evlist = perf_evlist__new(cpus, threads); if (evlist == NULL) { pr_debug("perf_evlist__new\n"); goto out_free_cpus; @@ -537,7 +537,7 @@ static int test__basic_mmap(void) } } - if (perf_evlist__mmap(evlist, cpus, threads, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, true) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, strerror(errno)); goto out_close_fd; @@ -579,7 +579,7 @@ static int test__basic_mmap(void) err = 0; out_munmap: - perf_evlist__munmap(evlist, 1); + perf_evlist__munmap(evlist); out_close_fd: for (i = 0; i < nsyscalls; ++i) perf_evsel__close_fd(evsels[i], 1, threads->nr); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2f4d1f2..599036b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -73,9 +73,7 @@ static int print_entries; static int target_pid = -1; static int target_tid = -1; -static struct thread_map *threads; static bool inherit = false; -static struct cpu_map *cpus; static int realtime_prio = 0; static bool group = false; static unsigned int page_size; @@ -567,12 +565,13 @@ static void print_sym_table(struct perf_session *session) printf(" (all"); if (cpu_list) - printf(", CPU%s: %s)\n", cpus->nr > 1 ? 
"s" : "", cpu_list); + printf(", CPU%s: %s)\n", evsel_list->cpus->nr > 1 ? "s" : "", cpu_list); else { if (target_tid != -1) printf(")\n"); else - printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : ""); + printf(", %d CPU%s)\n", evsel_list->cpus->nr, + evsel_list->cpus->nr > 1 ? "s" : ""); } printf("%-*.*s\n", win_width, win_width, graph_dotted_line); @@ -1124,7 +1123,7 @@ static void perf_session__mmap_read(struct perf_session *self) { int i; - for (i = 0; i < cpus->nr; i++) + for (i = 0; i < evsel_list->cpus->nr; i++) perf_session__mmap_read_cpu(self, i); } @@ -1150,7 +1149,8 @@ static void start_counters(struct perf_evlist *evlist) attr->mmap = 1; try_again: - if (perf_evsel__open(counter, cpus, threads, group, inherit) < 0) { + if (perf_evsel__open(counter, evsel_list->cpus, + evsel_list->threads, group, inherit) < 0) { int err = errno; if (err == EPERM || err == EACCES) @@ -1181,7 +1181,7 @@ try_again: } } - if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, false) < 0) + if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); } @@ -1296,7 +1296,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) struct perf_evsel *pos; int status = -ENOMEM; - evsel_list = perf_evlist__new(); + evsel_list = perf_evlist__new(NULL, NULL); if (evsel_list == NULL) return -ENOMEM; @@ -1306,15 +1306,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(top_usage, options); - if (target_pid != -1) - target_tid = target_pid; - - threads = thread_map__new(target_pid, target_tid); - if (threads == NULL) { - pr_err("Problems finding threads of monitor\n"); - usage_with_options(top_usage, options); - } - /* CPU and PID are mutually exclusive */ if (target_tid > 0 && cpu_list) { printf("WARNING: PID switch overriding CPU\n"); @@ -1322,6 +1313,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) cpu_list = NULL; } + if (target_pid != 
-1) + target_tid = target_pid; + + if (perf_evlist__create_maps(evsel_list, target_pid, + target_tid, cpu_list) < 0) + usage_with_options(top_usage, options); + if (!evsel_list->nr_entries && perf_evlist__add_default(evsel_list) < 0) { pr_err("Not enough memory for event selector list\n"); @@ -1343,16 +1341,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) exit(EXIT_FAILURE); } - if (target_tid != -1) - cpus = cpu_map__dummy_new(); - else - cpus = cpu_map__new(cpu_list); - - if (cpus == NULL) - usage_with_options(top_usage, options); - list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) + if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, + evsel_list->threads->nr) < 0) goto out_free_fd; /* * Fill in the ones not specifically initialized via -c: @@ -1363,8 +1354,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = default_interval; } - if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0 || - perf_evlist__alloc_mmap(evsel_list, cpus->nr) < 0) + if (perf_evlist__alloc_pollfd(evsel_list) < 0 || + perf_evlist__alloc_mmap(evsel_list) < 0) goto out_free_fd; sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index 5e9f3b7..df638c4 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py @@ -23,9 +23,9 @@ def main(): sample_id_all = 1, sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) evsel.open(cpus = cpus, threads = threads); - evlist = perf.evlist() + evlist = perf.evlist(cpus, threads) evlist.add(evsel) - evlist.mmap(cpus = cpus, threads = threads) + evlist.mmap() while True: evlist.poll(timeout = -1) for cpu in cpus: diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dcd5932..95b21fe 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c 
@@ -21,21 +21,24 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) #define SID(e, x, y) xyarray__entry(e->id, x, y) -void perf_evlist__init(struct perf_evlist *evlist) +void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads) { int i; for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); + perf_evlist__set_maps(evlist, cpus, threads); } -struct perf_evlist *perf_evlist__new(void) +struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, + struct thread_map *threads) { struct perf_evlist *evlist = zalloc(sizeof(*evlist)); if (evlist != NULL) - perf_evlist__init(evlist); + perf_evlist__init(evlist, cpus, threads); return evlist; } @@ -88,9 +91,9 @@ int perf_evlist__add_default(struct perf_evlist *evlist) return 0; } -int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads) +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nfds = ncpus * nthreads * evlist->nr_entries; + int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries; evlist->pollfd = malloc(sizeof(struct pollfd) * nfds); return evlist->pollfd != NULL ? 
0 : -ENOMEM; } @@ -213,11 +216,11 @@ union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) return event; } -void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus) +void perf_evlist__munmap(struct perf_evlist *evlist) { int cpu; - for (cpu = 0; cpu < ncpus; cpu++) { + for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { if (evlist->mmap[cpu].base != NULL) { munmap(evlist->mmap[cpu].base, evlist->mmap_len); evlist->mmap[cpu].base = NULL; @@ -225,9 +228,9 @@ void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus) } } -int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus) +int perf_evlist__alloc_mmap(struct perf_evlist *evlist) { - evlist->mmap = zalloc(ncpus * sizeof(struct perf_mmap)); + evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap)); return evlist->mmap != NULL ? 0 : -ENOMEM; } @@ -248,8 +251,6 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, /** perf_evlist__mmap - Create per cpu maps to receive events * * @evlist - list of events - * @cpus - cpu map being monitored - * @threads - threads map being monitored * @pages - map length in pages * @overwrite - overwrite older events? * @@ -259,21 +260,22 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot, * unsigned int head = perf_mmap__read_head(m); * * perf_mmap__write_tail(m, head) + * + * Using perf_evlist__read_on_cpu does this automatically. */ -int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, - struct thread_map *threads, int pages, bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) { unsigned int page_size = sysconf(_SC_PAGE_SIZE); int mask = pages * page_size - 1, cpu; struct perf_evsel *first_evsel, *evsel; + const struct cpu_map *cpus = evlist->cpus; + const struct thread_map *threads = evlist->threads; int thread, prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE); - if (evlist->mmap == NULL && - perf_evlist__alloc_mmap(evlist, cpus->nr) < 0) + if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) return -ENOMEM; - if (evlist->pollfd == NULL && - perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0) + if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; evlist->overwrite = overwrite; @@ -315,3 +317,34 @@ out_unmap: } return -1; } + +int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, + pid_t target_tid, const char *cpu_list) +{ + evlist->threads = thread_map__new(target_pid, target_tid); + + if (evlist->threads == NULL) + return -1; + + if (target_tid != -1) + evlist->cpus = cpu_map__dummy_new(); + else + evlist->cpus = cpu_map__new(cpu_list); + + if (evlist->cpus == NULL) + goto out_delete_threads; + + return 0; + +out_delete_threads: + thread_map__delete(evlist->threads); + return -1; +} + +void perf_evlist__delete_maps(struct perf_evlist *evlist) +{ + cpu_map__delete(evlist->cpus); + thread_map__delete(evlist->threads); + evlist->cpus = NULL; + evlist->threads = NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 85aca6e..c988405 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -22,28 +22,43 @@ struct perf_evlist { union perf_event event_copy; struct perf_mmap *mmap; struct pollfd *pollfd; + struct thread_map *threads; + struct cpu_map *cpus; }; struct perf_evsel; -struct perf_evlist *perf_evlist__new(void); -void perf_evlist__init(struct perf_evlist *evlist); +struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, + struct thread_map *threads); +void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads); void perf_evlist__exit(struct perf_evlist *evlist); void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist 
*evlist); -int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads); +int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu); -int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus); -int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus, - struct thread_map *threads, int pages, bool overwrite); -void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus); +int perf_evlist__alloc_mmap(struct perf_evlist *evlist); +int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); +void perf_evlist__munmap(struct perf_evlist *evlist); + +static inline void perf_evlist__set_maps(struct perf_evlist *evlist, + struct cpu_map *cpus, + struct thread_map *threads) +{ + evlist->cpus = cpus; + evlist->threads = threads; +} + +int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, + pid_t target_tid, const char *cpu_list); +void perf_evlist__delete_maps(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 88d4789..d2d5217 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -553,7 +553,16 @@ struct pyrf_evlist { static int pyrf_evlist__init(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { - perf_evlist__init(&pevlist->evlist); + PyObject *pcpus = NULL, *pthreads = NULL; + struct cpu_map *cpus; + struct thread_map *threads; + + if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads)) + return -1; + + threads = ((struct pyrf_thread_map *)pthreads)->threads; + cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; + perf_evlist__init(&pevlist->evlist, cpus, threads); return 0; } @@ -567,21 +576,15 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, PyObject 
*args, PyObject *kwargs) { struct perf_evlist *evlist = &pevlist->evlist; - PyObject *pcpus = NULL, *pthreads = NULL; - struct cpu_map *cpus = NULL; - struct thread_map *threads = NULL; - static char *kwlist[] = {"cpus", "threads", "pages", "overwrite", + static char *kwlist[] = {"pages", "overwrite", NULL, NULL}; int pages = 128, overwrite = false; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|ii", kwlist, - &pcpus, &pthreads, &pages, &overwrite)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist, + &pages, &overwrite)) return NULL; - threads = ((struct pyrf_thread_map *)pthreads)->threads; - cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; - - if (perf_evlist__mmap(evlist, cpus, threads, pages, overwrite) < 0) { + if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } -- cgit v0.10.2 From 8c3e10eb1968877d6a1957b7e790c6ce01bd56fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jan 2011 14:50:39 -0200 Subject: perf top: Move display agnostic routines to util/top.[ch] Paving the way for a slang browser a la 'perf report --tui'. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36ff73c..edc660e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -437,6 +437,7 @@ LIB_H += util/probe-finder.h LIB_H += util/probe-event.h LIB_H += util/pstack.h LIB_H += util/cpumap.h +LIB_H += util/top.h LIB_H += $(ARCH_INCLUDE) LIB_OBJS += $(OUTPUT)util/abspath.o @@ -464,6 +465,7 @@ LIB_OBJS += $(OUTPUT)util/strbuf.o LIB_OBJS += $(OUTPUT)util/string.o LIB_OBJS += $(OUTPUT)util/strlist.o LIB_OBJS += $(OUTPUT)util/strfilter.o +LIB_OBJS += $(OUTPUT)util/top.o LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 599036b..3c9ba94 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -27,6 +27,7 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/thread_map.h" +#include "util/top.h" #include "util/util.h" #include #include "util/parse-options.h" @@ -47,7 +48,6 @@ #include #include #include -#include #include #include @@ -62,75 +62,35 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -struct perf_evlist *evsel_list; +static struct perf_top top = { + .count_filter = 5, + .delay_secs = 2, + .display_weighted = -1, + .target_pid = -1, + .target_tid = -1, + .active_symbols = LIST_HEAD_INIT(top.active_symbols), + .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER, + .freq = 1000, /* 1 KHz */ +}; static bool system_wide = false; static int default_interval = 0; -static int count_filter = 5; -static int print_entries; - -static int target_pid = -1; -static int target_tid = -1; static bool inherit = false; static int realtime_prio = 0; static bool group = false; static unsigned int page_size; static unsigned int mmap_pages = 128; -static int freq = 
1000; /* 1 KHz */ -static int delay_secs = 2; -static bool zero = false; static bool dump_symtab = false; -static bool hide_kernel_symbols = false; -static bool hide_user_symbols = false; static struct winsize winsize; -/* - * Source - */ - -struct source_line { - u64 eip; - unsigned long count[MAX_COUNTERS]; - char *line; - struct source_line *next; -}; - static const char *sym_filter = NULL; struct sym_entry *sym_filter_entry = NULL; struct sym_entry *sym_filter_entry_sched = NULL; static int sym_pcnt_filter = 5; -static int sym_counter = 0; -static struct perf_evsel *sym_evsel = NULL; -static int display_weighted = -1; -static const char *cpu_list; - -/* - * Symbols - */ - -struct sym_entry_source { - struct source_line *source; - struct source_line *lines; - struct source_line **lines_tail; - pthread_mutex_t lock; -}; - -struct sym_entry { - struct rb_node rb_node; - struct list_head node; - unsigned long snap_count; - double weight; - int skip; - u16 name_len; - u8 origin; - struct map *map; - struct sym_entry_source *src; - unsigned long count[0]; -}; /* * Source functions @@ -165,10 +125,10 @@ void get_term_dimensions(struct winsize *ws) static void update_print_entries(struct winsize *ws) { - print_entries = ws->ws_row; + top.print_entries = ws->ws_row; - if (print_entries > 9) - print_entries -= 9; + if (top.print_entries > 9) + top.print_entries -= 9; } static void sig_winch_handler(int sig __used) @@ -269,7 +229,7 @@ static void __zero_source_counters(struct sym_entry *syme) line = syme->src->lines; while (line) { - for (i = 0; i < evsel_list->nr_entries; i++) + for (i = 0; i < top.evlist->nr_entries; i++) line->count[i] = 0; line = line->next; } @@ -331,9 +291,9 @@ static void show_lines(struct source_line *queue, int count, int total) line = queue; for (i = 0; i < count; i++) { - float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; + float pcnt = 100.0*(float)line->count[top.sym_counter]/(float)total; - printf("%8li %4.1f%%\t%s\n", 
line->count[sym_counter], pcnt, line->line); + printf("%8li %4.1f%%\t%s\n", line->count[top.sym_counter], pcnt, line->line); line = line->next; } } @@ -358,13 +318,13 @@ static void show_details(struct sym_entry *syme) return; symbol = sym_entry__symbol(syme); - printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name); + printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); pthread_mutex_lock(&syme->src->lock); line = syme->src->source; while (line) { - total += line->count[sym_counter]; + total += line->count[top.sym_counter]; line = line->next; } @@ -376,10 +336,10 @@ static void show_details(struct sym_entry *syme) line_queue = line; line_queue_count++; - if (line->count[sym_counter]) - pcnt = 100.0 * line->count[sym_counter] / (float)total; + if (line->count[top.sym_counter]) + pcnt = 100.0 * line->count[top.sym_counter] / (float)total; if (pcnt >= (float)sym_pcnt_filter) { - if (displayed <= print_entries) + if (displayed <= top.print_entries) show_lines(line_queue, line_queue_count, total); else more++; displayed += line_queue_count; @@ -390,7 +350,7 @@ static void show_details(struct sym_entry *syme) line_queue_count--; } - line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; + line->count[top.sym_counter] = top.zero ? 0 : line->count[top.sym_counter] * 7 / 8; line = line->next; } pthread_mutex_unlock(&syme->src->lock); @@ -398,181 +358,30 @@ static void show_details(struct sym_entry *syme) printf("%d lines not displayed, maybe increase display entries [e]\n", more); } -/* - * Symbols will be added here in perf_event__process_sample and will get out - * after decayed. - */ -static LIST_HEAD(active_symbols); -static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; - -/* - * Ordering weight: count-1 * count-2 * ... 
/ count-n - */ -static double sym_weight(const struct sym_entry *sym) -{ - double weight = sym->snap_count; - int counter; - - if (!display_weighted) - return weight; - - for (counter = 1; counter < evsel_list->nr_entries - 1; counter++) - weight *= sym->count[counter]; - - weight /= (sym->count[counter] + 1); - - return weight; -} - -static long samples; -static long kernel_samples, us_samples; -static long exact_samples; -static long guest_us_samples, guest_kernel_samples; static const char CONSOLE_CLEAR[] = ""; static void __list_insert_active_sym(struct sym_entry *syme) { - list_add(&syme->node, &active_symbols); -} - -static void list_remove_active_sym(struct sym_entry *syme) -{ - pthread_mutex_lock(&active_symbols_lock); - list_del_init(&syme->node); - pthread_mutex_unlock(&active_symbols_lock); -} - -static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) -{ - struct rb_node **p = &tree->rb_node; - struct rb_node *parent = NULL; - struct sym_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct sym_entry, rb_node); - - if (se->weight > iter->weight) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&se->rb_node, parent, p); - rb_insert_color(&se->rb_node, tree); + list_add(&syme->node, &top.active_symbols); } static void print_sym_table(struct perf_session *session) { - int printed = 0, j; - struct perf_evsel *counter; - int snap = !display_weighted ? 
sym_counter : 0; - float samples_per_sec = samples/delay_secs; - float ksamples_per_sec = kernel_samples/delay_secs; - float us_samples_per_sec = (us_samples)/delay_secs; - float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs; - float guest_us_samples_per_sec = (guest_us_samples)/delay_secs; - float esamples_percent = (100.0*exact_samples)/samples; - float sum_ksamples = 0.0; - struct sym_entry *syme, *n; - struct rb_root tmp = RB_ROOT; + char bf[160]; + int printed = 0; struct rb_node *nd; - int sym_width = 0, dso_width = 0, dso_short_width = 0; + struct sym_entry *syme; + struct rb_root tmp = RB_ROOT; const int win_width = winsize.ws_col - 1; - - samples = us_samples = kernel_samples = exact_samples = 0; - guest_kernel_samples = guest_us_samples = 0; - - /* Sort the active symbols */ - pthread_mutex_lock(&active_symbols_lock); - syme = list_entry(active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&active_symbols_lock); - - list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - syme->snap_count = syme->count[snap]; - if (syme->snap_count != 0) { - - if ((hide_user_symbols && - syme->origin == PERF_RECORD_MISC_USER) || - (hide_kernel_symbols && - syme->origin == PERF_RECORD_MISC_KERNEL)) { - list_remove_active_sym(syme); - continue; - } - syme->weight = sym_weight(syme); - rb_insert_active_sym(&tmp, syme); - sum_ksamples += syme->snap_count; - - for (j = 0; j < evsel_list->nr_entries; j++) - syme->count[j] = zero ? 
0 : syme->count[j] * 7 / 8; - } else - list_remove_active_sym(syme); - } + int sym_width, dso_width, dso_short_width; + float sum_ksamples = perf_top__decay_samples(&top, &tmp); puts(CONSOLE_CLEAR); - if (!perf_guest) { - printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%" - " exact: %4.1f%% [", - samples_per_sec, - 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) / - samples_per_sec)), - esamples_percent); - } else { - printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%" - " guest kernel:%4.1f%% guest us:%4.1f%%" - " exact: %4.1f%% [", - samples_per_sec, - 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) / - samples_per_sec)), - 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) / - samples_per_sec)), - 100.0 - (100.0 * ((samples_per_sec - - guest_kernel_samples_per_sec) / - samples_per_sec)), - 100.0 - (100.0 * ((samples_per_sec - - guest_us_samples_per_sec) / - samples_per_sec)), - esamples_percent); - } + perf_top__header_snprintf(&top, bf, sizeof(bf)); + printf("%s\n", bf); - if (evsel_list->nr_entries == 1 || !display_weighted) { - struct perf_evsel *first; - first = list_entry(evsel_list->entries.next, struct perf_evsel, node); - printf("%" PRIu64, (uint64_t)first->attr.sample_period); - if (freq) - printf("Hz "); - else - printf(" "); - } - - if (!display_weighted) - printf("%s", event_name(sym_evsel)); - else list_for_each_entry(counter, &evsel_list->entries, node) { - if (counter->idx) - printf("/"); - - printf("%s", event_name(counter)); - } - - printf( "], "); - - if (target_pid != -1) - printf(" (target_pid: %d", target_pid); - else if (target_tid != -1) - printf(" (target_tid: %d", target_tid); - else - printf(" (all"); - - if (cpu_list) - printf(", CPU%s: %s)\n", evsel_list->cpus->nr > 1 ? "s" : "", cpu_list); - else { - if (target_tid != -1) - printf(")\n"); - else - printf(", %d CPU%s)\n", evsel_list->cpus->nr, - evsel_list->cpus->nr > 1 ? 
"s" : ""); - } + perf_top__reset_sample_counters(&top); printf("%-*.*s\n", win_width, win_width, graph_dotted_line); @@ -587,26 +396,8 @@ static void print_sym_table(struct perf_session *session) return; } - /* - * Find the longest symbol name that will be displayed - */ - for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { - syme = rb_entry(nd, struct sym_entry, rb_node); - if (++printed > print_entries || - (int)syme->snap_count < count_filter) - continue; - - if (syme->map->dso->long_name_len > dso_width) - dso_width = syme->map->dso->long_name_len; - - if (syme->map->dso->short_name_len > dso_short_width) - dso_short_width = syme->map->dso->short_name_len; - - if (syme->name_len > sym_width) - sym_width = syme->name_len; - } - - printed = 0; + perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width, + &sym_width); if (sym_width + dso_width > winsize.ws_col - 29) { dso_width = dso_short_width; @@ -614,7 +405,7 @@ static void print_sym_table(struct perf_session *session) sym_width = winsize.ws_col - dso_width - 29; } putchar('\n'); - if (evsel_list->nr_entries == 1) + if (top.evlist->nr_entries == 1) printf(" samples pcnt"); else printf(" weight samples pcnt"); @@ -623,7 +414,7 @@ static void print_sym_table(struct perf_session *session) printf(" RIP "); printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); printf(" %s _______ _____", - evsel_list->nr_entries == 1 ? " " : "______"); + top.evlist->nr_entries == 1 ? 
" " : "______"); if (verbose) printf(" ________________"); printf(" %-*.*s", sym_width, sym_width, graph_line); @@ -636,13 +427,14 @@ static void print_sym_table(struct perf_session *session) syme = rb_entry(nd, struct sym_entry, rb_node); sym = sym_entry__symbol(syme); - if (++printed > print_entries || (int)syme->snap_count < count_filter) + if (++printed > top.print_entries || + (int)syme->snap_count < top.count_filter) continue; pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - if (evsel_list->nr_entries == 1 || !display_weighted) + if (top.evlist->nr_entries == 1 || !top.display_weighted) printf("%20.2f ", syme->weight); else printf("%9.1f %10ld ", syme->weight, syme->snap_count); @@ -715,11 +507,11 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) if (p) *p = 0; - pthread_mutex_lock(&active_symbols_lock); - syme = list_entry(active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&active_symbols_lock); + pthread_mutex_lock(&top.active_symbols_lock); + syme = list_entry(top.active_symbols.next, struct sym_entry, node); + pthread_mutex_unlock(&top.active_symbols_lock); - list_for_each_entry_safe_from(syme, n, &active_symbols, node) { + list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) { struct symbol *sym = sym_entry__symbol(syme); if (!strcmp(buf, sym->name)) { @@ -749,28 +541,28 @@ static void print_mapped_keys(void) } fprintf(stdout, "\nMapped keys:\n"); - fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); - fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); + fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs); + fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries); - if (evsel_list->nr_entries > 1) - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel)); + if (top.evlist->nr_entries > 1) + fprintf(stdout, "\t[E] active event counter. 
\t(%s)\n", event_name(top.sym_evsel)); - fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); + fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter); fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); - if (evsel_list->nr_entries > 1) - fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + if (top.evlist->nr_entries > 1) + fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0); fprintf(stdout, "\t[K] hide kernel_symbols symbols. \t(%s)\n", - hide_kernel_symbols ? "yes" : "no"); + top.hide_kernel_symbols ? "yes" : "no"); fprintf(stdout, "\t[U] hide user symbols. \t(%s)\n", - hide_user_symbols ? "yes" : "no"); - fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); + top.hide_user_symbols ? "yes" : "no"); + fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0); fprintf(stdout, "\t[qQ] quit.\n"); } @@ -791,7 +583,7 @@ static int key_mapped(int c) return 1; case 'E': case 'w': - return evsel_list->nr_entries > 1 ? 1 : 0; + return top.evlist->nr_entries > 1 ? 
1 : 0; default: break; } @@ -826,47 +618,47 @@ static void handle_keypress(struct perf_session *session, int c) switch (c) { case 'd': - prompt_integer(&delay_secs, "Enter display delay"); - if (delay_secs < 1) - delay_secs = 1; + prompt_integer(&top.delay_secs, "Enter display delay"); + if (top.delay_secs < 1) + top.delay_secs = 1; break; case 'e': - prompt_integer(&print_entries, "Enter display entries (lines)"); - if (print_entries == 0) { + prompt_integer(&top.print_entries, "Enter display entries (lines)"); + if (top.print_entries == 0) { sig_winch_handler(SIGWINCH); signal(SIGWINCH, sig_winch_handler); } else signal(SIGWINCH, SIG_DFL); break; case 'E': - if (evsel_list->nr_entries > 1) { + if (top.evlist->nr_entries > 1) { fprintf(stderr, "\nAvailable events:"); - list_for_each_entry(sym_evsel, &evsel_list->entries, node) - fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel)); + list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) + fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel)); - prompt_integer(&sym_counter, "Enter details event counter"); + prompt_integer(&top.sym_counter, "Enter details event counter"); - if (sym_counter >= evsel_list->nr_entries) { - sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); - sym_counter = 0; - fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel)); + if (top.sym_counter >= top.evlist->nr_entries) { + top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); + top.sym_counter = 0; + fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel)); sleep(1); break; } - list_for_each_entry(sym_evsel, &evsel_list->entries, node) - if (sym_evsel->idx == sym_counter) + list_for_each_entry(top.sym_evsel, &top.evlist->entries, node) + if (top.sym_evsel->idx == top.sym_counter) break; - } else sym_counter = 0; + } else top.sym_counter = 0; break; case 'f': - prompt_integer(&count_filter, "Enter display 
event count filter"); + prompt_integer(&top.count_filter, "Enter display event count filter"); break; case 'F': prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); break; case 'K': - hide_kernel_symbols = !hide_kernel_symbols; + top.hide_kernel_symbols = !top.hide_kernel_symbols; break; case 'q': case 'Q': @@ -890,13 +682,13 @@ static void handle_keypress(struct perf_session *session, int c) } break; case 'U': - hide_user_symbols = !hide_user_symbols; + top.hide_user_symbols = !top.hide_user_symbols; break; case 'w': - display_weighted = ~display_weighted; + top.display_weighted = ~top.display_weighted; break; case 'z': - zero = !zero; + top.zero = !top.zero; break; default: break; @@ -917,7 +709,7 @@ static void *display_thread(void *arg __used) tc.c_cc[VTIME] = 0; repeat: - delay_msecs = delay_secs * 1000; + delay_msecs = top.delay_secs * 1000; tcsetattr(0, TCSANOW, &tc); /* trash return*/ getc(stdin); @@ -1005,27 +797,27 @@ static void perf_event__process_sample(const union perf_event *event, struct machine *machine; u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - ++samples; + ++top.samples; switch (origin) { case PERF_RECORD_MISC_USER: - ++us_samples; - if (hide_user_symbols) + ++top.us_samples; + if (top.hide_user_symbols) return; machine = perf_session__find_host_machine(session); break; case PERF_RECORD_MISC_KERNEL: - ++kernel_samples; - if (hide_kernel_symbols) + ++top.kernel_samples; + if (top.hide_kernel_symbols) return; machine = perf_session__find_host_machine(session); break; case PERF_RECORD_MISC_GUEST_KERNEL: - ++guest_kernel_samples; + ++top.guest_kernel_samples; machine = perf_session__find_machine(session, event->ip.pid); break; case PERF_RECORD_MISC_GUEST_USER: - ++guest_us_samples; + ++top.guest_us_samples; /* * TODO: we don't process guest user from host side * except simple counting. 
@@ -1042,7 +834,7 @@ static void perf_event__process_sample(const union perf_event *event, } if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) - exact_samples++; + top.exact_samples++; if (perf_event__preprocess_sample(event, session, &al, sample, symbol_filter) < 0 || @@ -1093,14 +885,14 @@ static void perf_event__process_sample(const union perf_event *event, struct perf_evsel *evsel; syme->origin = origin; - evsel = perf_evlist__id2evsel(evsel_list, sample->id); + evsel = perf_evlist__id2evsel(top.evlist, sample->id); assert(evsel != NULL); syme->count[evsel->idx]++; record_precise_ip(syme, evsel->idx, ip); - pthread_mutex_lock(&active_symbols_lock); + pthread_mutex_lock(&top.active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); - pthread_mutex_unlock(&active_symbols_lock); + pthread_mutex_unlock(&top.active_symbols_lock); } } @@ -1109,7 +901,7 @@ static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) struct perf_sample sample; union perf_event *event; - while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) { + while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) { perf_session__parse_sample(self, event, &sample); if (event->header.type == PERF_RECORD_SAMPLE) @@ -1123,7 +915,7 @@ static void perf_session__mmap_read(struct perf_session *self) { int i; - for (i = 0; i < evsel_list->cpus->nr; i++) + for (i = 0; i < top.evlist->cpus->nr; i++) perf_session__mmap_read_cpu(self, i); } @@ -1136,10 +928,10 @@ static void start_counters(struct perf_evlist *evlist) attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - if (freq) { + if (top.freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; - attr->sample_freq = freq; + attr->sample_freq = top.freq; } if (evlist->nr_entries > 1) { @@ -1149,8 +941,8 @@ static void start_counters(struct perf_evlist *evlist) attr->mmap = 1; try_again: - if (perf_evsel__open(counter, evsel_list->cpus, - evsel_list->threads, group, 
inherit) < 0) { + if (perf_evsel__open(counter, top.evlist->cpus, + top.evlist->threads, group, inherit) < 0) { int err = errno; if (err == EPERM || err == EACCES) @@ -1198,18 +990,18 @@ static int __cmd_top(void) if (session == NULL) return -ENOMEM; - if (target_tid != -1) - perf_event__synthesize_thread(target_tid, perf_event__process, + if (top.target_tid != -1) + perf_event__synthesize_thread(top.target_tid, perf_event__process, session); else perf_event__synthesize_threads(perf_event__process, session); - start_counters(evsel_list); - first = list_entry(evsel_list->entries.next, struct perf_evsel, node); + start_counters(top.evlist); + first = list_entry(top.evlist->entries.next, struct perf_evsel, node); perf_session__set_sample_type(session, first->attr.sample_type); /* Wait for a minimal set of events before starting the snapshot */ - poll(evsel_list->pollfd, evsel_list->nr_fds, 100); + poll(top.evlist->pollfd, top.evlist->nr_fds, 100); perf_session__mmap_read(session); @@ -1229,12 +1021,12 @@ static int __cmd_top(void) } while (1) { - int hits = samples; + u64 hits = top.samples; perf_session__mmap_read(session); - if (hits == samples) - ret = poll(evsel_list->pollfd, evsel_list->nr_fds, 100); + if (hits == top.samples) + ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100); } return 0; @@ -1246,31 +1038,31 @@ static const char * const top_usage[] = { }; static const struct option options[] = { - OPT_CALLBACK('e', "event", &evsel_list, "event", + OPT_CALLBACK('e', "event", &top.evlist, "event", "event selector. 
use 'perf list' to list available events", parse_events), OPT_INTEGER('c', "count", &default_interval, "event period to sample"), - OPT_INTEGER('p', "pid", &target_pid, + OPT_INTEGER('p', "pid", &top.target_pid, "profile events on existing process id"), - OPT_INTEGER('t', "tid", &target_tid, + OPT_INTEGER('t', "tid", &top.target_tid, "profile events on existing thread id"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), - OPT_STRING('C', "cpu", &cpu_list, "cpu", + OPT_STRING('C', "cpu", &top.cpu_list, "cpu", "list of cpus to monitor"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, + OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), - OPT_INTEGER('d', "delay", &delay_secs, + OPT_INTEGER('d', "delay", &top.delay_secs, "number of seconds to delay between refreshes"), OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, "dump the symbol table used for profiling"), - OPT_INTEGER('f', "count-filter", &count_filter, + OPT_INTEGER('f', "count-filter", &top.count_filter, "only display functions with more events than this"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), @@ -1278,13 +1070,13 @@ static const struct option options[] = { "child tasks inherit counters"), OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", "symbol to annotate"), - OPT_BOOLEAN('z', "zero", &zero, + OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"), - OPT_INTEGER('F', "freq", &freq, + OPT_INTEGER('F', "freq", &top.freq, "profile at this frequency"), - OPT_INTEGER('E', "entries", &print_entries, + OPT_INTEGER('E', "entries", &top.print_entries, "display this many functions"), - OPT_BOOLEAN('U', 
"hide_user_symbols", &hide_user_symbols, + OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, "hide user symbols"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), @@ -1296,8 +1088,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) struct perf_evsel *pos; int status = -ENOMEM; - evsel_list = perf_evlist__new(NULL, NULL); - if (evsel_list == NULL) + top.evlist = perf_evlist__new(NULL, NULL); + if (top.evlist == NULL) return -ENOMEM; page_size = sysconf(_SC_PAGE_SIZE); @@ -1307,43 +1099,43 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) usage_with_options(top_usage, options); /* CPU and PID are mutually exclusive */ - if (target_tid > 0 && cpu_list) { + if (top.target_tid > 0 && top.cpu_list) { printf("WARNING: PID switch overriding CPU\n"); sleep(1); - cpu_list = NULL; + top.cpu_list = NULL; } - if (target_pid != -1) - target_tid = target_pid; + if (top.target_pid != -1) + top.target_tid = top.target_pid; - if (perf_evlist__create_maps(evsel_list, target_pid, - target_tid, cpu_list) < 0) + if (perf_evlist__create_maps(top.evlist, top.target_pid, + top.target_tid, top.cpu_list) < 0) usage_with_options(top_usage, options); - if (!evsel_list->nr_entries && - perf_evlist__add_default(evsel_list) < 0) { + if (!top.evlist->nr_entries && + perf_evlist__add_default(top.evlist) < 0) { pr_err("Not enough memory for event selector list\n"); return -ENOMEM; } - if (delay_secs < 1) - delay_secs = 1; + if (top.delay_secs < 1) + top.delay_secs = 1; /* * User specified count overrides default frequency. 
*/ if (default_interval) - freq = 0; - else if (freq) { - default_interval = freq; + top.freq = 0; + else if (top.freq) { + default_interval = top.freq; } else { fprintf(stderr, "frequency and count are zero, aborting\n"); exit(EXIT_FAILURE); } - list_for_each_entry(pos, &evsel_list->entries, node) { - if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, - evsel_list->threads->nr) < 0) + list_for_each_entry(pos, &top.evlist->entries, node) { + if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr, + top.evlist->threads->nr) < 0) goto out_free_fd; /* * Fill in the ones not specifically initialized via -c: @@ -1354,28 +1146,28 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = default_interval; } - if (perf_evlist__alloc_pollfd(evsel_list) < 0 || - perf_evlist__alloc_mmap(evsel_list) < 0) + if (perf_evlist__alloc_pollfd(top.evlist) < 0 || + perf_evlist__alloc_mmap(top.evlist) < 0) goto out_free_fd; - sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node); + top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); symbol_conf.priv_size = (sizeof(struct sym_entry) + - (evsel_list->nr_entries + 1) * sizeof(unsigned long)); + (top.evlist->nr_entries + 1) * sizeof(unsigned long)); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); if (symbol__init() < 0) return -1; get_term_dimensions(&winsize); - if (print_entries == 0) { + if (top.print_entries == 0) { update_print_entries(&winsize); signal(SIGWINCH, sig_winch_handler); } status = __cmd_top(); out_free_fd: - perf_evlist__delete(evsel_list); + perf_evlist__delete(top.evlist); return status; } diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c new file mode 100644 index 0000000..c06cc53 --- /dev/null +++ b/tools/perf/util/top.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Refactored from builtin-top.c, see that file for further copyright notes.
+ * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "cpumap.h" +#include "event.h" +#include "evlist.h" +#include "evsel.h" +#include "parse-events.h" +#include "symbol.h" +#include "top.h" +#include + +/* + * Ordering weight: count-1 * count-2 * ... / count-n + */ +static double sym_weight(const struct sym_entry *sym, struct perf_top *top) +{ + double weight = sym->snap_count; + int counter; + + if (!top->display_weighted) + return weight; + + for (counter = 1; counter < top->evlist->nr_entries - 1; counter++) + weight *= sym->count[counter]; + + weight /= (sym->count[counter] + 1); + + return weight; +} + +static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme) +{ + pthread_mutex_lock(&top->active_symbols_lock); + list_del_init(&syme->node); + pthread_mutex_unlock(&top->active_symbols_lock); +} + +static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *parent = NULL; + struct sym_entry *iter; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct sym_entry, rb_node); + + if (se->weight > iter->weight) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&se->rb_node, parent, p); + rb_insert_color(&se->rb_node, tree); +} + +size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) +{ + struct perf_evsel *counter; + float samples_per_sec = top->samples / top->delay_secs; + float ksamples_per_sec = top->kernel_samples / top->delay_secs; + float esamples_percent = (100.0 * top->exact_samples) / top->samples; + size_t ret = 0; + + if (!perf_guest) { + ret = snprintf(bf, size, + " PerfTop:%8.0f irqs/sec kernel:%4.1f%%" + " exact: %4.1f%% [", samples_per_sec, + 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) / + samples_per_sec)), + esamples_percent); + } else { + float us_samples_per_sec = top->us_samples / top->delay_secs; + float guest_kernel_samples_per_sec 
= top->guest_kernel_samples / top->delay_secs; + float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs; + + ret = snprintf(bf, size, + " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%" + " guest kernel:%4.1f%% guest us:%4.1f%%" + " exact: %4.1f%% [", samples_per_sec, + 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) / + samples_per_sec)), + 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) / + samples_per_sec)), + 100.0 - (100.0 * ((samples_per_sec - + guest_kernel_samples_per_sec) / + samples_per_sec)), + 100.0 - (100.0 * ((samples_per_sec - + guest_us_samples_per_sec) / + samples_per_sec)), + esamples_percent); + } + + if (top->evlist->nr_entries == 1 || !top->display_weighted) { + struct perf_evsel *first; + first = list_entry(top->evlist->entries.next, struct perf_evsel, node); + ret += snprintf(bf + ret, size - ret, "%" PRIu64 "%s ", + (uint64_t)first->attr.sample_period, + top->freq ? "Hz" : ""); + } + + if (!top->display_weighted) { + ret += snprintf(bf + ret, size - ret, "%s", + event_name(top->sym_evsel)); + } else list_for_each_entry(counter, &top->evlist->entries, node) { + ret += snprintf(bf + ret, size - ret, "%s%s", + counter->idx ? "/" : "", event_name(counter)); + } + + ret += snprintf(bf + ret, size - ret, "], "); + + if (top->target_pid != -1) + ret += snprintf(bf + ret, size - ret, " (target_pid: %d", + top->target_pid); + else if (top->target_tid != -1) + ret += snprintf(bf + ret, size - ret, " (target_tid: %d", + top->target_tid); + else + ret += snprintf(bf + ret, size - ret, " (all"); + + if (top->cpu_list) + ret += snprintf(bf + ret, size - ret, ", CPU%s: %s)", + top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); + else { + if (top->target_tid != -1) + ret += snprintf(bf + ret, size - ret, ")"); + else + ret += snprintf(bf + ret, size - ret, ", %d CPU%s)", + top->evlist->cpus->nr, + top->evlist->cpus->nr > 1 ? 
"s" : ""); + } + + return ret; +} + +void perf_top__reset_sample_counters(struct perf_top *top) +{ + top->samples = top->us_samples = top->kernel_samples = + top->exact_samples = top->guest_kernel_samples = + top->guest_us_samples = 0; +} + +float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) +{ + struct sym_entry *syme, *n; + float sum_ksamples = 0.0; + int snap = !top->display_weighted ? top->sym_counter : 0, j; + + /* Sort the active symbols */ + pthread_mutex_lock(&top->active_symbols_lock); + syme = list_entry(top->active_symbols.next, struct sym_entry, node); + pthread_mutex_unlock(&top->active_symbols_lock); + + list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) { + syme->snap_count = syme->count[snap]; + if (syme->snap_count != 0) { + + if ((top->hide_user_symbols && + syme->origin == PERF_RECORD_MISC_USER) || + (top->hide_kernel_symbols && + syme->origin == PERF_RECORD_MISC_KERNEL)) { + perf_top__remove_active_sym(top, syme); + continue; + } + syme->weight = sym_weight(syme, top); + rb_insert_active_sym(root, syme); + sum_ksamples += syme->snap_count; + + for (j = 0; j < top->evlist->nr_entries; j++) + syme->count[j] = top->zero ? 
0 : syme->count[j] * 7 / 8; + } else + perf_top__remove_active_sym(top, syme); + } + + return sum_ksamples; +} + +/* + * Find the longest symbol name that will be displayed + */ +void perf_top__find_widths(struct perf_top *top, struct rb_root *root, + int *dso_width, int *dso_short_width, int *sym_width) +{ + struct rb_node *nd; + int printed = 0; + + *sym_width = *dso_width = *dso_short_width = 0; + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); + + if (++printed > top->print_entries || + (int)syme->snap_count < top->count_filter) + continue; + + if (syme->map->dso->long_name_len > *dso_width) + *dso_width = syme->map->dso->long_name_len; + + if (syme->map->dso->short_name_len > *dso_short_width) + *dso_short_width = syme->map->dso->short_name_len; + + if (syme->name_len > *sym_width) + *sym_width = syme->name_len; + } +} diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h new file mode 100644 index 0000000..0467b26 --- /dev/null +++ b/tools/perf/util/top.h @@ -0,0 +1,67 @@ +#ifndef __PERF_TOP_H +#define __PERF_TOP_H 1 + +#include "types.h" +#include "../perf.h" +#include +#include +#include +#include + +struct perf_evlist; +struct perf_evsel; + +struct source_line { + u64 eip; + unsigned long count[MAX_COUNTERS]; /* FIXME */ + char *line; + struct source_line *next; +}; + +struct sym_entry_source { + struct source_line *source; + struct source_line *lines; + struct source_line **lines_tail; + pthread_mutex_t lock; +}; + +struct sym_entry { + struct rb_node rb_node; + struct list_head node; + unsigned long snap_count; + double weight; + int skip; + u16 name_len; + u8 origin; + struct map *map; + struct sym_entry_source *src; + unsigned long count[0]; +}; + +struct perf_top { + struct perf_evlist *evlist; + /* + * Symbols will be added here in perf_event__process_sample and will + * get out after decayed. 
+ */ + struct list_head active_symbols; + pthread_mutex_t active_symbols_lock; + u64 samples; + u64 kernel_samples, us_samples; + u64 exact_samples; + u64 guest_us_samples, guest_kernel_samples; + int print_entries, count_filter, delay_secs; + int display_weighted, freq; + int sym_counter, target_pid, target_tid; + bool hide_kernel_symbols, hide_user_symbols, zero; + const char *cpu_list; + struct perf_evsel *sym_evsel; +}; + +size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); +void perf_top__reset_sample_counters(struct perf_top *top); +float perf_top__decay_samples(struct perf_top *top, struct rb_root *root); +void perf_top__find_widths(struct perf_top *top, struct rb_root *root, + int *dso_width, int *dso_short_width, int *sym_width); + +#endif /* __PERF_TOP_H */ -- cgit v0.10.2 From 229ade9ba36341f7369ecb4f134bcec9133520bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jan 2011 18:08:39 -0200 Subject: perf tools: Don't fallback to setup_pager unconditionally Because in tools like 'top' we don't want the pager. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7006786..cd9dec4 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -452,7 +452,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) else if (use_tui) use_browser = 1; - setup_browser(); + setup_browser(true); symbol_conf.priv_size = sizeof(struct sym_priv); symbol_conf.try_vmlinux_path = true; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a6a4e54..080937c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -499,7 +499,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) use_browser = 1; if (strcmp(input_name, "-") != 0) - setup_browser(); + setup_browser(true); else use_browser = 0; /* diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index a772979..fc5e5a0 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -34,13 +34,14 @@ extern int pager_use_color; extern int use_browser; #ifdef NO_NEWT_SUPPORT -static inline void setup_browser(void) +static inline void setup_browser(bool fallback_to_pager) { - setup_pager(); + if (fallback_to_pager) + setup_pager(); } static inline void exit_browser(bool wait_for_ok __used) {} #else -void setup_browser(void); +void setup_browser(bool fallback_to_pager); void exit_browser(bool wait_for_ok); #endif diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c index 6620850..fbf1a14 100644 --- a/tools/perf/util/ui/setup.c +++ b/tools/perf/util/ui/setup.c @@ -14,11 +14,12 @@ static void newt_suspend(void *d __used) newtResume(); } -void setup_browser(void) +void setup_browser(bool fallback_to_pager) { if (!isatty(1) || !use_browser || dump_trace) { use_browser = 0; - setup_pager(); + if 
(fallback_to_pager) + setup_pager(); return; } -- cgit v0.10.2 From c0443df1b69b59675fc6790e0ddce87c8ca00abf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jan 2011 18:19:33 -0200 Subject: perf top: Introduce slang based TUI Disabled by default as there are features found in the stdio based one that aren't implemented, like live annotation, filtering knobs data entry. Annotation hopefully will get somehow merged with the 'perf annotate' code. To use it: perf top --tui Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index edc660e..67a9f4d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -621,6 +621,7 @@ else LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o + LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o LIB_OBJS += $(OUTPUT)util/ui/helpline.o LIB_OBJS += $(OUTPUT)util/ui/progress.o LIB_OBJS += $(OUTPUT)util/ui/util.o @@ -1050,6 +1051,9 @@ $(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< + $(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3c9ba94..104de9a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -20,6 +20,7 @@ #include "perf.h" +#include "util/cache.h" #include "util/color.h" #include "util/evlist.h" #include "util/evsel.h" @@ -75,6 +76,8 @@ 
static struct perf_top top = { static bool system_wide = false; +static bool use_tui, use_stdio; + static int default_interval = 0; static bool inherit = false; @@ -96,11 +99,6 @@ static int sym_pcnt_filter = 5; * Source functions */ -static inline struct symbol *sym_entry__symbol(struct sym_entry *self) -{ - return ((void *)self) + symbol_conf.priv_size; -} - void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -695,6 +693,14 @@ static void handle_keypress(struct perf_session *session, int c) } } +static void *display_thread_tui(void *arg __used) +{ + perf_top__tui_browser(&top); + exit_browser(0); + exit(0); + return NULL; +} + static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; @@ -1005,7 +1011,8 @@ static int __cmd_top(void) perf_session__mmap_read(session); - if (pthread_create(&thread, NULL, display_thread, session)) { + if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : + display_thread), session)) { printf("Could not create display thread.\n"); exit(-1); } @@ -1078,6 +1085,8 @@ static const struct option options[] = { "display this many functions"), OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, "hide user symbols"), + OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() @@ -1098,6 +1107,20 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(top_usage, options); + /* + * XXX For now start disabled, only using TUI if explicitly asked for. + * Change that when handle_keys equivalent gets written, live annotation + * done, etc.
+ */ + use_browser = 0; + + if (use_stdio) + use_browser = 0; + else if (use_tui) + use_browser = 1; + + setup_browser(false); + /* CPU and PID are mutually exclusive */ if (top.target_tid > 0 && top.cpu_list) { printf("WARNING: PID switch overriding CPU\n"); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index c06cc53..1d2e265 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -158,6 +158,7 @@ float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) syme = list_entry(top->active_symbols.next, struct sym_entry, node); pthread_mutex_unlock(&top->active_symbols_lock); + top->rb_entries = 0; list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) { syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { @@ -170,7 +171,11 @@ float perf_top__decay_samples(struct perf_top *top, struct rb_root *root) continue; } syme->weight = sym_weight(syme, top); - rb_insert_active_sym(root, syme); + + if ((int)syme->snap_count >= top->count_filter) { + rb_insert_active_sym(root, syme); + ++top->rb_entries; + } sum_ksamples += syme->snap_count; for (j = 0; j < top->evlist->nr_entries; j++) diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 0467b26..611370f 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -38,6 +38,11 @@ struct sym_entry { unsigned long count[0]; }; +static inline struct symbol *sym_entry__symbol(struct sym_entry *self) +{ + return ((void *)self) + symbol_conf.priv_size; +} + struct perf_top { struct perf_evlist *evlist; /* @@ -51,7 +56,7 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; - int display_weighted, freq; + int display_weighted, freq, rb_entries; int sym_counter, target_pid, target_tid; bool hide_kernel_symbols, hide_user_symbols, zero; const char *cpu_list; @@ -64,4 +69,12 @@ float perf_top__decay_samples(struct perf_top *top, struct rb_root *root); void perf_top__find_widths(struct 
perf_top *top, struct rb_root *root, int *dso_width, int *dso_short_width, int *sym_width); +#ifdef NO_NEWT_SUPPORT +static inline int perf_top__tui_browser(struct perf_top *top __used) +{ + return 0; +} +#else +int perf_top__tui_browser(struct perf_top *top); +#endif #endif /* __PERF_TOP_H */ diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c new file mode 100644 index 0000000..ca60624 --- /dev/null +++ b/tools/perf/util/ui/browsers/top.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Parts came from builtin-{top,stat,record}.c, see those files for further + * copyright notes. + * + * Released under the GPL v2. (and only v2, not any later version) + */ +#include "../browser.h" +#include "../helpline.h" +#include "../libslang.h" +#include "../../evlist.h" +#include "../../hist.h" +#include "../../sort.h" +#include "../../symbol.h" +#include "../../top.h" + +struct perf_top_browser { + struct ui_browser b; + struct rb_root root; + float sum_ksamples; + int dso_width; + int dso_short_width; + int sym_width; +}; + +static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row) +{ + struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b); + struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node); + bool current_entry = ui_browser__is_current_entry(browser, row); + struct symbol *symbol = sym_entry__symbol(syme); + struct perf_top *top = browser->priv; + int width = browser->width; + double pcnt; + + pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) / + top_browser->sum_ksamples)); + ui_browser__set_percent_color(browser, pcnt, current_entry); + + if (top->evlist->nr_entries == 1 || !top->display_weighted) { + slsmg_printf("%20.2f ", syme->weight); + width -= 24; + } else { + slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count); + width -= 23; + } + + slsmg_printf("%4.1f%%", pcnt); + width -= 7; + 
+ if (verbose) { + slsmg_printf(" %016" PRIx64, symbol->start); + width -= 17; + } + + slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width, + symbol->name); + width -= top_browser->sym_width; + slsmg_write_nstring(width >= syme->map->dso->long_name_len ? + syme->map->dso->long_name : + syme->map->dso->short_name, width); +} + +static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) +{ + struct perf_top *top = browser->b.priv; + + browser->root = RB_ROOT; + browser->b.top = NULL; + browser->sum_ksamples = perf_top__decay_samples(top, &browser->root); + perf_top__find_widths(top, &browser->root, &browser->dso_width, + &browser->dso_short_width, + &browser->sym_width); + if (browser->sym_width + browser->dso_width > browser->b.width - 29) { + browser->dso_width = browser->dso_short_width; + if (browser->sym_width + browser->dso_width > browser->b.width - 29) + browser->sym_width = browser->b.width - browser->dso_width - 29; + } + browser->b.nr_entries = top->rb_entries; +} + +static int perf_top_browser__run(struct perf_top_browser *browser) +{ + int key; + char title[160]; + struct perf_top *top = browser->b.priv; + int delay_msecs = top->delay_secs * 1000; + + perf_top_browser__update_rb_tree(browser); + perf_top__header_snprintf(top, title, sizeof(title)); + perf_top__reset_sample_counters(top); + + if (ui_browser__show(&browser->b, title, "ESC: exit") < 0) + return -1; + + newtFormSetTimer(browser->b.form, delay_msecs); + + while (1) { + key = ui_browser__run(&browser->b); + + switch (key) { + case -1: + /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */ + perf_top_browser__update_rb_tree(browser); + perf_top__header_snprintf(top, title, sizeof(title)); + perf_top__reset_sample_counters(top); + ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT); + SLsmg_gotorc(0, 0); + slsmg_write_nstring(title, browser->b.width); + break; + case NEWT_KEY_TAB: + default: + goto out; + } + } +out: + 
ui_browser__hide(&browser->b); + return key; +} + +int perf_top__tui_browser(struct perf_top *top) +{ + struct perf_top_browser browser = { + .b = { + .entries = &browser.root, + .refresh = ui_browser__rb_tree_refresh, + .seek = ui_browser__rb_tree_seek, + .write = perf_top_browser__write, + .priv = top, + }, + }; + + ui_helpline__push("Press <- or ESC to exit"); + return perf_top_browser__run(&browser); +} -- cgit v0.10.2 From 823c7164a92a6347d46bb64aaae728b6d08f3bb8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jan 2011 19:45:38 -0200 Subject: perf probe: Use %td for pointer arithmetic result MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit %td is for ptrdiff_t, avoiding this warning on 32-bit: cc1: warnings being treated as errors builtin-probe.c: In function ‘opt_set_filter’: builtin-probe.c:176:4: error: format ‘%ld’ expects type ‘long int’, but argument 3 has type ‘int’ Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index fcde003..2c0e64d 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -173,7 +173,7 @@ static int opt_set_filter(const struct option *opt __used, strfilter__delete(params.filter); params.filter = strfilter__new(str, &err); if (!params.filter) { - pr_err("Filter parse error at %ld.\n", err - str + 1); + pr_err("Filter parse error at %td.\n", err - str + 1); pr_err("Source: \"%s\"\n", str); pr_err(" %*c\n", (int)(err - str + 1), '^'); return -EINVAL; -- cgit v0.10.2 From f6bbc1daac964da551130dbf01809d3fbd178b2d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 31 Jan 2011 20:56:27 -0200 Subject: perf python: Fix build on 32-bit Where there are lots of errors related to python methods receiving 'char *' for 
things like file open mode, which break the build, also disable strict aliasing and fixup some other warnings. Now builds on both 32-bit and 64-bit fedora systems. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index d2d5217..5317ef2 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -15,6 +15,8 @@ struct throttle_event { u64 stream_id; }; +PyMODINIT_FUNC initperf(void); + #define member_def(type, member, ptype, help) \ { #member, ptype, \ offsetof(struct pyrf_event, event) + offsetof(struct type, member), \ @@ -31,17 +33,15 @@ struct pyrf_event { union perf_event event; }; -#define T_ULONG_LONG T_ULONG - #define sample_members \ - sample_member_def(sample_ip, ip, T_ULONG_LONG, "event type"), \ + sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \ sample_member_def(sample_pid, pid, T_INT, "event pid"), \ sample_member_def(sample_tid, tid, T_INT, "event tid"), \ - sample_member_def(sample_time, time, T_ULONG_LONG, "event timestamp"), \ - sample_member_def(sample_addr, addr, T_ULONG_LONG, "event addr"), \ - sample_member_def(sample_id, id, T_ULONG_LONG, "event id"), \ - sample_member_def(sample_stream_id, stream_id, T_ULONG_LONG, "event stream id"), \ - sample_member_def(sample_period, period, T_ULONG_LONG, "event period"), \ + sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \ + sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \ + sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \ + sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \ + sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \ sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"), static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); @@ -51,11 +51,11 
@@ static PyMemberDef pyrf_mmap_event__members[] = { member_def(perf_event_header, type, T_UINT, "event type"), member_def(mmap_event, pid, T_UINT, "event pid"), member_def(mmap_event, tid, T_UINT, "event tid"), - member_def(mmap_event, start, T_ULONG_LONG, "start of the map"), - member_def(mmap_event, len, T_ULONG_LONG, "map length"), - member_def(mmap_event, pgoff, T_ULONG_LONG, "page offset"), + member_def(mmap_event, start, T_ULONGLONG, "start of the map"), + member_def(mmap_event, len, T_ULONGLONG, "map length"), + member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"), member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"), - { NULL, }, + { .name = NULL, }, }; static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) @@ -96,8 +96,8 @@ static PyMemberDef pyrf_task_event__members[] = { member_def(fork_event, ppid, T_UINT, "event ppid"), member_def(fork_event, tid, T_UINT, "event tid"), member_def(fork_event, ptid, T_UINT, "event ptid"), - member_def(fork_event, time, T_ULONG_LONG, "timestamp"), - { NULL, }, + member_def(fork_event, time, T_ULONGLONG, "timestamp"), + { .name = NULL, }, }; static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) @@ -130,7 +130,7 @@ static PyMemberDef pyrf_comm_event__members[] = { member_def(comm_event, pid, T_UINT, "event pid"), member_def(comm_event, tid, T_UINT, "event tid"), member_def(comm_event, comm, T_STRING_INPLACE, "process name"), - { NULL, }, + { .name = NULL, }, }; static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) @@ -156,10 +156,10 @@ static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object." 
static PyMemberDef pyrf_throttle_event__members[] = { sample_members member_def(perf_event_header, type, T_UINT, "event type"), - member_def(throttle_event, time, T_ULONG_LONG, "timestamp"), - member_def(throttle_event, id, T_ULONG_LONG, "event id"), - member_def(throttle_event, stream_id, T_ULONG_LONG, "event stream id"), - { NULL, }, + member_def(throttle_event, time, T_ULONGLONG, "timestamp"), + member_def(throttle_event, id, T_ULONGLONG, "event id"), + member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"), + { .name = NULL, }, }; static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) @@ -522,7 +522,7 @@ static PyMethodDef pyrf_evsel__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("open the event selector file descriptor table.") }, - { NULL, } + { .ml_name = NULL, } }; static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); @@ -551,7 +551,7 @@ struct pyrf_evlist { }; static int pyrf_evlist__init(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs) + PyObject *args, PyObject *kwargs __used) { PyObject *pcpus = NULL, *pthreads = NULL; struct cpu_map *cpus; @@ -613,7 +613,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist, } static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs) + PyObject *args __used, PyObject *kwargs __used) { struct perf_evlist *evlist = &pevlist->evlist; PyObject *list = PyList_New(0); @@ -645,7 +645,7 @@ free_list: static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, - PyObject *args, PyObject *kwargs) + PyObject *args, PyObject *kwargs __used) { struct perf_evlist *evlist = &pevlist->evlist; PyObject *pevsel; @@ -724,7 +724,7 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("reads an event.") }, - { NULL, } + { .ml_name = NULL, } }; static Py_ssize_t pyrf_evlist__length(PyObject *obj) @@ -840,11 +840,11 @@ 
static struct { { "RECORD_FORK", PERF_RECORD_FORK }, { "RECORD_READ", PERF_RECORD_READ }, { "RECORD_SAMPLE", PERF_RECORD_SAMPLE }, - { NULL, }, + { .name = NULL, }, }; static PyMethodDef perf__methods[] = { - { NULL, NULL } + { .ml_name = NULL, } }; PyMODINIT_FUNC initperf(void) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 496d7f4..1947b04 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -6,7 +6,8 @@ perf = Extension('perf', sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', 'util/util.c', 'util/xyarray.c'], - include_dirs = ['util/include']) + include_dirs = ['util/include'], + extra_compile_args = ['-fno-strict-aliasing', '-Wno-write-strings']) setup(name='perf', version='0.1', -- cgit v0.10.2 From 067187fc9f1d09738fc833392e117f125cb6bbad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Feb 2011 14:57:02 -0200 Subject: perf tools: Remove verbose build messages for the python binding Also now it builds it in a well known location: [acme@felicio linux]$ rm -rf ../build/perf/ [acme@felicio linux]$ mkdir ../build/perf [acme@felicio linux]$ make -j2 O=~acme/git/build/perf -C tools/perf/ [acme@felicio linux]$ ls -la ../build/perf/python/ total 152 -rwxrwxr-x 1 acme acme 147957 Feb 1 14:56 perf.so drwxrwxr-x 3 acme acme 17 Feb 1 14:56 temp [acme@felicio linux]$ [root@felicio ~]# strip ~acme/git/build/perf/python/perf.so [root@felicio ~]# ls -la ~acme/git/build/perf/python/perf.so -rwxrwxr-x 1 acme acme 46264 Feb 1 14:58 /home/acme/git/build/perf/python/perf.so [root@felicio ~]# export PYTHONPATH=~acme/git/build/perf/python/ [root@felicio ~]# ~acme/git/linux/tools/perf/python/twatch.py cpu: 0, pid: 7751, tid: 7751 { type: exit, pid: 7751, ppid: 7751, tid: 7751, ptid: 7751, time: 54562393512356} cpu: 0, pid: 13700, tid: 13700 { type: fork, pid: 7756, ppid: 13700, tid: 7756, ptid: 13700, time: 54562393746739} cpu: 1, pid: 7756, tid: 7756 { 
type: fork, pid: 7757, ppid: 7756, tid: 7757, ptid: 7756, time: 54562394246152} cpu: 1, pid: 7757, tid: 7757 { type: comm, pid: 7757, tid: 7757, comm: awk } cpu: 1, pid: 7757, tid: 7757 { type: exit, pid: 7757, ppid: 7757, tid: 7757, ptid: 7757, time: 54562395456813} Reported-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 67a9f4d..d1984ee 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -325,9 +325,9 @@ SCRIPT_SH += perf-archive.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -pyrf: $(PYRF_OBJS) - python util/setup.py build --build-base='$(OUTPUT)' - +$(OUTPUT)python/perf.so: $(PYRF_OBJS) + @python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \ + --build-temp='$(OUTPUT)python/temp' # # No Perl scripts right now: # @@ -348,12 +348,14 @@ PROGRAMS += $(EXTRA_PROGRAMS) # PROGRAMS += $(OUTPUT)perf +LANG_BINDINGS = $(OUTPUT)python/perf.so + # List built-in command $C whose implementation cmd_$C() is not in # builtin-$C.o but is linked in as part of some other command. 
# # what 'all' will build and 'install' will install, in perfexecdir -ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) pyrf +ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) $(LANG_BINDINGS) # what 'all' will build but not install in perfexecdir OTHER_PROGRAMS = $(OUTPUT)perf$X @@ -1298,6 +1300,8 @@ clean: $(RM) $(htmldocs).tar.gz $(manpages).tar.gz $(MAKE) -C Documentation/ clean $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS + @python util/setup.py clean --build-lib='$(OUTPUT)python' \ + --build-temp='$(OUTPUT)python/temp' .PHONY: all install clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -- cgit v0.10.2 From 568bb7b8e856b9efb98a3f63259c717adc1b96b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Feb 2011 15:05:00 -0200 Subject: perf tools: Fix up 'make clean' target It wasn't using $(OUTPUT) to rm *.o and there were some funny looking automake files that never get created but were being deleted anyway. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d1984ee..85f6549 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1289,12 +1289,10 @@ distclean: clean # $(RM) configure clean: - $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE) + $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive} $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X $(RM) $(TEST_PROGRAMS) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* - $(RM) -r autom4te.cache - $(RM) config.log config.mak.autogen config.mak.append config.status config.cache $(RM) -r $(PERF_TARNAME) .doc-tmp-dir $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz $(RM) $(htmldocs).tar.gz $(manpages).tar.gz -- cgit v0.10.2 From 0015e2e101f5fd3256ab8b5a374c0e8806098871 Mon Sep 17 00:00:00 2001 
From: Arnaldo Carvalho de Melo Date: Tue, 1 Feb 2011 16:18:10 -0200 Subject: perf stat: Fix up resource release order That was causing a SEGV on selected old distros. Problem introduced in 7e2ed09. Reported-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e0f9575..806a999 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -748,8 +748,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) out_free_fd: list_for_each_entry(pos, &evsel_list->entries, node) perf_evsel__free_stat_priv(pos); - perf_evlist__delete(evsel_list); -out: perf_evlist__delete_maps(evsel_list); +out: + perf_evlist__delete(evsel_list); return status; } -- cgit v0.10.2 From 978f626c4e5b9524d1898788d8e34d86dfa00795 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Feb 2011 16:40:51 -0200 Subject: perf tools: Don't try to build python bindings if Python.h not available Just leverage the test done for python support in 'python script', emitting a warning about losing those features if python-dev[el] is not installed. Reported-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 85f6549..4c9499c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -348,14 +348,14 @@ PROGRAMS += $(EXTRA_PROGRAMS) # PROGRAMS += $(OUTPUT)perf -LANG_BINDINGS = $(OUTPUT)python/perf.so +LANG_BINDINGS = # List built-in command $C whose implementation cmd_$C() is not in # builtin-$C.o but is linked in as part of some other command. 
# # what 'all' will build and 'install' will install, in perfexecdir -ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) $(LANG_BINDINGS) +ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) # what 'all' will build but not install in perfexecdir OTHER_PROGRAMS = $(OUTPUT)perf$X @@ -664,12 +664,14 @@ else PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) + msg := $(warning No Python.h found, install python-dev[el] to have python support in 'perf script' and to build the python bindings) BASIC_CFLAGS += -DNO_LIBPYTHON else ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o + LANG_BINDINGS += $(OUTPUT)python/perf.so endif endif @@ -956,7 +958,7 @@ export TAR INSTALL DESTDIR SHELL_PATH SHELL = $(SHELL_PATH) -all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS +all:: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS ifneq (,$X) $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) endif -- cgit v0.10.2 From cdb0861c85c03fe80f4da033aab69df949579dc6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 1 Feb 2011 10:51:23 -0800 Subject: perf top: Fix TUI compilation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > + slsmg_write_nstring(width >= syme->map->dso->long_name_len ? 
> + syme->map->dso->long_name : > + syme->map->dso->short_name, width); need update macro for that calling util/ui/browsers/top.c: In function ‘perf_top_browser__write’: util/ui/browsers/top.c:60:2: error: cast to pointer from integer of different size util/ui/browsers/top.c:60:2: error: comparison between pointer and integer util/ui/browsers/top.c:60:2: error: passing argument 1 of ‘SLsmg_write_nstring’ discards qualifiers from pointer target type /usr/include/slang.h:1728:16: note: expected ‘char *’ but argument is of type ‘const char *’ make: *** [util/ui/browsers/top.o] Error 1 Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tom Zanussi LKML-Reference: <4D48562B.20006@kernel.org> Signed-off-by: Yinghai Lu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/util/ui/libslang.h index 5623da8..2b63e1c 100644 --- a/tools/perf/util/ui/libslang.h +++ b/tools/perf/util/ui/libslang.h @@ -13,11 +13,11 @@ #if SLANG_VERSION < 20104 #define slsmg_printf(msg, args...) \ - SLsmg_printf((char *)msg, ##args) + SLsmg_printf((char *)(msg), ##args) #define slsmg_write_nstring(msg, len) \ - SLsmg_write_nstring((char *)msg, len) + SLsmg_write_nstring((char *)(msg), len) #define sltt_set_color(obj, name, fg, bg) \ - SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg) + SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg)) #else #define slsmg_printf SLsmg_printf #define slsmg_write_nstring SLsmg_write_nstring -- cgit v0.10.2 From fe4b04fa31a6dcf4358aa84cf81e5a7fd079469b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Feb 2011 13:19:09 +0100 Subject: perf: Cure task_oncpu_function_call() races Oleg reported that on architectures with __ARCH_WANT_INTERRUPTS_ON_CTXSW the IPI from task_oncpu_function_call() can land before perf_event_task_sched_in() and cause interesting situations for eg. 
perf_install_in_context(). This patch reworks the task_oncpu_function_call() interface to give a more usable primitive as well as rework all its users to hopefully be more obvious as well as remove the races. While looking at the code I also found a number of races against perf_event_task_sched_out() which can flip contexts between tasks so plug those too. Reported-and-reviewed-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index d747f94..0b40ee3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2578,13 +2578,6 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -/* - * Call the function if the target task is executing on a CPU right now: - */ -extern void task_oncpu_function_call(struct task_struct *p, - void (*func) (void *info), void *info); - - #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 126a302..7d3faa2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -38,6 +38,79 @@ #include +struct remote_function_call { + struct task_struct *p; + int (*func)(void *info); + void *info; + int ret; +}; + +static void remote_function(void *data) +{ + struct remote_function_call *tfc = data; + struct task_struct *p = tfc->p; + + if (p) { + tfc->ret = -EAGAIN; + if (task_cpu(p) != smp_processor_id() || !task_curr(p)) + return; + } + + tfc->ret = tfc->func(tfc->info); +} + +/** + * task_function_call - call a function on the cpu on which a task runs + * @p: the task to evaluate + * @func: the function to be called + * @info: the function call argument + * + * Calls the function @func when the task is currently running. 
This might + * be on the current CPU, which just calls the function directly + * + * returns: @func return value, or + * -ESRCH - when the process isn't running + * -EAGAIN - when the process moved away + */ +static int +task_function_call(struct task_struct *p, int (*func) (void *info), void *info) +{ + struct remote_function_call data = { + .p = p, + .func = func, + .info = info, + .ret = -ESRCH, /* No such (running) process */ + }; + + if (task_curr(p)) + smp_call_function_single(task_cpu(p), remote_function, &data, 1); + + return data.ret; +} + +/** + * cpu_function_call - call a function on the cpu + * @func: the function to be called + * @info: the function call argument + * + * Calls the function @func on the remote cpu. + * + * returns: @func return value or -ENXIO when the cpu is offline + */ +static int cpu_function_call(int cpu, int (*func) (void *info), void *info) +{ + struct remote_function_call data = { + .p = NULL, + .func = func, + .info = info, + .ret = -ENXIO, /* No such CPU */ + }; + + smp_call_function_single(cpu, remote_function, &data, 1); + + return data.ret; +} + enum event_type_t { EVENT_FLEXIBLE = 0x1, EVENT_PINNED = 0x2, @@ -254,7 +327,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) raw_spin_lock_irqsave(&ctx->lock, flags); --ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); - put_ctx(ctx); } /* @@ -618,35 +690,24 @@ __get_cpu_context(struct perf_event_context *ctx) * We disable the event on the hardware level first. After that we * remove it from the context list. */ -static void __perf_event_remove_from_context(void *info) +static int __perf_remove_from_context(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. 
- */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - raw_spin_lock(&ctx->lock); - event_sched_out(event, cpuctx, ctx); - list_del_event(event, ctx); - raw_spin_unlock(&ctx->lock); + + return 0; } /* * Remove the event from a task's (or a CPU's) list of events. * - * Must be called with ctx->mutex held. - * * CPU events are removed with a smp call. For task events we only * call when the task is on a CPU. * @@ -657,49 +718,48 @@ static void __perf_event_remove_from_context(void *info) * When called from perf_event_exit_task, it's OK because the * context has been detached from its task. */ -static void perf_event_remove_from_context(struct perf_event *event) +static void perf_remove_from_context(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; + lockdep_assert_held(&ctx->mutex); + if (!task) { /* * Per cpu events are removed via an smp call and * the removal is always successful. */ - smp_call_function_single(event->cpu, - __perf_event_remove_from_context, - event, 1); + cpu_function_call(event->cpu, __perf_remove_from_context, event); return; } retry: - task_oncpu_function_call(task, __perf_event_remove_from_context, - event); + if (!task_function_call(task, __perf_remove_from_context, event)) + return; raw_spin_lock_irq(&ctx->lock); /* - * If the context is active we need to retry the smp call. + * If we failed to find a running task, but find the context active now + * that we've acquired the ctx->lock, retry. */ - if (ctx->nr_active && !list_empty(&event->group_entry)) { + if (ctx->is_active) { raw_spin_unlock_irq(&ctx->lock); goto retry; } /* - * The lock prevents that this context is scheduled in so we - * can remove the event safely, if the call above did not - * succeed. + * Since the task isn't running, it's safe to remove the event, us + * holding the ctx->lock ensures the task won't get scheduled in.
*/ - if (!list_empty(&event->group_entry)) - list_del_event(event, ctx); + list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); } /* * Cross CPU call to disable a performance event */ -static void __perf_event_disable(void *info) +static int __perf_event_disable(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; @@ -708,9 +768,12 @@ static void __perf_event_disable(void *info) /* * If this is a per-task event, need to check whether this * event's task is the current task on this cpu. + * + * Can trigger due to concurrent perf_event_context_sched_out() + * flipping contexts around. */ if (ctx->task && cpuctx->task_ctx != ctx) - return; + return -EINVAL; raw_spin_lock(&ctx->lock); @@ -729,6 +792,8 @@ static void __perf_event_disable(void *info) } raw_spin_unlock(&ctx->lock); + + return 0; } /* @@ -753,13 +818,13 @@ void perf_event_disable(struct perf_event *event) /* * Disable the event on the cpu that it's on */ - smp_call_function_single(event->cpu, __perf_event_disable, - event, 1); + cpu_function_call(event->cpu, __perf_event_disable, event); return; } retry: - task_oncpu_function_call(task, __perf_event_disable, event); + if (!task_function_call(task, __perf_event_disable, event)) + return; raw_spin_lock_irq(&ctx->lock); /* @@ -767,6 +832,11 @@ retry: */ if (event->state == PERF_EVENT_STATE_ACTIVE) { raw_spin_unlock_irq(&ctx->lock); + /* + * Reload the task pointer, it might have been changed by + * a concurrent perf_event_context_sched_out(). 
+ */ + task = ctx->task; goto retry; } @@ -778,7 +848,6 @@ retry: update_group_times(event); event->state = PERF_EVENT_STATE_OFF; } - raw_spin_unlock_irq(&ctx->lock); } @@ -928,12 +997,14 @@ static void add_event_to_ctx(struct perf_event *event, event->tstamp_stopped = tstamp; } +static void perf_event_context_sched_in(struct perf_event_context *ctx); + /* * Cross CPU call to install and enable a performance event * * Must be called with ctx->mutex held */ -static void __perf_install_in_context(void *info) +static int __perf_install_in_context(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; @@ -942,17 +1013,12 @@ static void __perf_install_in_context(void *info) int err; /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - * Or possibly this is the right context but it isn't - * on this cpu because it had no events. + * In case we're installing a new context to an already running task, + * could also happen before perf_event_task_sched_in() on architectures + * which do context switches with IRQs enabled. */ - if (ctx->task && cpuctx->task_ctx != ctx) { - if (cpuctx->task_ctx || ctx->task != current) - return; - cpuctx->task_ctx = ctx; - } + if (ctx->task && !cpuctx->task_ctx) + perf_event_context_sched_in(ctx); raw_spin_lock(&ctx->lock); ctx->is_active = 1; @@ -997,6 +1063,8 @@ static void __perf_install_in_context(void *info) unlock: raw_spin_unlock(&ctx->lock); + + return 0; } /* @@ -1008,8 +1076,6 @@ unlock: * If the event is attached to a task which is on a CPU we use a smp * call to enable it in the task context. The task might have been * scheduled away, but we check this in the smp call again. - * - * Must be called with ctx->mutex held. 
*/ static void perf_install_in_context(struct perf_event_context *ctx, @@ -1018,6 +1084,8 @@ perf_install_in_context(struct perf_event_context *ctx, { struct task_struct *task = ctx->task; + lockdep_assert_held(&ctx->mutex); + event->ctx = ctx; if (!task) { @@ -1025,31 +1093,29 @@ perf_install_in_context(struct perf_event_context *ctx, * Per cpu events are installed via an smp call and * the install is always successful. */ - smp_call_function_single(cpu, __perf_install_in_context, - event, 1); + cpu_function_call(cpu, __perf_install_in_context, event); return; } retry: - task_oncpu_function_call(task, __perf_install_in_context, - event); + if (!task_function_call(task, __perf_install_in_context, event)) + return; raw_spin_lock_irq(&ctx->lock); /* - * we need to retry the smp call. + * If we failed to find a running task, but find the context active now + * that we've acquired the ctx->lock, retry. */ - if (ctx->is_active && list_empty(&event->group_entry)) { + if (ctx->is_active) { raw_spin_unlock_irq(&ctx->lock); goto retry; } /* - * The lock prevents that this context is scheduled in so we - * can add the event safely, if it the call above did not - * succeed. + * Since the task isn't running, its safe to add the event, us holding + * the ctx->lock ensures the task won't get scheduled in. 
*/ - if (list_empty(&event->group_entry)) - add_event_to_ctx(event, ctx); + add_event_to_ctx(event, ctx); raw_spin_unlock_irq(&ctx->lock); } @@ -1078,7 +1144,7 @@ static void __perf_event_mark_enabled(struct perf_event *event, /* * Cross CPU call to enable a performance event */ -static void __perf_event_enable(void *info) +static int __perf_event_enable(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; @@ -1086,18 +1152,10 @@ static void __perf_event_enable(void *info) struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; - /* - * If this is a per-task event, need to check whether this - * event's task is the current task on this cpu. - */ - if (ctx->task && cpuctx->task_ctx != ctx) { - if (cpuctx->task_ctx || ctx->task != current) - return; - cpuctx->task_ctx = ctx; - } + if (WARN_ON_ONCE(!ctx->is_active)) + return -EINVAL; raw_spin_lock(&ctx->lock); - ctx->is_active = 1; update_context_time(ctx); if (event->state >= PERF_EVENT_STATE_INACTIVE) @@ -1138,6 +1196,8 @@ static void __perf_event_enable(void *info) unlock: raw_spin_unlock(&ctx->lock); + + return 0; } /* @@ -1158,8 +1218,7 @@ void perf_event_enable(struct perf_event *event) /* * Enable the event on the cpu that it's on */ - smp_call_function_single(event->cpu, __perf_event_enable, - event, 1); + cpu_function_call(event->cpu, __perf_event_enable, event); return; } @@ -1178,8 +1237,15 @@ void perf_event_enable(struct perf_event *event) event->state = PERF_EVENT_STATE_OFF; retry: + if (!ctx->is_active) { + __perf_event_mark_enabled(event, ctx); + goto out; + } + raw_spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_event_enable, event); + + if (!task_function_call(task, __perf_event_enable, event)) + return; raw_spin_lock_irq(&ctx->lock); @@ -1187,15 +1253,14 @@ retry: * If the context is active and the event is still off, * we need to retry the cross-call. 
*/ - if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) + if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) { + /* + * task could have been flipped by a concurrent + * perf_event_context_sched_out() + */ + task = ctx->task; goto retry; - - /* - * Since we have the lock this context can't be scheduled - * in, so we can change the state safely. - */ - if (event->state == PERF_EVENT_STATE_OFF) - __perf_event_mark_enabled(event, ctx); + } out: raw_spin_unlock_irq(&ctx->lock); @@ -1339,8 +1404,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, } } -void perf_event_context_sched_out(struct task_struct *task, int ctxn, - struct task_struct *next) +static void perf_event_context_sched_out(struct task_struct *task, int ctxn, + struct task_struct *next) { struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; @@ -1533,7 +1598,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx, { struct perf_cpu_context *cpuctx; - cpuctx = __get_cpu_context(ctx); + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; @@ -1541,7 +1606,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx, cpuctx->task_ctx = ctx; } -void perf_event_context_sched_in(struct perf_event_context *ctx) +static void perf_event_context_sched_in(struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx; @@ -1627,7 +1692,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) * Reduce accuracy by one bit such that @a and @b converge * to a similar magnitude. */ -#define REDUCE_FLS(a, b) \ +#define REDUCE_FLS(a, b) \ do { \ if (a##_fls > b##_fls) { \ a >>= 1; \ @@ -2213,6 +2278,9 @@ errout: } +/* + * Returns a matching context with refcount and pincount. 
+ */ static struct perf_event_context * find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) { @@ -2237,6 +2305,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; get_ctx(ctx); + ++ctx->pin_count; return ctx; } @@ -2250,6 +2319,7 @@ retry: ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { unclone_ctx(ctx); + ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); } @@ -2271,8 +2341,10 @@ retry: err = -ESRCH; else if (task->perf_event_ctxp[ctxn]) err = -EAGAIN; - else + else { + ++ctx->pin_count; rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); + } mutex_unlock(&task->perf_event_mutex); if (unlikely(err)) { @@ -5950,10 +6022,10 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *gctx = group_leader->ctx; mutex_lock(&gctx->mutex); - perf_event_remove_from_context(group_leader); + perf_remove_from_context(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_event_remove_from_context(sibling); + perf_remove_from_context(sibling); put_ctx(gctx); } mutex_unlock(&gctx->mutex); @@ -5976,6 +6048,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_install_in_context(ctx, event, cpu); ++ctx->generation; + perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); event->owner = current; @@ -6001,6 +6074,7 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: + perf_unpin_context(ctx); put_ctx(ctx); err_alloc: free_event(event); @@ -6051,6 +6125,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, mutex_lock(&ctx->mutex); perf_install_in_context(ctx, event, cpu); ++ctx->generation; + perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); return event; @@ -6104,7 +6179,7 @@ __perf_event_exit_task(struct perf_event *child_event, { struct perf_event *parent_event; - perf_event_remove_from_context(child_event); + perf_remove_from_context(child_event); parent_event = 
child_event->parent; /* @@ -6411,7 +6486,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, return 0; } - child_ctx = child->perf_event_ctxp[ctxn]; + child_ctx = child->perf_event_ctxp[ctxn]; if (!child_ctx) { /* * This is executed from the parent task context, so @@ -6526,6 +6601,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); + put_ctx(parent_ctx); return ret; } @@ -6595,9 +6671,9 @@ static void __perf_event_exit_context(void *__info) perf_pmu_rotate_stop(ctx->pmu); list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - __perf_event_remove_from_context(event); + __perf_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) - __perf_event_remove_from_context(event); + __perf_remove_from_context(event); } static void perf_event_exit_cpu_context(int cpu) diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4..31cb5d5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2265,27 +2265,6 @@ void kick_process(struct task_struct *p) EXPORT_SYMBOL_GPL(kick_process); #endif /* CONFIG_SMP */ -/** - * task_oncpu_function_call - call a function on the cpu on which a task runs - * @p: the task to evaluate - * @func: the function to be called - * @info: the function call argument - * - * Calls the function @func when the task is currently running. This might - * be on the current CPU, which just calls the function directly - */ -void task_oncpu_function_call(struct task_struct *p, - void (*func) (void *info), void *info) -{ - int cpu; - - preempt_disable(); - cpu = task_cpu(p); - if (task_curr(p)) - smp_call_function_single(cpu, func, info, 1); - preempt_enable(); -} - #ifdef CONFIG_SMP /* * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. 
@@ -2776,9 +2755,12 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + sched_info_switch(prev, next); + perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); + trace_sched_switch(prev, next); } /** @@ -2911,7 +2893,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - trace_sched_switch(prev, next); + mm = next->mm; oldmm = prev->active_mm; /* @@ -3989,9 +3971,6 @@ need_resched_nonpreemptible: rq->skip_clock_update = 0; if (likely(prev != next)) { - sched_info_switch(prev, next); - perf_event_task_sched_out(prev, next); - rq->nr_switches++; rq->curr = next; ++*switch_count; -- cgit v0.10.2 From 764328d3209dd81b02a55722556b07b6f35e3ca0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Feb 2011 07:33:24 -0200 Subject: perf top: Remove superfluous name_len field From the sym_entry struct, struct symbol already has this field. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 104de9a..154e088 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -787,9 +787,6 @@ static int symbol_filter(struct map *map, struct symbol *sym) } } - if (!syme->skip) - syme->name_len = strlen(sym->name); - return 0; } diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 1d2e265..70a9c13 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -200,6 +200,7 @@ void perf_top__find_widths(struct perf_top *top, struct rb_root *root, for (nd = rb_first(root); nd; nd = rb_next(nd)) { struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); + struct symbol *sym = sym_entry__symbol(syme); if (++printed > top->print_entries || (int)syme->snap_count < top->count_filter) @@ -211,7 +212,7 @@ void perf_top__find_widths(struct perf_top *top, struct rb_root *root, if (syme->map->dso->short_name_len > *dso_short_width) *dso_short_width = syme->map->dso->short_name_len; - if (syme->name_len > *sym_width) - *sym_width = syme->name_len; + if (sym->namelen > *sym_width) + *sym_width = sym->namelen; } } diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 611370f..5009508 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -31,7 +31,6 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; - u16 name_len; u8 origin; struct map *map; struct sym_entry_source *src; -- cgit v0.10.2 From 78f7defedbb4da73b9a07635c357c1afcaa55c8f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Feb 2011 09:45:46 -0200 Subject: perf annotate: Move annotate functions to util/ They will be used by perf top, so that we have just one set of routines to do annotation. 
Rename "struct sym_priv" to "struct annotation", etc, to clarify this code a bit. Rename "struct sym_ext" to "struct source_line", to give it a meaningful name, that clarifies that it is the result of an addr2line call, that is sorted by the percentage one particular source code line appeared in the annotation. And since we're moving things around also rename 'sym_hist->ip' to 'sym_hist->addr' as we want to do data structure annotation at some point. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4c9499c..be3eb1d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -401,6 +401,7 @@ LIB_H += util/include/dwarf-regs.h LIB_H += util/include/asm/dwarf2.h LIB_H += util/include/asm/cpufeature.h LIB_H += perf.h +LIB_H += util/annotate.h LIB_H += util/cache.h LIB_H += util/callchain.h LIB_H += util/build-id.h @@ -444,6 +445,7 @@ LIB_H += $(ARCH_INCLUDE) LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o +LIB_OBJS += $(OUTPUT)util/annotate.o LIB_OBJS += $(OUTPUT)util/build-id.o LIB_OBJS += $(OUTPUT)util/config.o LIB_OBJS += $(OUTPUT)util/ctype.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cd9dec4..9072ef4 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -9,6 +9,7 @@ #include "util/util.h" +#include "util/util.h" #include "util/color.h" #include #include "util/cache.h" @@ -18,6 +19,7 @@ #include "perf.h" #include "util/debug.h" +#include "util/annotate.h" #include "util/event.h" #include "util/parse-options.h" #include "util/parse-events.h" @@ -79,245 +81,10 @@ static int process_sample_event(union perf_event *event, return 0; } -static int objdump_line__print(struct objdump_line *self, - struct list_head *head, - struct hist_entry *he, u64 len) -{ - struct symbol *sym = 
he->ms.sym; - static const char *prev_line; - static const char *prev_color; - - if (self->offset != -1) { - const char *path = NULL; - unsigned int hits = 0; - double percent = 0.0; - const char *color; - struct sym_priv *priv = symbol__priv(sym); - struct sym_ext *sym_ext = priv->ext; - struct sym_hist *h = priv->hist; - s64 offset = self->offset; - struct objdump_line *next = objdump__get_next_ip_line(head, self); - - while (offset < (s64)len && - (next == NULL || offset < next->offset)) { - if (sym_ext) { - if (path == NULL) - path = sym_ext[offset].path; - percent += sym_ext[offset].percent; - } else - hits += h->ip[offset]; - - ++offset; - } - - if (sym_ext == NULL && h->sum) - percent = 100.0 * hits / h->sum; - - color = get_percent_color(percent); - - /* - * Also color the filename and line if needed, with - * the same color than the percentage. Don't print it - * twice for close colored ip with the same filename:line - */ - if (path) { - if (!prev_line || strcmp(prev_line, path) - || color != prev_color) { - color_fprintf(stdout, color, " %s", path); - prev_line = path; - prev_color = color; - } - } - - color_fprintf(stdout, color, " %7.2f", percent); - printf(" : "); - color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line); - } else { - if (!*self->line) - printf(" :\n"); - else - printf(" : %s\n", self->line); - } - - return 0; -} - -static struct rb_root root_sym_ext; - -static void insert_source_line(struct sym_ext *sym_ext) -{ - struct sym_ext *iter; - struct rb_node **p = &root_sym_ext.rb_node; - struct rb_node *parent = NULL; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct sym_ext, node); - - if (sym_ext->percent > iter->percent) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&sym_ext->node, parent, p); - rb_insert_color(&sym_ext->node, &root_sym_ext); -} - -static void free_source_line(struct hist_entry *he, int len) -{ - struct sym_priv *priv = symbol__priv(he->ms.sym); - struct sym_ext 
*sym_ext = priv->ext; - int i; - - if (!sym_ext) - return; - - for (i = 0; i < len; i++) - free(sym_ext[i].path); - free(sym_ext); - - priv->ext = NULL; - root_sym_ext = RB_ROOT; -} - -/* Get the filename:line for the colored entries */ -static void -get_source_line(struct hist_entry *he, int len, const char *filename) -{ - struct symbol *sym = he->ms.sym; - u64 start; - int i; - char cmd[PATH_MAX * 2]; - struct sym_ext *sym_ext; - struct sym_priv *priv = symbol__priv(sym); - struct sym_hist *h = priv->hist; - - if (!h->sum) - return; - - sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext)); - if (!priv->ext) - return; - - start = he->ms.map->unmap_ip(he->ms.map, sym->start); - - for (i = 0; i < len; i++) { - char *path = NULL; - size_t line_len; - u64 offset; - FILE *fp; - - sym_ext[i].percent = 100.0 * h->ip[i] / h->sum; - if (sym_ext[i].percent <= 0.5) - continue; - - offset = start + i; - sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset); - fp = popen(cmd, "r"); - if (!fp) - continue; - - if (getline(&path, &line_len, fp) < 0 || !line_len) - goto next; - - sym_ext[i].path = malloc(sizeof(char) * line_len + 1); - if (!sym_ext[i].path) - goto next; - - strcpy(sym_ext[i].path, path); - insert_source_line(&sym_ext[i]); - - next: - pclose(fp); - } -} - -static void print_summary(const char *filename) -{ - struct sym_ext *sym_ext; - struct rb_node *node; - - printf("\nSorted summary for file %s\n", filename); - printf("----------------------------------------------\n\n"); - - if (RB_EMPTY_ROOT(&root_sym_ext)) { - printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); - return; - } - - node = rb_first(&root_sym_ext); - while (node) { - double percent; - const char *color; - char *path; - - sym_ext = rb_entry(node, struct sym_ext, node); - percent = sym_ext->percent; - color = get_percent_color(percent); - path = sym_ext->path; - - color_fprintf(stdout, color, " %7.2f %s", percent, path); - node = rb_next(node); - } -} - -static void 
hist_entry__print_hits(struct hist_entry *self) -{ - struct symbol *sym = self->ms.sym; - struct sym_priv *priv = symbol__priv(sym); - struct sym_hist *h = priv->hist; - u64 len = sym->end - sym->start, offset; - - for (offset = 0; offset < len; ++offset) - if (h->ip[offset] != 0) - printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, - sym->start + offset, h->ip[offset]); - printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); -} - static int hist_entry__tty_annotate(struct hist_entry *he) { - struct map *map = he->ms.map; - struct dso *dso = map->dso; - struct symbol *sym = he->ms.sym; - const char *filename = dso->long_name, *d_filename; - u64 len; - LIST_HEAD(head); - struct objdump_line *pos, *n; - - if (hist_entry__annotate(he, &head, 0) < 0) - return -1; - - if (full_paths) - d_filename = filename; - else - d_filename = basename(filename); - - len = sym->end - sym->start; - - if (print_line) { - get_source_line(he, len, filename); - print_summary(filename); - } - - printf("\n\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", d_filename); - printf("------------------------------------------------\n"); - - if (verbose) - hist_entry__print_hits(he); - - list_for_each_entry_safe(pos, n, &head, node) { - objdump_line__print(pos, &head, he, len); - list_del(&pos->node); - objdump_line__free(pos); - } - - if (print_line) - free_source_line(he, len); - - return 0; + return symbol__tty_annotate(he->ms.sym, he->ms.map, + print_line, full_paths); } static void hists__find_annotations(struct hists *self) @@ -327,13 +94,13 @@ static void hists__find_annotations(struct hists *self) while (nd) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - struct sym_priv *priv; + struct annotation *notes; if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned) goto find_next; - priv = symbol__priv(he->ms.sym); - if (priv->hist == NULL) { + notes = symbol__annotation(he->ms.sym); + if 
(notes->histogram == NULL) { find_next: if (key == KEY_LEFT) nd = rb_prev(nd); @@ -362,11 +129,11 @@ find_next: nd = rb_next(nd); /* * Since we have a hist_entry per IP for the same - * symbol, free he->ms.sym->hist to signal we already + * symbol, free he->ms.sym->histogram to signal we already * processed this symbol. */ - free(priv->hist); - priv->hist = NULL; + free(notes->histogram); + notes->histogram = NULL; } } } @@ -454,7 +221,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) setup_browser(true); - symbol_conf.priv_size = sizeof(struct sym_priv); + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = true; if (symbol__init() < 0) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 080937c..91e4cdb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -9,6 +9,7 @@ #include "util/util.h" +#include "util/annotate.h" #include "util/color.h" #include #include "util/cache.h" @@ -508,7 +509,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) * implementation. */ if (use_browser > 0) { - symbol_conf.priv_size = sizeof(struct sym_priv); + symbol_conf.priv_size = sizeof(struct annotation); /* * For searching by name on the "Browse map details". * providing it only in verbose mode not to bloat too diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c new file mode 100644 index 0000000..9b25575 --- /dev/null +++ b/tools/perf/util/annotate.c @@ -0,0 +1,467 @@ +/* + * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Parts came from builtin-annotate.c, see those files for further + * copyright notes. + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include "util.h" +#include "build-id.h" +#include "color.h" +#include "cache.h" +#include "symbol.h" +#include "debug.h" +#include "annotate.h" + +static int symbol__alloc_hist(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + const int size = (sizeof(*notes->histogram) + + (sym->end - sym->start) * sizeof(u64)); + + notes->histogram = zalloc(size); + return notes->histogram == NULL ? -1 : 0; +} + +int symbol__inc_addr_samples(struct symbol *sym, struct map *map, u64 addr) +{ + unsigned int sym_size, offset; + struct annotation *notes; + struct sym_hist *h; + + if (!sym || !map) + return 0; + + notes = symbol__annotation(sym); + if (notes->histogram == NULL && symbol__alloc_hist(sym) < 0) + return -ENOMEM; + + sym_size = sym->end - sym->start; + offset = addr - sym->start; + + pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); + + if (offset >= sym_size) + return 0; + + h = notes->histogram; + h->sum++; + h->addr[offset]++; + + pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 + "] => %" PRIu64 "\n", sym->start, sym->name, + addr, addr - sym->start, h->addr[offset]); + return 0; +} + +static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize) +{ + struct objdump_line *self = malloc(sizeof(*self) + privsize); + + if (self != NULL) { + self->offset = offset; + self->line = line; + } + + return self; +} + +void objdump_line__free(struct objdump_line *self) +{ + free(self->line); + free(self); +} + +static void objdump__add_line(struct list_head *head, struct objdump_line *line) +{ + list_add_tail(&line->node, head); +} + +struct objdump_line *objdump__get_next_ip_line(struct list_head *head, + struct objdump_line *pos) +{ + list_for_each_entry_continue(pos, head, node) + if (pos->offset >= 0) + return pos; + + return NULL; +} + +static void objdump_line__print(struct objdump_line *oline, + struct list_head *head, + struct 
symbol *sym, u64 len) +{ + static const char *prev_line; + static const char *prev_color; + + if (oline->offset != -1) { + const char *path = NULL; + unsigned int hits = 0; + double percent = 0.0; + const char *color; + struct annotation *notes = symbol__annotation(sym); + struct source_line *src_line = notes->src_line; + struct sym_hist *h = notes->histogram; + s64 offset = oline->offset; + struct objdump_line *next = objdump__get_next_ip_line(head, oline); + + while (offset < (s64)len && + (next == NULL || offset < next->offset)) { + if (src_line) { + if (path == NULL) + path = src_line[offset].path; + percent += src_line[offset].percent; + } else + hits += h->addr[offset]; + + ++offset; + } + + if (src_line == NULL && h->sum) + percent = 100.0 * hits / h->sum; + + color = get_percent_color(percent); + + /* + * Also color the filename and line if needed, with + * the same color than the percentage. Don't print it + * twice for close colored addr with the same filename:line + */ + if (path) { + if (!prev_line || strcmp(prev_line, path) + || color != prev_color) { + color_fprintf(stdout, color, " %s", path); + prev_line = path; + prev_color = color; + } + } + + color_fprintf(stdout, color, " %7.2f", percent); + printf(" : "); + color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line); + } else { + if (!*oline->line) + printf(" :\n"); + else + printf(" : %s\n", oline->line); + } +} + +static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, FILE *file, + struct list_head *head, size_t privsize) +{ + struct objdump_line *objdump_line; + char *line = NULL, *tmp, *tmp2, *c; + size_t line_len; + s64 line_ip, offset = -1; + + if (getline(&line, &line_len, file) < 0) + return -1; + + if (!line) + return -1; + + while (line_len != 0 && isspace(line[line_len - 1])) + line[--line_len] = '\0'; + + c = strchr(line, '\n'); + if (c) + *c = 0; + + line_ip = -1; + + /* + * Strip leading spaces: + */ + tmp = line; + while (*tmp) { + if (*tmp != ' ') + break; + 
tmp++; + } + + if (*tmp) { + /* + * Parse hexa addresses followed by ':' + */ + line_ip = strtoull(tmp, &tmp2, 16); + if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0') + line_ip = -1; + } + + if (line_ip != -1) { + u64 start = map__rip_2objdump(map, sym->start), + end = map__rip_2objdump(map, sym->end); + + offset = line_ip - start; + if (offset < 0 || (u64)line_ip > end) + offset = -1; + } + + objdump_line = objdump_line__new(offset, line, privsize); + if (objdump_line == NULL) { + free(line); + return -1; + } + objdump__add_line(head, objdump_line); + + return 0; +} + +int symbol__annotate(struct symbol *sym, struct map *map, + struct list_head *head, size_t privsize) +{ + struct dso *dso = map->dso; + char *filename = dso__build_id_filename(dso, NULL, 0); + bool free_filename = true; + char command[PATH_MAX * 2]; + FILE *file; + int err = 0; + u64 len; + char symfs_filename[PATH_MAX]; + + if (filename) { + snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", + symbol_conf.symfs, filename); + } + + if (filename == NULL) { + if (dso->has_build_id) { + pr_err("Can't annotate %s: not enough memory\n", + sym->name); + return -ENOMEM; + } + goto fallback; + } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || + strstr(command, "[kernel.kallsyms]") || + access(symfs_filename, R_OK)) { + free(filename); +fallback: + /* + * If we don't have build-ids or the build-id file isn't in the + * cache, or is just a kallsyms file, well, lets hope that this + * DSO is the same as when 'perf record' ran. 
+ */ + filename = dso->long_name; + snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", + symbol_conf.symfs, filename); + free_filename = false; + } + + if (dso->origin == DSO__ORIG_KERNEL) { + if (dso->annotate_warned) + goto out_free_filename; + err = -ENOENT; + dso->annotate_warned = 1; + pr_err("Can't annotate %s: No vmlinux file was found in the " + "path\n", sym->name); + goto out_free_filename; + } + + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, + filename, sym->name, map->unmap_ip(map, sym->start), + map->unmap_ip(map, sym->end)); + + len = sym->end - sym->start; + + pr_debug("annotating [%p] %30s : [%p] %30s\n", + dso, dso->long_name, sym, sym->name); + + snprintf(command, sizeof(command), + "objdump --start-address=0x%016" PRIx64 + " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand", + map__rip_2objdump(map, sym->start), + map__rip_2objdump(map, sym->end), + symfs_filename, filename); + + pr_debug("Executing: %s\n", command); + + file = popen(command, "r"); + if (!file) + goto out_free_filename; + + while (!feof(file)) + if (symbol__parse_objdump_line(sym, map, file, head, privsize) < 0) + break; + + pclose(file); +out_free_filename: + if (free_filename) + free(filename); + return err; +} + +static void insert_source_line(struct rb_root *root, struct source_line *src_line) +{ + struct source_line *iter; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct source_line, node); + + if (src_line->percent > iter->percent) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&src_line->node, parent, p); + rb_insert_color(&src_line->node, root); +} + +static void symbol__free_source_line(struct symbol *sym, int len) +{ + struct annotation *notes = symbol__annotation(sym); + struct source_line *src_line = notes->src_line; + int i; + + for (i = 0; i < len; i++) + free(src_line[i].path); + + 
free(src_line); + notes->src_line = NULL; +} + +/* Get the filename:line for the colored entries */ +static int symbol__get_source_line(struct symbol *sym, struct map *map, + struct rb_root *root, int len, + const char *filename) +{ + u64 start; + int i; + char cmd[PATH_MAX * 2]; + struct source_line *src_line; + struct annotation *notes = symbol__annotation(sym); + struct sym_hist *h = notes->histogram; + + if (!h->sum) + return 0; + + src_line = notes->src_line = calloc(len, sizeof(struct source_line)); + if (!notes->src_line) + return -1; + + start = map->unmap_ip(map, sym->start); + + for (i = 0; i < len; i++) { + char *path = NULL; + size_t line_len; + u64 offset; + FILE *fp; + + src_line[i].percent = 100.0 * h->addr[i] / h->sum; + if (src_line[i].percent <= 0.5) + continue; + + offset = start + i; + sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset); + fp = popen(cmd, "r"); + if (!fp) + continue; + + if (getline(&path, &line_len, fp) < 0 || !line_len) + goto next; + + src_line[i].path = malloc(sizeof(char) * line_len + 1); + if (!src_line[i].path) + goto next; + + strcpy(src_line[i].path, path); + insert_source_line(root, &src_line[i]); + + next: + pclose(fp); + } + + return 0; +} + +static void print_summary(struct rb_root *root, const char *filename) +{ + struct source_line *src_line; + struct rb_node *node; + + printf("\nSorted summary for file %s\n", filename); + printf("----------------------------------------------\n\n"); + + if (RB_EMPTY_ROOT(root)) { + printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); + return; + } + + node = rb_first(root); + while (node) { + double percent; + const char *color; + char *path; + + src_line = rb_entry(node, struct source_line, node); + percent = src_line->percent; + color = get_percent_color(percent); + path = src_line->path; + + color_fprintf(stdout, color, " %7.2f %s", percent, path); + node = rb_next(node); + } +} + +static void symbol__annotate_hits(struct symbol *sym) +{ + struct annotation *notes = 
symbol__annotation(sym); + struct sym_hist *h = notes->histogram; + u64 len = sym->end - sym->start, offset; + + for (offset = 0; offset < len; ++offset) + if (h->addr[offset] != 0) + printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, + sym->start + offset, h->addr[offset]); + printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); +} + +int symbol__tty_annotate(struct symbol *sym, struct map *map, bool print_lines, + bool full_paths) +{ + struct dso *dso = map->dso; + const char *filename = dso->long_name, *d_filename; + struct rb_root source_line = RB_ROOT; + struct objdump_line *pos, *n; + LIST_HEAD(head); + u64 len; + + if (symbol__annotate(sym, map, &head, 0) < 0) + return -1; + + if (full_paths) + d_filename = filename; + else + d_filename = basename(filename); + + len = sym->end - sym->start; + + if (print_lines) { + symbol__get_source_line(sym, map, &source_line, len, filename); + print_summary(&source_line, filename); + } + + printf("\n\n------------------------------------------------\n"); + printf(" Percent | Source code & Disassembly of %s\n", d_filename); + printf("------------------------------------------------\n"); + + if (verbose) + symbol__annotate_hits(sym); + + list_for_each_entry_safe(pos, n, &head, node) { + objdump_line__print(pos, &head, sym, len); + list_del(&pos->node); + objdump_line__free(pos); + } + + if (print_lines) + symbol__free_source_line(sym, len); + + return 0; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h new file mode 100644 index 0000000..6e2fbc2 --- /dev/null +++ b/tools/perf/util/annotate.h @@ -0,0 +1,65 @@ +#ifndef __PERF_ANNOTATE_H +#define __PERF_ANNOTATE_H + +#include +#include "types.h" +#include "symbol.h" +#include +#include + +struct objdump_line { + struct list_head node; + s64 offset; + char *line; +}; + +void objdump_line__free(struct objdump_line *self); +struct objdump_line *objdump__get_next_ip_line(struct list_head *head, + struct objdump_line *pos); + +struct 
sym_hist { + u64 sum; + u64 addr[0]; +}; + +struct source_line { + struct rb_node node; + double percent; + char *path; +}; + +struct annotation { + struct sym_hist *histogram; + struct source_line *src_line; +}; + +struct sannotation { + struct annotation annotation; + struct symbol symbol; +}; + +static inline struct annotation *symbol__annotation(struct symbol *sym) +{ + struct sannotation *a = container_of(sym, struct sannotation, symbol); + return &a->annotation; +} + +int symbol__inc_addr_samples(struct symbol *sym, struct map *map, u64 addr); + +int symbol__annotate(struct symbol *sym, struct map *map, + struct list_head *head, size_t privsize); + +int symbol__tty_annotate(struct symbol *sym, struct map *map, + bool print_lines, bool full_paths); + +#ifdef NO_NEWT_SUPPORT +static inline int symbol__tui_annotate(symbol *sym __used, + struct map *map __used) +{ + return 0; +} +#else +int symbol__tui_annotate(struct symbol *sym, struct map *map); +#endif + +#endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9588780..6d9c92c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,3 +1,4 @@ +#include "annotate.h" #include "util.h" #include "build-id.h" #include "hist.h" @@ -949,225 +950,15 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread) } } -static int symbol__alloc_hist(struct symbol *self) +int hist_entry__inc_addr_samples(struct hist_entry *he, u64 ip) { - struct sym_priv *priv = symbol__priv(self); - const int size = (sizeof(*priv->hist) + - (self->end - self->start) * sizeof(u64)); - - priv->hist = zalloc(size); - return priv->hist == NULL ? 
-1 : 0; -} - -int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip) -{ - unsigned int sym_size, offset; - struct symbol *sym = self->ms.sym; - struct sym_priv *priv; - struct sym_hist *h; - - if (!sym || !self->ms.map) - return 0; - - priv = symbol__priv(sym); - if (priv->hist == NULL && symbol__alloc_hist(sym) < 0) - return -ENOMEM; - - sym_size = sym->end - sym->start; - offset = ip - sym->start; - - pr_debug3("%s: ip=%#" PRIx64 "\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip)); - - if (offset >= sym_size) - return 0; - - h = priv->hist; - h->sum++; - h->ip[offset]++; - - pr_debug3("%#" PRIx64 " %s: period++ [ip: %#" PRIx64 ", %#" PRIx64 - "] => %" PRIu64 "\n", self->ms.sym->start, self->ms.sym->name, - ip, ip - self->ms.sym->start, h->ip[offset]); - return 0; -} - -static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize) -{ - struct objdump_line *self = malloc(sizeof(*self) + privsize); - - if (self != NULL) { - self->offset = offset; - self->line = line; - } - - return self; -} - -void objdump_line__free(struct objdump_line *self) -{ - free(self->line); - free(self); -} - -static void objdump__add_line(struct list_head *head, struct objdump_line *line) -{ - list_add_tail(&line->node, head); -} - -struct objdump_line *objdump__get_next_ip_line(struct list_head *head, - struct objdump_line *pos) -{ - list_for_each_entry_continue(pos, head, node) - if (pos->offset >= 0) - return pos; - - return NULL; + return symbol__inc_addr_samples(he->ms.sym, he->ms.map, ip); } -static int hist_entry__parse_objdump_line(struct hist_entry *self, FILE *file, - struct list_head *head, size_t privsize) -{ - struct symbol *sym = self->ms.sym; - struct objdump_line *objdump_line; - char *line = NULL, *tmp, *tmp2, *c; - size_t line_len; - s64 line_ip, offset = -1; - - if (getline(&line, &line_len, file) < 0) - return -1; - - if (!line) - return -1; - - while (line_len != 0 && isspace(line[line_len - 1])) - line[--line_len] = '\0'; - - 
c = strchr(line, '\n'); - if (c) - *c = 0; - - line_ip = -1; - - /* - * Strip leading spaces: - */ - tmp = line; - while (*tmp) { - if (*tmp != ' ') - break; - tmp++; - } - - if (*tmp) { - /* - * Parse hexa addresses followed by ':' - */ - line_ip = strtoull(tmp, &tmp2, 16); - if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0') - line_ip = -1; - } - - if (line_ip != -1) { - u64 start = map__rip_2objdump(self->ms.map, sym->start), - end = map__rip_2objdump(self->ms.map, sym->end); - - offset = line_ip - start; - if (offset < 0 || (u64)line_ip > end) - offset = -1; - } - - objdump_line = objdump_line__new(offset, line, privsize); - if (objdump_line == NULL) { - free(line); - return -1; - } - objdump__add_line(head, objdump_line); - - return 0; -} - -int hist_entry__annotate(struct hist_entry *self, struct list_head *head, +int hist_entry__annotate(struct hist_entry *he, struct list_head *head, size_t privsize) { - struct symbol *sym = self->ms.sym; - struct map *map = self->ms.map; - struct dso *dso = map->dso; - char *filename = dso__build_id_filename(dso, NULL, 0); - bool free_filename = true; - char command[PATH_MAX * 2]; - FILE *file; - int err = 0; - u64 len; - char symfs_filename[PATH_MAX]; - - if (filename) { - snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", - symbol_conf.symfs, filename); - } - - if (filename == NULL) { - if (dso->has_build_id) { - pr_err("Can't annotate %s: not enough memory\n", - sym->name); - return -ENOMEM; - } - goto fallback; - } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || - strstr(command, "[kernel.kallsyms]") || - access(symfs_filename, R_OK)) { - free(filename); -fallback: - /* - * If we don't have build-ids or the build-id file isn't in the - * cache, or is just a kallsyms file, well, lets hope that this - * DSO is the same as when 'perf record' ran. 
- */ - filename = dso->long_name; - snprintf(symfs_filename, sizeof(symfs_filename), "%s%s", - symbol_conf.symfs, filename); - free_filename = false; - } - - if (dso->origin == DSO__ORIG_KERNEL) { - if (dso->annotate_warned) - goto out_free_filename; - err = -ENOENT; - dso->annotate_warned = 1; - pr_err("Can't annotate %s: No vmlinux file was found in the " - "path\n", sym->name); - goto out_free_filename; - } - - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, - filename, sym->name, map->unmap_ip(map, sym->start), - map->unmap_ip(map, sym->end)); - - len = sym->end - sym->start; - - pr_debug("annotating [%p] %30s : [%p] %30s\n", - dso, dso->long_name, sym, sym->name); - - snprintf(command, sizeof(command), - "objdump --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand", - map__rip_2objdump(map, sym->start), - map__rip_2objdump(map, sym->end), - symfs_filename, filename); - - pr_debug("Executing: %s\n", command); - - file = popen(command, "r"); - if (!file) - goto out_free_filename; - - while (!feof(file)) - if (hist_entry__parse_objdump_line(self, file, head, privsize) < 0) - break; - - pclose(file); -out_free_filename: - if (free_filename) - free(filename); - return err; + return symbol__annotate(he->ms.sym, he->ms.map, head, privsize); } void hists__inc_nr_events(struct hists *self, u32 type) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 889559b..8a201f7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -9,33 +9,6 @@ extern struct callchain_param callchain_param; struct hist_entry; struct addr_location; struct symbol; -struct rb_root; - -struct objdump_line { - struct list_head node; - s64 offset; - char *line; -}; - -void objdump_line__free(struct objdump_line *self); -struct objdump_line *objdump__get_next_ip_line(struct list_head *head, - struct objdump_line *pos); - -struct sym_hist { - u64 sum; - u64 ip[0]; -}; - -struct sym_ext { - struct 
rb_node node; - double percent; - char *path; -}; - -struct sym_priv { - struct sym_hist *hist; - struct sym_ext *ext; -}; /* * The kernel collects the number of events it couldn't send in a stretch and diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 82b78f9..daa7138 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -1,9 +1,11 @@ #include "../browser.h" #include "../helpline.h" #include "../libslang.h" +#include "../../annotate.h" #include "../../hist.h" #include "../../sort.h" #include "../../symbol.h" +#include "../../annotate.h" static void ui__error_window(const char *fmt, ...) { @@ -66,24 +68,26 @@ static double objdump_line__calc_percent(struct objdump_line *self, if (self->offset != -1) { int len = sym->end - sym->start; unsigned int hits = 0; - struct sym_priv *priv = symbol__priv(sym); - struct sym_ext *sym_ext = priv->ext; - struct sym_hist *h = priv->hist; + struct annotation *notes = symbol__annotation(sym); + struct source_line *src_line = notes->src_line; + struct sym_hist *h = notes->histogram; s64 offset = self->offset; struct objdump_line *next = objdump__get_next_ip_line(head, self); - while (offset < (s64)len && (next == NULL || offset < next->offset)) { - if (sym_ext) { - percent += sym_ext[offset].percent; + if (src_line) { + percent += src_line[offset].percent; } else - hits += h->ip[offset]; + hits += h->addr[offset]; ++offset; } - - if (sym_ext == NULL && h->sum) + /* + * If the percentage wasn't already calculated in + * symbol__get_source_line, do it now: + */ + if (src_line == NULL && h->sum) percent = 100.0 * hits / h->sum; } @@ -136,10 +140,10 @@ static void annotate_browser__set_top(struct annotate_browser *self, static int annotate_browser__run(struct annotate_browser *self) { struct rb_node *nd; - struct hist_entry *he = self->b.priv; + struct symbol *sym = self->b.priv; int key; - if (ui_browser__show(&self->b, he->ms.sym->name, + if 
(ui_browser__show(&self->b, sym->name, "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) return -1; /* @@ -179,7 +183,12 @@ out: return key; } -int hist_entry__tui_annotate(struct hist_entry *self) +int hist_entry__tui_annotate(struct hist_entry *he) +{ + return symbol__tui_annotate(he->ms.sym, he->ms.map); +} + +int symbol__tui_annotate(struct symbol *sym, struct map *map) { struct objdump_line *pos, *n; struct objdump_line_rb_node *rbpos; @@ -190,18 +199,18 @@ int hist_entry__tui_annotate(struct hist_entry *self) .refresh = ui_browser__list_head_refresh, .seek = ui_browser__list_head_seek, .write = annotate_browser__write, - .priv = self, + .priv = sym, }, }; int ret; - if (self->ms.sym == NULL) + if (sym == NULL) return -1; - if (self->ms.map->dso->annotate_warned) + if (map->dso->annotate_warned) return -1; - if (hist_entry__annotate(self, &head, sizeof(*rbpos)) < 0) { + if (symbol__annotate(sym, map, &head, sizeof(*rbpos)) < 0) { ui__error_window(ui_helpline__last_msg); return -1; } @@ -214,7 +223,7 @@ int hist_entry__tui_annotate(struct hist_entry *self) browser.b.width = line_len; rbpos = objdump_line__rb(pos); rbpos->idx = browser.b.nr_entries++; - rbpos->percent = objdump_line__calc_percent(pos, &head, self->ms.sym); + rbpos->percent = objdump_line__calc_percent(pos, &head, sym); if (rbpos->percent < 0.01) continue; objdump__insert_line(&browser.entries, rbpos); -- cgit v0.10.2 From 2f525d0148ef2734c8a172201e5e1e9167a8a5fd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Feb 2011 13:43:24 -0200 Subject: perf annotate: Support multiple histograms in annotation The perf annotate tool continues aggregating everything on just one histogram, but to support the top model add support for one histogram per evsel in the evlist.
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 9072ef4..f3e4423 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -57,7 +57,18 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) if (he == NULL) return -ENOMEM; - return hist_entry__inc_addr_samples(he, al->addr); + if (he->ms.sym != NULL) { + /* + * All aggregated on the first sym_hist. + */ + struct annotation *notes = symbol__annotation(he->ms.sym); + if (notes->histograms == NULL && symbol__alloc_hist(he->ms.sym, 1) < 0) + return -ENOMEM; + + return hist_entry__inc_addr_samples(he, 0, al->addr); + } + + return 0; } static int process_sample_event(union perf_event *event, @@ -81,9 +92,9 @@ static int process_sample_event(union perf_event *event, return 0; } -static int hist_entry__tty_annotate(struct hist_entry *he) +static int hist_entry__tty_annotate(struct hist_entry *he, int evidx) { - return symbol__tty_annotate(he->ms.sym, he->ms.map, + return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx, print_line, full_paths); } @@ -100,7 +111,7 @@ static void hists__find_annotations(struct hists *self) goto find_next; notes = symbol__annotation(he->ms.sym); - if (notes->histogram == NULL) { + if (notes->histograms == NULL) { find_next: if (key == KEY_LEFT) nd = rb_prev(nd); @@ -110,7 +121,8 @@ find_next: } if (use_browser > 0) { - key = hist_entry__tui_annotate(he); + /* For now all is aggregated on the first */ + key = hist_entry__tui_annotate(he, 0); switch (key) { case KEY_RIGHT: next = rb_next(nd); @@ -125,15 +137,16 @@ find_next: if (next != NULL) nd = next; } else { - hist_entry__tty_annotate(he); + /* For now all is aggregated on the first */ + hist_entry__tty_annotate(he, 0); nd = rb_next(nd); /* * Since we have a hist_entry per 
IP for the same * symbol, free he->ms.sym->histogram to signal we already * processed this symbol. */ - free(notes->histogram); - notes->histogram = NULL; + free(notes->histograms); + notes->histograms = NULL; } } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 91e4cdb..de06bf5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -118,8 +118,17 @@ static int perf_session__add_hist_entry(struct perf_session *session, * so we don't allocated the extra space needed because the stdio * code will not use it. */ - if (use_browser > 0) - err = hist_entry__inc_addr_samples(he, al->addr); + if (al->sym != NULL && use_browser > 0) { + /* + * All aggregated on the first sym_hist. + */ + struct annotation *notes = symbol__annotation(he->ms.sym); + if (notes->histograms == NULL && + symbol__alloc_hist(he->ms.sym, 1) < 0) + err = -ENOMEM; + else + err = hist_entry__inc_addr_samples(he, 0, al->addr); + } return err; } @@ -349,7 +358,7 @@ static int __cmd_report(void) } if (use_browser > 0) - hists__tui_browse_tree(&session->hists_tree, help); + hists__tui_browse_tree(&session->hists_tree, help, 0); else hists__tty_browse_tree(&session->hists_tree, help); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9b25575..7488fe9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -15,44 +15,40 @@ #include "debug.h" #include "annotate.h" -static int symbol__alloc_hist(struct symbol *sym) +int symbol__alloc_hist(struct symbol *sym, int nevents) { struct annotation *notes = symbol__annotation(sym); - const int size = (sizeof(*notes->histogram) + - (sym->end - sym->start) * sizeof(u64)); - notes->histogram = zalloc(size); - return notes->histogram == NULL ? -1 : 0; + notes->sizeof_sym_hist = (sizeof(*notes->histograms) + + (sym->end - sym->start) * sizeof(u64)); + notes->histograms = calloc(nevents, notes->sizeof_sym_hist); + return notes->histograms == NULL ? 
-1 : 0; } -int symbol__inc_addr_samples(struct symbol *sym, struct map *map, u64 addr) +int symbol__inc_addr_samples(struct symbol *sym, struct map *map, + int evidx, u64 addr) { - unsigned int sym_size, offset; + unsigned offset; struct annotation *notes; struct sym_hist *h; - if (!sym || !map) - return 0; - notes = symbol__annotation(sym); - if (notes->histogram == NULL && symbol__alloc_hist(sym) < 0) + if (notes->histograms == NULL) return -ENOMEM; - sym_size = sym->end - sym->start; - offset = addr - sym->start; - pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); - if (offset >= sym_size) + if (addr >= sym->end) return 0; - h = notes->histogram; + offset = addr - sym->start; + h = annotation__histogram(notes, evidx); h->sum++; h->addr[offset]++; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 - "] => %" PRIu64 "\n", sym->start, sym->name, - addr, addr - sym->start, h->addr[offset]); + ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name, + addr, addr - sym->start, evidx, h->addr[offset]); return 0; } @@ -90,8 +86,8 @@ struct objdump_line *objdump__get_next_ip_line(struct list_head *head, } static void objdump_line__print(struct objdump_line *oline, - struct list_head *head, - struct symbol *sym, u64 len) + struct list_head *head, struct symbol *sym, + int evidx, u64 len) { static const char *prev_line; static const char *prev_color; @@ -103,7 +99,7 @@ static void objdump_line__print(struct objdump_line *oline, const char *color; struct annotation *notes = symbol__annotation(sym); struct source_line *src_line = notes->src_line; - struct sym_hist *h = notes->histogram; + struct sym_hist *h = annotation__histogram(notes, evidx); s64 offset = oline->offset; struct objdump_line *next = objdump__get_next_ip_line(head, oline); @@ -328,7 +324,7 @@ static void symbol__free_source_line(struct symbol *sym, int len) /* Get the filename:line for the colored entries */ static int symbol__get_source_line(struct symbol *sym, struct 
map *map, - struct rb_root *root, int len, + int evidx, struct rb_root *root, int len, const char *filename) { u64 start; @@ -336,7 +332,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, char cmd[PATH_MAX * 2]; struct source_line *src_line; struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = notes->histogram; + struct sym_hist *h = annotation__histogram(notes, evidx); if (!h->sum) return 0; @@ -409,10 +405,10 @@ static void print_summary(struct rb_root *root, const char *filename) } } -static void symbol__annotate_hits(struct symbol *sym) +static void symbol__annotate_hits(struct symbol *sym, int evidx) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = notes->histogram; + struct sym_hist *h = annotation__histogram(notes, evidx); u64 len = sym->end - sym->start, offset; for (offset = 0; offset < len; ++offset) @@ -422,8 +418,8 @@ static void symbol__annotate_hits(struct symbol *sym) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); } -int symbol__tty_annotate(struct symbol *sym, struct map *map, bool print_lines, - bool full_paths) +int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, + bool print_lines, bool full_paths) { struct dso *dso = map->dso; const char *filename = dso->long_name, *d_filename; @@ -443,7 +439,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, bool print_lines, len = sym->end - sym->start; if (print_lines) { - symbol__get_source_line(sym, map, &source_line, len, filename); + symbol__get_source_line(sym, map, evidx, &source_line, + len, filename); print_summary(&source_line, filename); } @@ -452,10 +449,10 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, bool print_lines, printf("------------------------------------------------\n"); if (verbose) - symbol__annotate_hits(sym); + symbol__annotate_hits(sym, evidx); list_for_each_entry_safe(pos, n, &head, node) { - objdump_line__print(pos, &head, sym, 
len); + objdump_line__print(pos, &head, sym, evidx, len); list_del(&pos->node); objdump_line__free(pos); } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6e2fbc2..0a5069ca 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -28,9 +28,21 @@ struct source_line { char *path; }; +/** struct annotation - symbols with hits have this attached as in sannotation + * + * @histogram: Array of addr hit histograms per event being monitored + * @src_line: If 'print_lines' is specified, per source code line percentages + * + * src_line is allocated, percentages calculated and all sorted by percentage + * when the annotation is about to be presented, so the percentages are for + * one of the entries in the histogram array, i.e. for the event/counter being + * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate + * returns. + */ struct annotation { - struct sym_hist *histogram; struct source_line *src_line; + struct sym_hist *histograms; + int sizeof_sym_hist; }; struct sannotation { @@ -38,28 +50,35 @@ struct sannotation { struct symbol symbol; }; +static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) +{ + return ((void *)notes->histograms) + (notes->sizeof_sym_hist * idx); +} + static inline struct annotation *symbol__annotation(struct symbol *sym) { struct sannotation *a = container_of(sym, struct sannotation, symbol); return &a->annotation; } -int symbol__inc_addr_samples(struct symbol *sym, struct map *map, u64 addr); +int symbol__inc_addr_samples(struct symbol *sym, struct map *map, + int evidx, u64 addr); +int symbol__alloc_hist(struct symbol *sym, int nevents); int symbol__annotate(struct symbol *sym, struct map *map, struct list_head *head, size_t privsize); -int symbol__tty_annotate(struct symbol *sym, struct map *map, +int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, bool print_lines, bool full_paths); #ifdef NO_NEWT_SUPPORT static inline int 
symbol__tui_annotate(symbol *sym __used, - struct map *map __used) + struct map *map __used, int evidx __used) { return 0; } #else -int symbol__tui_annotate(struct symbol *sym, struct map *map); +int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx); #endif #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6d9c92c..bac5ab6 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -950,9 +950,9 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread) } } -int hist_entry__inc_addr_samples(struct hist_entry *he, u64 ip) +int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) { - return symbol__inc_addr_samples(he->ms.sym, he->ms.map, ip); + return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); } int hist_entry__annotate(struct hist_entry *he, struct list_head *head, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 8a201f7..2c6cdae 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -77,7 +77,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); size_t hists__fprintf(struct hists *self, struct hists *pair, bool show_displacement, FILE *fp); -int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip); +int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr); int hist_entry__annotate(struct hist_entry *self, struct list_head *head, size_t privsize); @@ -91,18 +91,20 @@ bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); #ifdef NO_NEWT_SUPPORT static inline int hists__browse(struct hists *self __used, const char *helpline __used, - const char *ev_name __used) + const char *ev_name __used, int evidx __used) { return 0; } static inline int hists__tui_browse_tree(struct rb_root *self __used, - const char *help __used) + const char *help __used, + int evidx __used) { return 0; } -static inline int hist_entry__tui_annotate(struct hist_entry 
*self __used) +static inline int hist_entry__tui_annotate(struct hist_entry *self __used, + int evidx __used) { return 0; } @@ -111,13 +113,13 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used) #else #include int hists__browse(struct hists *self, const char *helpline, - const char *ev_name); -int hist_entry__tui_annotate(struct hist_entry *self); + const char *ev_name, int evidx); +int hist_entry__tui_annotate(struct hist_entry *self, int evidx); #define KEY_LEFT NEWT_KEY_LEFT #define KEY_RIGHT NEWT_KEY_RIGHT -int hists__tui_browse_tree(struct rb_root *self, const char *help); +int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx); #endif unsigned int hists__sort_list_width(struct hists *self); diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index daa7138..8d8a168 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -61,7 +61,7 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro static double objdump_line__calc_percent(struct objdump_line *self, struct list_head *head, - struct symbol *sym) + struct symbol *sym, int evidx) { double percent = 0.0; @@ -70,7 +70,7 @@ static double objdump_line__calc_percent(struct objdump_line *self, unsigned int hits = 0; struct annotation *notes = symbol__annotation(sym); struct source_line *src_line = notes->src_line; - struct sym_hist *h = notes->histogram; + struct sym_hist *h = annotation__histogram(notes, evidx); s64 offset = self->offset; struct objdump_line *next = objdump__get_next_ip_line(head, self); @@ -183,12 +183,12 @@ out: return key; } -int hist_entry__tui_annotate(struct hist_entry *he) +int hist_entry__tui_annotate(struct hist_entry *he, int evidx) { - return symbol__tui_annotate(he->ms.sym, he->ms.map); + return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx); } -int symbol__tui_annotate(struct symbol *sym, struct map *map) +int 
symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) { struct objdump_line *pos, *n; struct objdump_line_rb_node *rbpos; @@ -223,7 +223,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map) browser.b.width = line_len; rbpos = objdump_line__rb(pos); rbpos->idx = browser.b.nr_entries++; - rbpos->percent = objdump_line__calc_percent(pos, &head, sym); + rbpos->percent = objdump_line__calc_percent(pos, &head, sym, evidx); if (rbpos->percent < 0.01) continue; objdump__insert_line(&browser.entries, rbpos); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 8642823..294b495 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -797,7 +797,8 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, return printed; } -int hists__browse(struct hists *self, const char *helpline, const char *ev_name) +int hists__browse(struct hists *self, const char *helpline, + const char *ev_name, int evidx) { struct hist_browser *browser = hist_browser__new(self); struct pstack *fstack; @@ -935,7 +936,7 @@ do_annotate: if (he == NULL) continue; - hist_entry__tui_annotate(he); + hist_entry__tui_annotate(he, evidx); } else if (choice == browse_map) map__browse(browser->selection->map); else if (choice == zoom_dso) { @@ -984,7 +985,7 @@ out: return key; } -int hists__tui_browse_tree(struct rb_root *self, const char *help) +int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx) { struct rb_node *first = rb_first(self), *nd = first, *next; int key = 0; @@ -993,7 +994,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) struct hists *hists = rb_entry(nd, struct hists, rb_node); const char *ev_name = __event_name(hists->type, hists->config); - key = hists__browse(hists, help, ev_name); + key = hists__browse(hists, help, ev_name, evidx); switch (key) { case NEWT_KEY_TAB: next = rb_next(nd); -- cgit v0.10.2 From 
d040bd363824f9f0ad6610b91ee6c65f292c066c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 5 Feb 2011 15:37:31 -0200 Subject: perf annotate: Config options for symbol__tty_annotate Max line# that should be printed, minimum percentage filter, just like 'perf top', alas, due to it :-) Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f3e4423..ea6a116 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -95,7 +95,7 @@ static int process_sample_event(union perf_event *event, static int hist_entry__tty_annotate(struct hist_entry *he, int evidx) { return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx, - print_line, full_paths); + print_line, full_paths, 0, 0); } static void hists__find_annotations(struct hists *self) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7488fe9..072bc8d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -87,7 +87,7 @@ struct objdump_line *objdump__get_next_ip_line(struct list_head *head, static void objdump_line__print(struct objdump_line *oline, struct list_head *head, struct symbol *sym, - int evidx, u64 len) + int evidx, u64 len, int min_pcnt) { static const char *prev_line; static const char *prev_color; @@ -118,6 +118,9 @@ static void objdump_line__print(struct objdump_line *oline, if (src_line == NULL && h->sum) percent = 100.0 * hits / h->sum; + if (percent < min_pcnt) + return; + color = get_percent_color(percent); /* @@ -419,13 +422,15 @@ static void symbol__annotate_hits(struct symbol *sym, int evidx) } int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, - bool print_lines, bool full_paths) + bool print_lines, bool full_paths, int min_pcnt, + int max_lines) { struct dso *dso = map->dso; const 
char *filename = dso->long_name, *d_filename; struct rb_root source_line = RB_ROOT; struct objdump_line *pos, *n; LIST_HEAD(head); + int printed = 2; u64 len; if (symbol__annotate(sym, map, &head, 0) < 0) @@ -444,7 +449,6 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, print_summary(&source_line, filename); } - printf("\n\n------------------------------------------------\n"); printf(" Percent | Source code & Disassembly of %s\n", d_filename); printf("------------------------------------------------\n"); @@ -452,9 +456,11 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, symbol__annotate_hits(sym, evidx); list_for_each_entry_safe(pos, n, &head, node) { - objdump_line__print(pos, &head, sym, evidx, len); + objdump_line__print(pos, &head, sym, evidx, len, min_pcnt); list_del(&pos->node); objdump_line__free(pos); + if (max_lines && ++printed >= max_lines) + break; } if (print_lines) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0a5069ca..6b70732 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -69,7 +69,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, struct list_head *head, size_t privsize); int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, - bool print_lines, bool full_paths); + bool print_lines, bool full_paths, int min_pcnt, + int max_lines); #ifdef NO_NEWT_SUPPORT static inline int symbol__tui_annotate(symbol *sym __used, -- cgit v0.10.2 From f1e2701de02cff6d988b1dd49960620d5720cb89 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 5 Feb 2011 18:51:38 -0200 Subject: perf annotate: Separate objdump parsing from actual screen rendering Because in 'perf top' we'll need to parse just once and then, as samples come, render multiple times with evolving counter values. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 072bc8d..10cdbad 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -421,21 +421,16 @@ static void symbol__annotate_hits(struct symbol *sym, int evidx) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); } -int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, - bool print_lines, bool full_paths, int min_pcnt, - int max_lines) +void symbol__annotate_printf(struct symbol *sym, struct map *map, + struct list_head *head, int evidx, bool full_paths, + int min_pcnt, int max_lines) { struct dso *dso = map->dso; const char *filename = dso->long_name, *d_filename; - struct rb_root source_line = RB_ROOT; - struct objdump_line *pos, *n; - LIST_HEAD(head); + struct objdump_line *pos; int printed = 2; u64 len; - if (symbol__annotate(sym, map, &head, 0) < 0) - return -1; - if (full_paths) d_filename = filename; else @@ -443,28 +438,57 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, len = sym->end - sym->start; - if (print_lines) { - symbol__get_source_line(sym, map, evidx, &source_line, - len, filename); - print_summary(&source_line, filename); - } - printf(" Percent | Source code & Disassembly of %s\n", d_filename); printf("------------------------------------------------\n"); if (verbose) symbol__annotate_hits(sym, evidx); - list_for_each_entry_safe(pos, n, &head, node) { - objdump_line__print(pos, &head, sym, evidx, len, min_pcnt); - list_del(&pos->node); - objdump_line__free(pos); + list_for_each_entry(pos, head, node) { + objdump_line__print(pos, head, sym, evidx, len, min_pcnt); if (max_lines && ++printed >= max_lines) break; + + } +} + +void objdump_line_list__purge(struct list_head *head) +{ + struct objdump_line *pos, *n; + 
+ list_for_each_entry_safe(pos, n, head, node) { + list_del(&pos->node); + objdump_line__free(pos); + } +} + +int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, + bool print_lines, bool full_paths, int min_pcnt, + int max_lines) +{ + struct dso *dso = map->dso; + const char *filename = dso->long_name; + struct rb_root source_line = RB_ROOT; + LIST_HEAD(head); + u64 len; + + if (symbol__annotate(sym, map, &head, 0) < 0) + return -1; + + len = sym->end - sym->start; + + if (print_lines) { + symbol__get_source_line(sym, map, evidx, &source_line, + len, filename); + print_summary(&source_line, filename); } + symbol__annotate_printf(sym, map, &head, evidx, full_paths, + min_pcnt, max_lines); if (print_lines) symbol__free_source_line(sym, len); + objdump_line_list__purge(&head); + return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6b70732..53dd92d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -67,6 +67,10 @@ int symbol__alloc_hist(struct symbol *sym, int nevents); int symbol__annotate(struct symbol *sym, struct map *map, struct list_head *head, size_t privsize); +void symbol__annotate_printf(struct symbol *sym, struct map *map, + struct list_head *head, int evidx, bool full_paths, + int min_pcnt, int max_lines); +void objdump_line_list__purge(struct list_head *head); int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, bool print_lines, bool full_paths, int min_pcnt, -- cgit v0.10.2 From 36532461a0f60bb36c5470a0326f7394f19db23c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 6 Feb 2011 14:54:44 -0200 Subject: perf top: Ditch private annotation code, share perf annotate's Next step: Live TUI annotation in perf top, just press enter on a symbol line. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 154e088..716118a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -20,6 +20,7 @@ #include "perf.h" +#include "util/annotate.h" #include "util/cache.h" #include "util/color.h" #include "util/evlist.h" @@ -140,10 +141,7 @@ static int parse_source(struct sym_entry *syme) struct symbol *sym; struct sym_entry_source *source; struct map *map; - FILE *file; - char command[PATH_MAX*2]; - const char *path; - u64 len; + int err = -1; if (!syme) return -1; @@ -162,197 +160,80 @@ static int parse_source(struct sym_entry *syme) if (syme->src == NULL) return -1; pthread_mutex_init(&syme->src->lock, NULL); + INIT_LIST_HEAD(&syme->src->head); } source = syme->src; - if (source->lines) { + if (symbol__annotation(sym)->histograms != NULL) { pthread_mutex_lock(&source->lock); goto out_assign; } - path = map->dso->long_name; - - len = sym->end - sym->start; - - sprintf(command, - "objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s", - BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start), - BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path); - - file = popen(command, "r"); - if (!file) - return -1; pthread_mutex_lock(&source->lock); - source->lines_tail = &source->lines; - while (!feof(file)) { - struct source_line *src; - size_t dummy = 0; - char *c, *sep; - - src = malloc(sizeof(struct source_line)); - assert(src != NULL); - memset(src, 0, sizeof(struct source_line)); - - if (getline(&src->line, &dummy, file) < 0) - break; - if (!src->line) - break; - - c = strchr(src->line, '\n'); - if (c) - *c = 0; - src->next = NULL; - *source->lines_tail = src; - source->lines_tail = &src->next; - - src->eip = strtoull(src->line, &sep, 16); - if (*sep == ':') - src->eip = 
map__objdump_2ip(map, src->eip); - else /* this line has no ip info (e.g. source line) */ - src->eip = 0; + if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { + pr_err("Not enough memory for annotating '%s' symbol!\n", + sym->name); + goto out_unlock; } - pclose(file); + + err = symbol__annotate(sym, syme->map, &source->head, 0); + if (err == 0) { out_assign: sym_filter_entry = syme; + } +out_unlock: pthread_mutex_unlock(&source->lock); - return 0; + return err; } static void __zero_source_counters(struct sym_entry *syme) { - int i; - struct source_line *line; - - line = syme->src->lines; - while (line) { - for (i = 0; i < top.evlist->nr_entries; i++) - line->count[i] = 0; - line = line->next; - } + struct symbol *sym = sym_entry__symbol(syme); + symbol__annotate_zero_histograms(sym); } static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) { - struct source_line *line; - if (syme != sym_filter_entry) return; if (pthread_mutex_trylock(&syme->src->lock)) return; - if (syme->src == NULL || syme->src->source == NULL) - goto out_unlock; - - for (line = syme->src->lines; line; line = line->next) { - /* skip lines without IP info */ - if (line->eip == 0) - continue; - if (line->eip == ip) { - line->count[counter]++; - break; - } - if (line->eip > ip) - break; - } -out_unlock: - pthread_mutex_unlock(&syme->src->lock); -} - -#define PATTERN_LEN (BITS_PER_LONG / 4 + 2) + ip = syme->map->map_ip(syme->map, ip); + symbol__inc_addr_samples(sym_entry__symbol(syme), syme->map, counter, ip); -static void lookup_sym_source(struct sym_entry *syme) -{ - struct symbol *symbol = sym_entry__symbol(syme); - struct source_line *line; - char pattern[PATTERN_LEN + 1]; - - sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4, - map__rip_2objdump(syme->map, symbol->start)); - - pthread_mutex_lock(&syme->src->lock); - for (line = syme->src->lines; line; line = line->next) { - if (memcmp(line->line, pattern, PATTERN_LEN) == 0) { - syme->src->source = line; - break; 
- } - } pthread_mutex_unlock(&syme->src->lock); } -static void show_lines(struct source_line *queue, int count, int total) -{ - int i; - struct source_line *line; - - line = queue; - for (i = 0; i < count; i++) { - float pcnt = 100.0*(float)line->count[top.sym_counter]/(float)total; - - printf("%8li %4.1f%%\t%s\n", line->count[top.sym_counter], pcnt, line->line); - line = line->next; - } -} - -#define TRACE_COUNT 3 - static void show_details(struct sym_entry *syme) { struct symbol *symbol; - struct source_line *line; - struct source_line *line_queue = NULL; - int displayed = 0; - int line_queue_count = 0, total = 0, more = 0; + int more; if (!syme) return; - if (!syme->src->source) - lookup_sym_source(syme); - - if (!syme->src->source) + symbol = sym_entry__symbol(syme); + if (!syme->src || symbol__annotation(symbol)->histograms == NULL) return; - symbol = sym_entry__symbol(syme); printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); pthread_mutex_lock(&syme->src->lock); - line = syme->src->source; - while (line) { - total += line->count[top.sym_counter]; - line = line->next; - } - - line = syme->src->source; - while (line) { - float pcnt = 0.0; - - if (!line_queue_count) - line_queue = line; - line_queue_count++; - - if (line->count[top.sym_counter]) - pcnt = 100.0 * line->count[top.sym_counter] / (float)total; - if (pcnt >= (float)sym_pcnt_filter) { - if (displayed <= top.print_entries) - show_lines(line_queue, line_queue_count, total); - else more++; - displayed += line_queue_count; - line_queue_count = 0; - line_queue = NULL; - } else if (line_queue_count > TRACE_COUNT) { - line_queue = line_queue->next; - line_queue_count--; - } - - line->count[top.sym_counter] = top.zero ? 
0 : line->count[top.sym_counter] * 7 / 8; - line = line->next; - } + more = symbol__annotate_printf(symbol, syme->map, &syme->src->head, + top.sym_evsel->idx, 0, sym_pcnt_filter, + top.print_entries); + if (top.zero) + symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); + else + symbol__annotate_decay_histogram(symbol, &syme->src->head, + top.sym_evsel->idx); pthread_mutex_unlock(&syme->src->lock); - if (more) + if (more != 0) printf("%d lines not displayed, maybe increase display entries [e]\n", more); } @@ -1172,7 +1053,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); - symbol_conf.priv_size = (sizeof(struct sym_entry) + + symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) + (top.evlist->nr_entries + 1) * sizeof(unsigned long)); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 10cdbad..2973376 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -22,9 +22,19 @@ int symbol__alloc_hist(struct symbol *sym, int nevents) notes->sizeof_sym_hist = (sizeof(*notes->histograms) + (sym->end - sym->start) * sizeof(u64)); notes->histograms = calloc(nevents, notes->sizeof_sym_hist); + notes->nr_histograms = nevents; return notes->histograms == NULL ? 
-1 : 0; } +void symbol__annotate_zero_histograms(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + + if (notes->histograms != NULL) + memset(notes->histograms, 0, + notes->nr_histograms * notes->sizeof_sym_hist); +} + int symbol__inc_addr_samples(struct symbol *sym, struct map *map, int evidx, u64 addr) { @@ -85,9 +95,10 @@ struct objdump_line *objdump__get_next_ip_line(struct list_head *head, return NULL; } -static void objdump_line__print(struct objdump_line *oline, - struct list_head *head, struct symbol *sym, - int evidx, u64 len, int min_pcnt) +static int objdump_line__print(struct objdump_line *oline, + struct list_head *head, struct symbol *sym, + int evidx, u64 len, int min_pcnt, + int printed, int max_lines) { static const char *prev_line; static const char *prev_color; @@ -119,7 +130,10 @@ static void objdump_line__print(struct objdump_line *oline, percent = 100.0 * hits / h->sum; if (percent < min_pcnt) - return; + return -1; + + if (printed >= max_lines) + return 1; color = get_percent_color(percent); @@ -140,12 +154,16 @@ static void objdump_line__print(struct objdump_line *oline, color_fprintf(stdout, color, " %7.2f", percent); printf(" : "); color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line); - } else { + } else if (printed >= max_lines) + return 1; + else { if (!*oline->line) printf(" :\n"); else printf(" : %s\n", oline->line); } + + return 0; } static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, FILE *file, @@ -421,14 +439,15 @@ static void symbol__annotate_hits(struct symbol *sym, int evidx) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); } -void symbol__annotate_printf(struct symbol *sym, struct map *map, - struct list_head *head, int evidx, bool full_paths, - int min_pcnt, int max_lines) +int symbol__annotate_printf(struct symbol *sym, struct map *map, + struct list_head *head, int evidx, bool full_paths, + int min_pcnt, int max_lines) { struct dso *dso = map->dso; 
const char *filename = dso->long_name, *d_filename; struct objdump_line *pos; int printed = 2; + int more = 0; u64 len; if (full_paths) @@ -445,10 +464,47 @@ void symbol__annotate_printf(struct symbol *sym, struct map *map, symbol__annotate_hits(sym, evidx); list_for_each_entry(pos, head, node) { - objdump_line__print(pos, head, sym, evidx, len, min_pcnt); - if (max_lines && ++printed >= max_lines) + switch (objdump_line__print(pos, head, sym, evidx, len, min_pcnt, + printed, max_lines)) { + case 0: + ++printed; + break; + case 1: + /* filtered by max_lines */ + ++more; break; + case -1: + default: + /* filtered by min_pcnt */ + break; + } + } + + return more; +} +void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) +{ + struct annotation *notes = symbol__annotation(sym); + struct sym_hist *h = annotation__histogram(notes, evidx); + + memset(h, 0, notes->sizeof_sym_hist); +} + +void symbol__annotate_decay_histogram(struct symbol *sym, + struct list_head *head, int evidx) +{ + struct annotation *notes = symbol__annotation(sym); + struct sym_hist *h = annotation__histogram(notes, evidx); + struct objdump_line *pos; + + h->sum = 0; + + list_for_each_entry(pos, head, node) { + if (pos->offset != -1) { + h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8; + h->sum += h->addr[pos->offset]; + } } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 53dd92d..b1253aa 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -42,6 +42,7 @@ struct source_line { struct annotation { struct source_line *src_line; struct sym_hist *histograms; + int nr_histograms; int sizeof_sym_hist; }; @@ -64,12 +65,16 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int symbol__inc_addr_samples(struct symbol *sym, struct map *map, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym, int nevents); +void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, 
struct map *map, struct list_head *head, size_t privsize); -void symbol__annotate_printf(struct symbol *sym, struct map *map, - struct list_head *head, int evidx, bool full_paths, - int min_pcnt, int max_lines); +int symbol__annotate_printf(struct symbol *sym, struct map *map, + struct list_head *head, int evidx, bool full_paths, + int min_pcnt, int max_lines); +void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); +void symbol__annotate_decay_histogram(struct symbol *sym, + struct list_head *head, int evidx); void objdump_line_list__purge(struct list_head *head); int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 5009508..fe44afb 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -11,17 +11,8 @@ struct perf_evlist; struct perf_evsel; -struct source_line { - u64 eip; - unsigned long count[MAX_COUNTERS]; /* FIXME */ - char *line; - struct source_line *next; -}; - struct sym_entry_source { - struct source_line *source; - struct source_line *lines; - struct source_line **lines_tail; + struct list_head head; pthread_mutex_t lock; }; -- cgit v0.10.2 From 9c56dfeb784a586713f467e2028a127a2a58a238 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 3 Feb 2011 22:10:55 -0600 Subject: perf tools: Makefile: Use $(QUIET_GEN) for perf.so So that we get this: CC /home/acme/git/build/perf/bench/mem-memcpy-x86-64-asm.o GEN perf-archive * GEN /home/acme/git/build/perf/python/perf.so CC /home/acme/git/build/perf/builtin-annotate.o Instead of silently building the python binding. 
LKML-Reference: <1296890359-22659-1-git-send-email-mfwitten@gmail.com> Signed-off-by: Michael Witten Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index be3eb1d..94f73ab 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -326,7 +326,7 @@ grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) $(OUTPUT)python/perf.so: $(PYRF_OBJS) - @python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \ + $(QUIET_GEN)python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \ --build-temp='$(OUTPUT)python/temp' # # No Perl scripts right now: -- cgit v0.10.2 From ef4d001d79ac4bab6c2d81e9986a42059f877ec3 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Sat, 5 Feb 2011 20:39:38 +0000 Subject: perf top: Use pid_t for target_{pid|tid} Use pid_t data type for target_{pid|tid} vars. Cc: Ingo Molnar LKML-Reference: <20110205203938.GA15328@hera.kernel.org> Signed-off-by: Denis Kirjanov [ committer note: those variables are now in struct perf_top, fixed ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index fe44afb..62e3293 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -46,8 +46,8 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; - int display_weighted, freq, rb_entries; - int sym_counter, target_pid, target_tid; + int display_weighted, freq, rb_entries, sym_counter; + pid_t target_pid, target_tid; bool hide_kernel_symbols, hide_user_symbols, zero; const char *cpu_list; struct perf_evsel *sym_evsel; -- cgit v0.10.2 From f50c2169bd054984e976e67e8651d28f3caf6ba3 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Thu, 13 Jan 2011 11:18:30 +0100 Subject: perf probe: Rewrite find_lazy_match_lines() by using getline(3) Acked-by: Masami Hiramatsu Cc: Masami Hiramatsu Cc: lkml LKML-Reference: Signed-off-by: Franck Bui-Huu Signed-off-by: Arnaldo 
Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 69215bf..46addfb 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1234,51 +1234,38 @@ static int find_probe_point_by_line(struct probe_finder *pf) static int find_lazy_match_lines(struct list_head *head, const char *fname, const char *pat) { - char *fbuf, *p1, *p2; - int fd, line, nlines = -1; - struct stat st; - - fd = open(fname, O_RDONLY); - if (fd < 0) { - pr_warning("Failed to open %s: %s\n", fname, strerror(-fd)); + FILE *fp; + char *line = NULL; + size_t line_len; + ssize_t len; + int count = 0, linenum = 1; + + fp = fopen(fname, "r"); + if (!fp) { + pr_warning("Failed to open %s: %s\n", fname, strerror(errno)); return -errno; } - if (fstat(fd, &st) < 0) { - pr_warning("Failed to get the size of %s: %s\n", - fname, strerror(errno)); - nlines = -errno; - goto out_close; - } - - nlines = -ENOMEM; - fbuf = malloc(st.st_size + 2); - if (fbuf == NULL) - goto out_close; - if (read(fd, fbuf, st.st_size) < 0) { - pr_warning("Failed to read %s: %s\n", fname, strerror(errno)); - nlines = -errno; - goto out_free_fbuf; - } - fbuf[st.st_size] = '\n'; /* Dummy line */ - fbuf[st.st_size + 1] = '\0'; - p1 = fbuf; - line = 1; - nlines = 0; - while ((p2 = strchr(p1, '\n')) != NULL) { - *p2 = '\0'; - if (strlazymatch(p1, pat)) { - line_list__add_line(head, line); - nlines++; + while ((len = getline(&line, &line_len, fp)) > 0) { + + if (line[len - 1] == '\n') + line[len - 1] = '\0'; + + if (strlazymatch(line, pat)) { + line_list__add_line(head, linenum); + count++; } - line++; - p1 = p2 + 1; + linenum++; } -out_free_fbuf: - free(fbuf); -out_close: - close(fd); - return nlines; + + if (ferror(fp)) + count = -errno; + free(line); + fclose(fp); + + if (count == 0) + pr_debug("No matched lines found in %s.\n", fname); + return count; } static int probe_point_lazy_walker(const char *fname, int lineno, @@ -1312,10 +1299,7 @@ static int 
find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) /* Matching lazy line pattern */ ret = find_lazy_match_lines(&pf->lcache, pf->fname, pf->pev->point.lazy_line); - if (ret == 0) { - pr_debug("No matched lines found in %s.\n", pf->fname); - return 0; - } else if (ret < 0) + if (ret <= 0) return ret; } -- cgit v0.10.2 From 76022db323dd6d7c6958df3d595f7dedf7a14778 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 4 Feb 2011 21:51:53 +0900 Subject: tracing/kprobes: Cleanup strict_strtol() using code Since strict_strtol() accepts minus digits started with '-', it doesn't need to invert after converting. Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110204125153.9507.49335.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2dec9bc..2088893 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -767,16 +767,15 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t, } break; case '+': /* deref memory */ + arg++; /* Skip '+', because strict_strtol() rejects it. */ case '-': tmp = strchr(arg, '('); if (!tmp) break; *tmp = '\0'; - ret = strict_strtol(arg + 1, 0, &offset); + ret = strict_strtol(arg, 0, &offset); if (ret) break; - if (arg[0] == '-') - offset = -offset; arg = tmp + 1; tmp = strrchr(arg, ')'); if (tmp) { -- cgit v0.10.2 From e3745369986ddcdaa19f70e2d24e658876b97e84 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 4 Feb 2011 21:51:59 +0900 Subject: tracing/kprobes: Support longer (>128 bytes) command Expand command line buffer of kprobe-tracer to 4096 bytes. 
Reported-by: Arnaldo Carvalho de Melo Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110204125159.9507.20895.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2088893..c6ed886 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1129,7 +1129,7 @@ static int command_trace_probe(const char *buf) return ret; } -#define WRITE_BUFSIZE 128 +#define WRITE_BUFSIZE 4096 static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) -- cgit v0.10.2 From 1ff511e35ed87cc2ebade9e678e4a2fe39b6f9c5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 4 Feb 2011 21:52:05 +0900 Subject: tracing/kprobes: Add bitfield type Add bitfield type for tracing arguments on kprobe-tracer. The syntax of a bitfield type is: b<bit-width>@<bit-offset>/<container-size> e.g. Accessing 2 bits-width field with 4 bits-offset in 32 bits-width data at 4 bytes offseted from the address pointed by AX register: +4(%ax):b2@4/32 Since the width of container data depends on the arch, so I just added the container-size at the end. Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110204125205.9507.11363.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 5f77d94..6d27ab8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -42,11 +42,25 @@ Synopsis of kprobe_events +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. 
Currently, basic types - (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported. + (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield + are supported. (*) only for return probe. (**) this is useful for fetching a field of data structures. +Types +----- +Several types are supported for fetch-args. Kprobe tracer will access memory +by given type. Prefix 's' and 'u' means those types are signed and unsigned +respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). +String type is a special type, which fetches a "null-terminated" string from +kernel space. This means it will fail and store NULL if the string container +has been paged out. +Bitfield is another special type, which takes 3 parameters, bit-width, bit- +offset, and container-size (usually 32). The syntax is; + + b<bit-width>@<bit-offset>/<container-size> + Per-Probe Event Filtering ------------------------- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c6ed886..ccdc542 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -353,6 +353,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) kfree(data); } +/* Bitfield fetch function */ +struct bitfield_fetch_param { + struct fetch_param orig; + unsigned char hi_shift; + unsigned char low_shift; +}; + +#define DEFINE_FETCH_bitfield(type) \ +static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\ + void *data, void *dest) \ +{ \ + struct bitfield_fetch_param *bprm = data; \ + type buf = 0; \ + call_fetch(&bprm->orig, regs, &buf); \ + if (buf) { \ + buf <<= bprm->hi_shift; \ + buf >>= bprm->low_shift; \ + } \ + *(type *)dest = buf; \ +} +DEFINE_BASIC_FETCH_FUNCS(bitfield) +#define fetch_bitfield_string NULL +#define fetch_bitfield_string_size NULL + +static __kprobes void +free_bitfield_fetch_param(struct bitfield_fetch_param *data) +{ + /* + * Don't check the bitfield itself, because this must be the + * last fetch function. 
+ */ + if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) + free_deref_fetch_param(data->orig.data); + else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) + free_symbol_cache(data->orig.data); + kfree(data); +} /* Default (unsigned long) fetch type */ #define __DEFAULT_FETCH_TYPE(t) u##t #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) @@ -367,6 +404,7 @@ enum { FETCH_MTD_memory, FETCH_MTD_symbol, FETCH_MTD_deref, + FETCH_MTD_bitfield, FETCH_MTD_END, }; @@ -387,6 +425,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \ ASSIGN_FETCH_FUNC(memory, ftype), \ ASSIGN_FETCH_FUNC(symbol, ftype), \ ASSIGN_FETCH_FUNC(deref, ftype), \ +ASSIGN_FETCH_FUNC(bitfield, ftype), \ } \ } @@ -430,9 +469,33 @@ static const struct fetch_type *find_fetch_type(const char *type) if (!type) type = DEFAULT_FETCH_TYPE_STR; + /* Special case: bitfield */ + if (*type == 'b') { + unsigned long bs; + type = strchr(type, '/'); + if (!type) + goto fail; + type++; + if (strict_strtoul(type, 0, &bs)) + goto fail; + switch (bs) { + case 8: + return find_fetch_type("u8"); + case 16: + return find_fetch_type("u16"); + case 32: + return find_fetch_type("u32"); + case 64: + return find_fetch_type("u64"); + default: + goto fail; + } + } + for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) if (strcmp(type, fetch_type_table[i].name) == 0) return &fetch_type_table[i]; +fail: return NULL; } @@ -586,7 +649,9 @@ error: static void free_probe_arg(struct probe_arg *arg) { - if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) + if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) + free_bitfield_fetch_param(arg->fetch.data); + else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) free_deref_fetch_param(arg->fetch.data); else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) free_symbol_cache(arg->fetch.data); @@ -806,6 +871,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t, return ret; } +#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) + +/* Bitfield type needs to be parsed into a fetch function */ +static 
int __parse_bitfield_probe_arg(const char *bf, + const struct fetch_type *t, + struct fetch_param *f) +{ + struct bitfield_fetch_param *bprm; + unsigned long bw, bo; + char *tail; + + if (*bf != 'b') + return 0; + + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) + return -ENOMEM; + bprm->orig = *f; + f->fn = t->fetch[FETCH_MTD_bitfield]; + f->data = (void *)bprm; + + bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */ + if (bw == 0 || *tail != '@') + return -EINVAL; + + bf = tail + 1; + bo = simple_strtoul(bf, &tail, 0); + if (tail == bf || *tail != '/') + return -EINVAL; + + bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo); + bprm->low_shift = bprm->hi_shift + bo; + return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0; +} + /* String length checking wrapper */ static int parse_probe_arg(char *arg, struct trace_probe *tp, struct probe_arg *parg, int is_return) @@ -835,6 +935,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp, parg->offset = tp->size; tp->size += parg->type->size; ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); + if (ret >= 0) + ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); if (ret >= 0) { parg->fetch_size.fn = get_fetch_size_function(parg->type, parg->fetch.fn); -- cgit v0.10.2 From fb7d0b3cefb80a105f7fd26bbc62e0cbf9192822 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Mon, 24 Jan 2011 11:13:04 -0500 Subject: perf tool: Fix gcc 4.6.0 issues GCC 4.6.0 in Fedora rawhide turned up some compile errors in tools/perf due to the -Werror=unused-but-set-variable flag. I've gone through and annotated some of the assignments that had side effects (ie: return value from a function) with the __used annotation, and in some cases, just removed unused code. In a few cases, we were assigning something useful, but not using it in later parts of the function. 
kyle@dreadnought:~/src% gcc --version gcc (GCC) 4.6.0 20110122 (Red Hat 4.6.0-0.3) Cc: Ingo Molnar LKML-Reference: <20110124161304.GK27353@bombadil.infradead.org> Signed-off-by: Kyle McMartin [ committer note: Fixed up the annotation fixes, as that code moved recently ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index d9ab3ce..0c7454f 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -55,7 +55,7 @@ int bench_sched_pipe(int argc, const char **argv, * discarding returned value of read(), write() * causes error in building environment for perf */ - int ret, wait_stat; + int __used ret, wait_stat; pid_t pid, retpid; argc = parse_options(argc, argv, options, diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ae26211..a32f411 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -369,11 +369,6 @@ static void process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) { int ret = 0; - u64 now; - long long delta; - - now = get_nsecs(); - delta = start_time + atom->timestamp - now; switch (atom->type) { case SCHED_EVENT_RUN: @@ -562,7 +557,7 @@ static void wait_for_tasks(void) static void run_one_test(void) { - u64 T0, T1, delta, avg_delta, fluct, std_dev; + u64 T0, T1, delta, avg_delta, fluct; T0 = get_nsecs(); wait_for_tasks(); @@ -578,7 +573,6 @@ static void run_one_test(void) else fluct = delta - avg_delta; sum_fluct += fluct; - std_dev = sum_fluct / nr_runs / sqrt(nr_runs); if (!run_avg) run_avg = delta; run_avg = (run_avg*9 + delta)/10; @@ -799,7 +793,7 @@ replay_switch_event(struct trace_switch_event *switch_event, u64 timestamp, struct thread *thread __used) { - struct task_desc *prev, *next; + struct task_desc *prev, __used *next; u64 timestamp0; s64 delta; @@ -1404,7 +1398,7 @@ map_switch_event(struct trace_switch_event *switch_event, u64 timestamp, struct thread *thread __used) { - struct thread 
*sched_out, *sched_in; + struct thread *sched_out __used, *sched_in; int new_shortname; u64 timestamp0; s64 delta; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 716118a..b790673 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -865,7 +865,7 @@ static int __cmd_top(void) { pthread_t thread; struct perf_evsel *first; - int ret; + int ret __used; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 2973376..1012841 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -236,7 +236,6 @@ int symbol__annotate(struct symbol *sym, struct map *map, char command[PATH_MAX * 2]; FILE *file; int err = 0; - u64 len; char symfs_filename[PATH_MAX]; if (filename) { @@ -281,8 +280,6 @@ fallback: filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); - len = sym->end - sym->start; - pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso->long_name, sym, sym->name); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index c0de5ec..72c124d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1145,7 +1145,7 @@ int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist, { union perf_event ev; ssize_t size = 0, aligned_size = 0, padding; - int err = 0; + int err __used = 0; memset(&ev, 0, sizeof(ev)); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c6d9933..2040b85 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -248,8 +248,7 @@ static void python_process_event(int cpu, void *data, context = PyCObject_FromVoidPtr(scripting_context, NULL); PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); - 
PyTuple_SetItem(t, n++, - PyCObject_FromVoidPtr(scripting_context, NULL)); + PyTuple_SetItem(t, n++, context); if (handler) { PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 7821d0e..3e193f8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1525,8 +1525,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) symbol_conf.symfs, self->long_name); break; case DSO__ORIG_GUEST_KMODULE: - if (map->groups && map->groups->machine) - root_dir = map->groups->machine->root_dir; + if (map->groups && machine) + root_dir = machine->root_dir; else root_dir = ""; snprintf(name, size, "%s%s%s", symbol_conf.symfs, diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 73a0222..d8e622d 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -153,7 +153,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused) char *next = NULL; char *addr_str; char ch; - int ret; + int ret __used; int i; line = strtok_r(file, "\n", &next); diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index e515836..8462bff 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c @@ -41,7 +41,7 @@ static int ui_entry__read(const char *title, char *bf, size_t size, int width) out_free_form: newtPopWindow(); newtFormDestroy(form); - return 0; + return err; } struct map_browser { -- cgit v0.10.2 From a2221796256ea7b236cec6bf027c1c1de5b8ccd7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 7 Feb 2011 15:32:18 +0100 Subject: perf annotate: Fix build error A small fix for when NO_NEWT_SUPPORT is defined. Add a missing "struct" to the function prototype. Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tom Zanussi LKML-Reference: <20110207143218.GA31197@kryptos.osrc.amd.com> Signed-off-by: Borislav Petkov Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b1253aa..bc08b36 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -82,7 +82,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, int max_lines); #ifdef NO_NEWT_SUPPORT -static inline int symbol__tui_annotate(symbol *sym __used, +static inline int symbol__tui_annotate(struct symbol *sym __used, struct map *map __used, int evidx __used) { return 0; -- cgit v0.10.2 From 124bb83cd7de4d851af7595650233fb9e9279d5d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 4 Feb 2011 21:52:11 +0900 Subject: perf probe: Add bitfield member support Add bitfield member accessing support to probe arguments. 
Suggested-by: Arnaldo Carvalho de Melo Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <20110204125211.9507.60265.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu [ committer note: Fixed up '%lu' use for return of BYTES_TO_BITS ('%zd') ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 46addfb..fe461f6 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -33,6 +33,7 @@ #include #include +#include #include "event.h" #include "debug.h" #include "util.h" @@ -333,13 +334,23 @@ static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) return vr_die; } -static bool die_is_signed_type(Dwarf_Die *tp_die) +static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, + Dwarf_Word *result) { Dwarf_Attribute attr; + + if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + dwarf_formudata(&attr, result) != 0) + return -ENOENT; + + return 0; +} + +static bool die_is_signed_type(Dwarf_Die *tp_die) +{ Dwarf_Word ret; - if (dwarf_attr(tp_die, DW_AT_encoding, &attr) == NULL || - dwarf_formudata(&attr, &ret) != 0) + if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret)) return false; return (ret == DW_ATE_signed_char || ret == DW_ATE_signed || @@ -348,11 +359,29 @@ static bool die_is_signed_type(Dwarf_Die *tp_die) static int die_get_byte_size(Dwarf_Die *tp_die) { - Dwarf_Attribute attr; Dwarf_Word ret; - if (dwarf_attr(tp_die, DW_AT_byte_size, &attr) == NULL || - dwarf_formudata(&attr, &ret) != 0) + if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret)) + return 0; + + return (int)ret; +} + +static int die_get_bit_size(Dwarf_Die *tp_die) +{ + Dwarf_Word ret; + + if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret)) + return 0; + + return (int)ret; +} + +static int die_get_bit_offset(Dwarf_Die *tp_die) +{ + Dwarf_Word ret; + + if 
(die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret)) return 0; return (int)ret; @@ -827,6 +856,8 @@ static_var: return 0; } +#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long)) + static int convert_variable_type(Dwarf_Die *vr_die, struct probe_trace_arg *tvar, const char *cast) @@ -843,6 +874,14 @@ static int convert_variable_type(Dwarf_Die *vr_die, return (tvar->type == NULL) ? -ENOMEM : 0; } + if (die_get_bit_size(vr_die) != 0) { + /* This is a bitfield */ + ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die), + die_get_bit_offset(vr_die), + BYTES_TO_BITS(die_get_byte_size(vr_die))); + goto formatted; + } + if (die_get_real_type(vr_die, &type) == NULL) { pr_warning("Failed to get a type information of %s.\n", dwarf_diename(vr_die)); @@ -887,29 +926,31 @@ static int convert_variable_type(Dwarf_Die *vr_die, return (tvar->type == NULL) ? -ENOMEM : 0; } - ret = die_get_byte_size(&type) * 8; - if (ret) { - /* Check the bitwidth */ - if (ret > MAX_BASIC_TYPE_BITS) { - pr_info("%s exceeds max-bitwidth." - " Cut down to %d bits.\n", - dwarf_diename(&type), MAX_BASIC_TYPE_BITS); - ret = MAX_BASIC_TYPE_BITS; - } + ret = BYTES_TO_BITS(die_get_byte_size(&type)); + if (!ret) + /* No size ... try to use default type */ + return 0; - ret = snprintf(buf, 16, "%c%d", - die_is_signed_type(&type) ? 's' : 'u', ret); - if (ret < 0 || ret >= 16) { - if (ret >= 16) - ret = -E2BIG; - pr_warning("Failed to convert variable type: %s\n", - strerror(-ret)); - return ret; - } - tvar->type = strdup(buf); - if (tvar->type == NULL) - return -ENOMEM; + /* Check the bitwidth */ + if (ret > MAX_BASIC_TYPE_BITS) { + pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n", + dwarf_diename(&type), MAX_BASIC_TYPE_BITS); + ret = MAX_BASIC_TYPE_BITS; + } + ret = snprintf(buf, 16, "%c%d", + die_is_signed_type(&type) ? 
's' : 'u', ret); + +formatted: + if (ret < 0 || ret >= 16) { + if (ret >= 16) + ret = -E2BIG; + pr_warning("Failed to convert variable type: %s\n", + strerror(-ret)); + return ret; } + tvar->type = strdup(buf); + if (tvar->type == NULL) + return -ENOMEM; return 0; } -- cgit v0.10.2 From 6d54057d76e25c91165cda0e6e007f1811faa2be Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:33:26 -0500 Subject: tracing/filter: Have no filter return a match The n_preds field of a filter can change at any time, and even can become zero, just as the filter is about to be processed by an event. In the case that it is zero on entering the filter, return 1, telling the caller the event matches and should be traced. Also use a variable and assign it with ACCESS_ONCE() such that the count stays consistent within the function. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 36d4010..7275f03 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -383,9 +383,14 @@ int filter_match_preds(struct event_filter *filter, void *rec) int match, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; struct filter_pred *pred; + int n_preds = ACCESS_ONCE(filter->n_preds); int i; - for (i = 0; i < filter->n_preds; i++) { + /* no filter is considered a match */ + if (!n_preds) + return 1; + + for (i = 0; i < n_preds; i++) { pred = filter->preds[i]; if (!pred->pop_n) { match = pred->fn(pred, rec, val1, val2); -- cgit v0.10.2 From 58d9a597c4275d830a819625e7d437cd6fb23fa5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:37:09 -0500 Subject: tracing/filter: Move OR and AND logic out of fn() method The ops OR and AND act differently from the other ops, as they are the only ones to take other ops as their arguments. These ops also change the logic of the filter_match_preds.
By removing the OR and AND fn's we can also remove the val1 and val2 that is passed to all other fn's and are unused. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9021f8c..1597bc0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -677,8 +677,7 @@ struct event_subsystem { struct filter_pred; struct regex; -typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, - int val1, int val2); +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); typedef int (*regex_match_func)(char *str, struct regex *r, int len); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7275f03..5d719b3 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -124,8 +124,7 @@ struct filter_parse_state { }; #define DEFINE_COMPARISON_PRED(type) \ -static int filter_pred_##type(struct filter_pred *pred, void *event, \ - int val1, int val2) \ +static int filter_pred_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ @@ -152,8 +151,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \ } #define DEFINE_EQUALITY_PRED(size) \ -static int filter_pred_##size(struct filter_pred *pred, void *event, \ - int val1, int val2) \ +static int filter_pred_##size(struct filter_pred *pred, void *event) \ { \ u##size *addr = (u##size *)(event + pred->offset); \ u##size val = (u##size)pred->val; \ @@ -178,23 +176,8 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8); -static int filter_pred_and(struct filter_pred *pred __attribute((unused)), - void *event __attribute((unused)), - int val1, int val2) -{ - return val1 && val2; -} - -static int filter_pred_or(struct filter_pred *pred __attribute((unused)), - void *event __attribute((unused)), - int val1, int val2) -{ - return val1 || val2; -} - /* Filter predicate 
for fixed sized arrays of characters */ -static int filter_pred_string(struct filter_pred *pred, void *event, - int val1, int val2) +static int filter_pred_string(struct filter_pred *pred, void *event) { char *addr = (char *)(event + pred->offset); int cmp, match; @@ -207,8 +190,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event, } /* Filter predicate for char * pointers */ -static int filter_pred_pchar(struct filter_pred *pred, void *event, - int val1, int val2) +static int filter_pred_pchar(struct filter_pred *pred, void *event) { char **addr = (char **)(event + pred->offset); int cmp, match; @@ -231,8 +213,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, * and add it to the address of the entry, and at last we have * the address of the string. */ -static int filter_pred_strloc(struct filter_pred *pred, void *event, - int val1, int val2) +static int filter_pred_strloc(struct filter_pred *pred, void *event) { u32 str_item = *(u32 *)(event + pred->offset); int str_loc = str_item & 0xffff; @@ -247,8 +228,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event, return match; } -static int filter_pred_none(struct filter_pred *pred, void *event, - int val1, int val2) +static int filter_pred_none(struct filter_pred *pred, void *event) { return 0; } @@ -380,7 +360,7 @@ static void filter_build_regex(struct filter_pred *pred) /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct event_filter *filter, void *rec) { - int match, top = 0, val1 = 0, val2 = 0; + int match = -1, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; struct filter_pred *pred; int n_preds = ACCESS_ONCE(filter->n_preds); @@ -393,7 +373,7 @@ int filter_match_preds(struct event_filter *filter, void *rec) for (i = 0; i < n_preds; i++) { pred = filter->preds[i]; if (!pred->pop_n) { - match = pred->fn(pred, rec, val1, val2); + match = pred->fn(pred, rec); stack[top++] = match; continue; } @@ -403,7 +383,16 @@ 
int filter_match_preds(struct event_filter *filter, void *rec) } val1 = stack[--top]; val2 = stack[--top]; - match = pred->fn(pred, rec, val1, val2); + switch (pred->op) { + case OP_AND: + match = val1 && val2; + break; + case OP_OR: + match = val1 || val2; + break; + default: + WARN_ONCE(1, "filter op is not AND or OR"); + } stack[top++] = match; } @@ -775,15 +764,13 @@ static int filter_add_pred(struct filter_parse_state *ps, unsigned long long val; int ret; - pred->fn = filter_pred_none; + fn = pred->fn = filter_pred_none; if (pred->op == OP_AND) { pred->pop_n = 2; - fn = filter_pred_and; goto add_pred_fn; } else if (pred->op == OP_OR) { pred->pop_n = 2; - fn = filter_pred_or; goto add_pred_fn; } -- cgit v0.10.2 From c9c53ca03d6f97fdd9832d5ed3f15b30ee5cdb86 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:42:43 -0500 Subject: tracing/filter: Dynamically allocate preds For every filter that is made, we create predicates to hold every operation within the filter. We have a max of 32 predicates that we can hold. Currently, we allocate all 32 even if we only need to use one. Part of the reason we do this is that the filter can be used at any moment by any event. Fortunately, the filter is only used with preemption disabled. By resetting the count of preds used "n_preds" to zero, then performing a synchronize_sched(), we can safely free and reallocate a new array of preds.
Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1597bc0..441fc1b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -661,7 +661,8 @@ struct ftrace_event_field { }; struct event_filter { - int n_preds; + int n_preds; /* Number assigned */ + int a_preds; /* allocated */ struct filter_pred **preds; char *filter_string; }; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 5d719b3..aac6a61 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -362,6 +362,7 @@ int filter_match_preds(struct event_filter *filter, void *rec) { int match = -1, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; + struct filter_pred **preds; struct filter_pred *pred; int n_preds = ACCESS_ONCE(filter->n_preds); int i; @@ -370,8 +371,13 @@ int filter_match_preds(struct event_filter *filter, void *rec) if (!n_preds) return 1; + /* + * n_preds and filter->preds is protect with preemption disabled. 
+ */ + preds = rcu_dereference_sched(filter->preds); + for (i = 0; i < n_preds; i++) { - pred = filter->preds[i]; + pred = preds[i]; if (!pred->pop_n) { match = pred->fn(pred, rec); stack[top++] = match; @@ -548,46 +554,55 @@ static int filter_set_pred(struct filter_pred *dest, return 0; } +static void __free_preds(struct event_filter *filter) +{ + int i; + + if (filter->preds) { + for (i = 0; i < filter->a_preds; i++) { + if (filter->preds[i]) + filter_free_pred(filter->preds[i]); + } + kfree(filter->preds); + filter->preds = NULL; + } + filter->a_preds = 0; + filter->n_preds = 0; +} + static void filter_disable_preds(struct ftrace_event_call *call) { struct event_filter *filter = call->filter; int i; call->flags &= ~TRACE_EVENT_FL_FILTERED; + if (filter->preds) { + for (i = 0; i < filter->n_preds; i++) + filter->preds[i]->fn = filter_pred_none; + } filter->n_preds = 0; - - for (i = 0; i < MAX_FILTER_PRED; i++) - filter->preds[i]->fn = filter_pred_none; } -static void __free_preds(struct event_filter *filter) +static void __free_filter(struct event_filter *filter) { - int i; - if (!filter) return; - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (filter->preds[i]) - filter_free_pred(filter->preds[i]); - } - kfree(filter->preds); + __free_preds(filter); kfree(filter->filter_string); kfree(filter); } void destroy_preds(struct ftrace_event_call *call) { - __free_preds(call->filter); + __free_filter(call->filter); call->filter = NULL; call->flags &= ~TRACE_EVENT_FL_FILTERED; } -static struct event_filter *__alloc_preds(void) +static struct event_filter *__alloc_filter(void) { struct event_filter *filter; - struct filter_pred *pred; - int i; filter = kzalloc(sizeof(*filter), GFP_KERNEL); if (!filter) @@ -595,32 +610,63 @@ static struct event_filter *__alloc_preds(void) filter->n_preds = 0; - filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); + return filter; +} + +static int __alloc_preds(struct event_filter *filter, int n_preds) +{ + struct filter_pred 
*pred; + int i; + + if (filter->preds) { + if (filter->a_preds < n_preds) { + /* We need to reallocate */ + filter->n_preds = 0; + /* + * It is possible that the filter is currently + * being used. We need to zero out the number + * of preds, wait on preemption and then free + * the preds. + */ + synchronize_sched(); + __free_preds(filter); + } + } + + if (!filter->preds) { + filter->preds = + kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL); + filter->a_preds = n_preds; + } if (!filter->preds) - goto oom; + return -ENOMEM; + + if (WARN_ON(filter->a_preds < n_preds)) + return -EINVAL; - for (i = 0; i < MAX_FILTER_PRED; i++) { - pred = kzalloc(sizeof(*pred), GFP_KERNEL); + for (i = 0; i < n_preds; i++) { + pred = filter->preds[i]; + if (!pred) + pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) goto oom; pred->fn = filter_pred_none; filter->preds[i] = pred; } - return filter; - -oom: + return 0; + oom: __free_preds(filter); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } -static int init_preds(struct ftrace_event_call *call) +static int init_filter(struct ftrace_event_call *call) { if (call->filter) return 0; call->flags &= ~TRACE_EVENT_FL_FILTERED; - call->filter = __alloc_preds(); + call->filter = __alloc_filter(); if (IS_ERR(call->filter)) return PTR_ERR(call->filter); @@ -636,7 +682,7 @@ static int init_subsystem_preds(struct event_subsystem *system) if (strcmp(call->class->system, system->name) != 0) continue; - err = init_preds(call); + err = init_filter(call); if (err) return err; } @@ -665,7 +711,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, { int idx, err; - if (filter->n_preds == MAX_FILTER_PRED) { + if (WARN_ON(filter->n_preds == filter->a_preds)) { parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); return -ENOSPC; } @@ -1179,6 +1225,20 @@ static int check_preds(struct filter_parse_state *ps) return 0; } +static int count_preds(struct filter_parse_state *ps) +{ + struct postfix_elt *elt; + int n_preds = 0; + + 
list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) + continue; + n_preds++; + } + + return n_preds; +} + static int replace_preds(struct ftrace_event_call *call, struct event_filter *filter, struct filter_parse_state *ps, @@ -1191,10 +1251,23 @@ static int replace_preds(struct ftrace_event_call *call, int err; int n_preds = 0; + n_preds = count_preds(ps); + if (n_preds >= MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); + return -ENOSPC; + } + err = check_preds(ps); if (err) return err; + if (!dry_run) { + err = __alloc_preds(filter, n_preds); + if (err) + return err; + } + + n_preds = 0; list_for_each_entry(elt, &ps->postfix, list) { if (elt->op == OP_NONE) { if (!operand1) @@ -1208,7 +1281,7 @@ static int replace_preds(struct ftrace_event_call *call, continue; } - if (n_preds++ == MAX_FILTER_PRED) { + if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); return -ENOSPC; } @@ -1283,7 +1356,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) mutex_lock(&event_mutex); - err = init_preds(call); + err = init_filter(call); if (err) goto out_unlock; @@ -1376,7 +1449,7 @@ void ftrace_profile_free_filter(struct perf_event *event) struct event_filter *filter = event->filter; event->filter = NULL; - __free_preds(filter); + __free_filter(filter); } int ftrace_profile_set_filter(struct perf_event *event, int event_id, @@ -1402,7 +1475,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, if (event->filter) goto out_unlock; - filter = __alloc_preds(); + filter = __alloc_filter(); if (IS_ERR(filter)) { err = PTR_ERR(filter); goto out_unlock; @@ -1411,7 +1484,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, err = -ENOMEM; ps = kzalloc(sizeof(*ps), GFP_KERNEL); if (!ps) - goto free_preds; + goto free_filter; parse_init(ps, filter_ops, filter_str); err = filter_parse(ps); @@ -1427,9 +1500,9 @@ free_ps: postfix_clear(ps); kfree(ps); 
-free_preds: +free_filter: if (err) - __free_preds(filter); + __free_filter(filter); out_unlock: mutex_unlock(&event_mutex); -- cgit v0.10.2 From 0fc3ca9a10a61a77f18710fb708b41fd99c79a56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:46:46 -0500 Subject: tracing/filter: Call synchronize_sched() just once for system filters By separating out the resetting of the filter->n_preds to zero from the reallocation of preds for the filter, we can reset groups of filters first, call synchronize_sched() just once, and then reallocate each of the filters in the system group. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index aac6a61..8f00a11 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -570,17 +570,28 @@ static void __free_preds(struct event_filter *filter) filter->n_preds = 0; } +static void reset_preds(struct event_filter *filter) +{ + struct filter_pred *pred; + int n_preds = filter->n_preds; + int i; + + filter->n_preds = 0; + if (!filter->preds) + return; + + for (i = 0; i < n_preds; i++) { + pred = filter->preds[i]; + pred->fn = filter_pred_none; + } +} + static void filter_disable_preds(struct ftrace_event_call *call) { struct event_filter *filter = call->filter; - int i; call->flags &= ~TRACE_EVENT_FL_FILTERED; - if (filter->preds) { - for (i = 0; i < filter->n_preds; i++) - filter->preds[i]->fn = filter_pred_none; - } - filter->n_preds = 0; + reset_preds(filter); } static void __free_filter(struct event_filter *filter) @@ -620,15 +631,17 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) if (filter->preds) { if (filter->a_preds < n_preds) { - /* We need to reallocate */ - filter->n_preds = 0; /* - * It is possible that the filter is currently - * being used. We need to zero out the number - * of preds, wait on preemption and then free - * the preds. + * We need to reallocate.
+ * We should have already have zeroed out + * the pred count and called synchronized_sched() + * to make sure no one is using the preds. */ - synchronize_sched(); + if (WARN_ON_ONCE(filter->n_preds)) { + /* We need to reset it now */ + filter->n_preds = 0; + synchronize_sched(); + } __free_preds(filter); } } @@ -1328,6 +1341,30 @@ static int replace_system_preds(struct event_subsystem *system, /* try to see if the filter can be applied */ err = replace_preds(call, filter, ps, filter_string, true); if (err) + goto fail; + } + + /* set all filter pred counts to zero */ + list_for_each_entry(call, &ftrace_events, list) { + struct event_filter *filter = call->filter; + + if (strcmp(call->class->system, system->name) != 0) + continue; + + reset_preds(filter); + } + + /* + * Since some of the preds may be used under preemption + * we need to wait for them to finish before we may + * reallocate them. + */ + synchronize_sched(); + + list_for_each_entry(call, &ftrace_events, list) { + struct event_filter *filter = call->filter; + + if (strcmp(call->class->system, system->name) != 0) continue; /* really apply the filter */ @@ -1342,11 +1379,13 @@ static int replace_system_preds(struct event_subsystem *system, fail = false; } - if (fail) { - parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - return -EINVAL; - } + if (fail) + goto fail; + return 0; + fail: + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return -EINVAL; } int apply_event_filter(struct ftrace_event_call *call, char *filter_string) @@ -1381,6 +1420,13 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } + /* + * Make sure all the pred counts are zero so that + * no task is using it when we reallocate the preds array. 
+ */ + reset_preds(call->filter); + synchronize_sched(); + err = replace_preds(call, call->filter, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); -- cgit v0.10.2 From 74e9e58c350a24139e268dd6857bbaa55c5aafcf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:49:48 -0500 Subject: tracing/filter: Allocate the preds in an array Currently we allocate an array of pointers to filter_preds, and then allocate a separate filter_pred for each item in the array. This adds slight overhead in the filters as it needs to dereference twice to get to the op condition. Allocating the preds themselves in a single array removes a dereference as well as helps on the cache footprint. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 441fc1b..254d04a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -663,7 +663,7 @@ struct ftrace_event_field { struct event_filter { int n_preds; /* Number assigned */ int a_preds; /* allocated */ - struct filter_pred **preds; + struct filter_pred *preds; char *filter_string; }; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8f00a11..b6c9106 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -362,7 +362,7 @@ int filter_match_preds(struct event_filter *filter, void *rec) { int match = -1, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; - struct filter_pred **preds; + struct filter_pred *preds; struct filter_pred *pred; int n_preds = ACCESS_ONCE(filter->n_preds); int i; @@ -377,7 +377,7 @@ int filter_match_preds(struct event_filter *filter, void *rec) preds = rcu_dereference_sched(filter->preds); for (i = 0; i < n_preds; i++) { - pred = preds[i]; + pred = &preds[i]; if (!pred->pop_n) { match = pred->fn(pred, rec); stack[top++] = match; @@ -559,10 +559,8 @@ static void __free_preds(struct event_filter *filter) int i; if (filter->preds) { - for (i = 0;
i < filter->a_preds; i++) { - if (filter->preds[i]) - filter_free_pred(filter->preds[i]); - } + for (i = 0; i < filter->a_preds; i++) + kfree(filter->preds[i].field_name); kfree(filter->preds); filter->preds = NULL; } @@ -572,7 +570,6 @@ static void __free_preds(struct event_filter *filter) static void reset_preds(struct event_filter *filter) { - struct filter_pred *pred; int n_preds = filter->n_preds; int i; @@ -580,10 +577,8 @@ static void reset_preds(struct event_filter *filter) if (!filter->preds) return; - for (i = 0; i < n_preds; i++) { - pred = filter->preds[i]; - pred->fn = filter_pred_none; - } + for (i = 0; i < n_preds; i++) + filter->preds[i].fn = filter_pred_none; } static void filter_disable_preds(struct ftrace_event_call *call) @@ -658,19 +653,11 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) return -EINVAL; for (i = 0; i < n_preds; i++) { - pred = filter->preds[i]; - if (!pred) - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) - goto oom; + pred = &filter->preds[i]; pred->fn = filter_pred_none; - filter->preds[i] = pred; } return 0; - oom: - __free_preds(filter); - return -ENOMEM; } static int init_filter(struct ftrace_event_call *call) @@ -730,8 +717,8 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, } idx = filter->n_preds; - filter_clear_pred(filter->preds[idx]); - err = filter_set_pred(filter->preds[idx], pred, fn); + filter_clear_pred(&filter->preds[idx]); + err = filter_set_pred(&filter->preds[idx], pred, fn); if (err) return err; -- cgit v0.10.2 From f76690afd05e3e163149310bdcd30234f93b3a7a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:53:06 -0500 Subject: tracing/filter: Free pred array on disabling of filter When a filter is disabled, free the preds. 
Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index b6c9106..2f5458e 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1388,6 +1388,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) if (!strcmp(strstrip(filter_string), "0")) { filter_disable_preds(call); + reset_preds(call->filter); + /* Make sure the filter is not being used */ + synchronize_sched(); + __free_preds(call->filter); remove_filter_string(call->filter); goto out_unlock; } -- cgit v0.10.2 From 61e9dea20e1ada886cc49a9ec6fe3c6ac0de7324 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 22:54:33 -0500 Subject: tracing/filter: Use a tree instead of stack for filter_match_preds() Currently the filter_match_preds() requires a stack to push and pop the preds to determine if the filter matches the record or not. This has two drawbacks: 1) It requires a stack to store state information. As this is done in fast paths we can't allocate the storage for this stack, and we can't use a global as it must be re-entrant. The stack is stored on the kernel stack and this greatly limits how many preds we may allow. 2) All conditions are calculated even when a short circuit exists. a || b will always calculate a and b even though a was determined to be true. Using a tree we can walk a constant structure that will save the state as we go. The algorithm is simply: pred = root; do { switch (move) { case MOVE_DOWN: if (OR or AND) { pred = left; continue; } if (pred == root) break; match = pred->fn(); pred = pred->parent; move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT; continue; case MOVE_UP_FROM_LEFT: /* Only OR or AND can be a parent */ if (match && OR || !match && AND) { /* short circuit */ if (pred == root) break; pred = pred->parent; move = left child ? 
MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT; continue; } pred = pred->right; move = MOVE_DOWN; continue; case MOVE_UP_FROM_RIGHT: if (pred == root) break; pred = pred->parent; move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT; continue; } done = 1; } while (!done); This way there's no strict limit to how many preds we allow and it also will short circuit the logical operations when possible. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 254d04a..bba34a7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -664,6 +664,7 @@ struct event_filter { int n_preds; /* Number assigned */ int a_preds; /* allocated */ struct filter_pred *preds; + struct filter_pred *root; char *filter_string; }; @@ -675,6 +676,9 @@ struct event_subsystem { int nr_events; }; +#define FILTER_PRED_INVALID ((unsigned short)-1) +#define FILTER_PRED_IS_RIGHT (1 << 15) + struct filter_pred; struct regex; @@ -704,7 +708,10 @@ struct filter_pred { int offset; int not; int op; - int pop_n; + unsigned short index; + unsigned short parent; + unsigned short left; + unsigned short right; }; extern struct list_head ftrace_common_fields; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2f5458e..1039049 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -123,6 +123,11 @@ struct filter_parse_state { } operand; }; +struct pred_stack { + struct filter_pred **preds; + int index; +}; + #define DEFINE_COMPARISON_PRED(type) \ static int filter_pred_##type(struct filter_pred *pred, void *event) \ { \ @@ -357,52 +362,95 @@ static void filter_build_regex(struct filter_pred *pred) pred->not ^= not; } +enum move_type { + MOVE_DOWN, + MOVE_UP_FROM_LEFT, + MOVE_UP_FROM_RIGHT +}; + +static struct filter_pred * +get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, + int index, enum move_type *move) +{ + if (pred->parent & FILTER_PRED_IS_RIGHT) + *move 
= MOVE_UP_FROM_RIGHT; + else + *move = MOVE_UP_FROM_LEFT; + pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT]; + + return pred; +} + /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct event_filter *filter, void *rec) { - int match = -1, top = 0, val1 = 0, val2 = 0; - int stack[MAX_FILTER_PRED]; + int match = -1; + enum move_type move = MOVE_DOWN; struct filter_pred *preds; struct filter_pred *pred; + struct filter_pred *root; int n_preds = ACCESS_ONCE(filter->n_preds); - int i; + int done = 0; /* no filter is considered a match */ if (!n_preds) return 1; /* - * n_preds and filter->preds is protect with preemption disabled. + * n_preds, root and filter->preds are protect with preemption disabled. */ preds = rcu_dereference_sched(filter->preds); + root = rcu_dereference_sched(filter->root); + if (!root) + return 1; - for (i = 0; i < n_preds; i++) { - pred = &preds[i]; - if (!pred->pop_n) { + pred = root; + + /* match is currently meaningless */ + match = -1; + + do { + switch (move) { + case MOVE_DOWN: + /* only AND and OR have children */ + if (pred->left != FILTER_PRED_INVALID) { + /* keep going to leaf node */ + pred = &preds[pred->left]; + continue; + } match = pred->fn(pred, rec); - stack[top++] = match; + /* If this pred is the only pred */ + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + case MOVE_UP_FROM_LEFT: + /* Check for short circuits */ + if ((match && pred->op == OP_OR) || + (!match && pred->op == OP_AND)) { + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + } + /* now go down the right side of the tree. */ + pred = &preds[pred->right]; + move = MOVE_DOWN; + continue; + case MOVE_UP_FROM_RIGHT: + /* We finished this equation. 
*/ + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); continue; } - if (pred->pop_n > top) { - WARN_ON_ONCE(1); - return 0; - } - val1 = stack[--top]; - val2 = stack[--top]; - switch (pred->op) { - case OP_AND: - match = val1 && val2; - break; - case OP_OR: - match = val1 || val2; - break; - default: - WARN_ONCE(1, "filter op is not AND or OR"); - } - stack[top++] = match; - } + done = 1; + } while (!done); - return stack[--top]; + return match; } EXPORT_SYMBOL_GPL(filter_match_preds); @@ -539,10 +587,58 @@ static void filter_clear_pred(struct filter_pred *pred) pred->regex.len = 0; } -static int filter_set_pred(struct filter_pred *dest, +static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) +{ + stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL); + if (!stack->preds) + return -ENOMEM; + stack->index = n_preds; + return 0; +} + +static void __free_pred_stack(struct pred_stack *stack) +{ + kfree(stack->preds); + stack->index = 0; +} + +static int __push_pred_stack(struct pred_stack *stack, + struct filter_pred *pred) +{ + int index = stack->index; + + if (WARN_ON(index == 0)) + return -ENOSPC; + + stack->preds[--index] = pred; + stack->index = index; + return 0; +} + +static struct filter_pred * +__pop_pred_stack(struct pred_stack *stack) +{ + struct filter_pred *pred; + int index = stack->index; + + pred = stack->preds[index++]; + if (!pred) + return NULL; + + stack->index = index; + return pred; +} + +static int filter_set_pred(struct event_filter *filter, + int idx, + struct pred_stack *stack, struct filter_pred *src, filter_pred_fn_t fn) { + struct filter_pred *dest = &filter->preds[idx]; + struct filter_pred *left; + struct filter_pred *right; + *dest = *src; if (src->field_name) { dest->field_name = kstrdup(src->field_name, GFP_KERNEL); @@ -550,8 +646,25 @@ static int filter_set_pred(struct filter_pred *dest, return -ENOMEM; } dest->fn = fn; + dest->index = idx; - return 0; + if 
(dest->op == OP_OR || dest->op == OP_AND) { + right = __pop_pred_stack(stack); + left = __pop_pred_stack(stack); + if (!left || !right) + return -EINVAL; + dest->left = left->index; + dest->right = right->index; + left->parent = dest->index; + right->parent = dest->index | FILTER_PRED_IS_RIGHT; + } else + /* + * Make dest->left invalid to be used as a quick + * way to know this is a leaf node. + */ + dest->left = FILTER_PRED_INVALID; + + return __push_pred_stack(stack, dest); } static void __free_preds(struct event_filter *filter) @@ -574,6 +687,7 @@ static void reset_preds(struct event_filter *filter) int i; filter->n_preds = 0; + filter->root = NULL; if (!filter->preds) return; @@ -707,6 +821,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, struct ftrace_event_call *call, struct event_filter *filter, struct filter_pred *pred, + struct pred_stack *stack, filter_pred_fn_t fn) { int idx, err; @@ -718,7 +833,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, idx = filter->n_preds; filter_clear_pred(&filter->preds[idx]); - err = filter_set_pred(&filter->preds[idx], pred, fn); + err = filter_set_pred(filter, idx, stack, pred, fn); if (err) return err; @@ -803,6 +918,7 @@ static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, struct event_filter *filter, struct filter_pred *pred, + struct pred_stack *stack, bool dry_run) { struct ftrace_event_field *field; @@ -812,13 +928,10 @@ static int filter_add_pred(struct filter_parse_state *ps, fn = pred->fn = filter_pred_none; - if (pred->op == OP_AND) { - pred->pop_n = 2; + if (pred->op == OP_AND) goto add_pred_fn; - } else if (pred->op == OP_OR) { - pred->pop_n = 2; + else if (pred->op == OP_OR) goto add_pred_fn; - } field = find_event_field(call, pred->field_name); if (!field) { @@ -867,7 +980,7 @@ static int filter_add_pred(struct filter_parse_state *ps, add_pred_fn: if (!dry_run) - return filter_add_pred_fn(ps, call, filter, pred, fn); + return 
filter_add_pred_fn(ps, call, filter, pred, stack, fn); return 0; } @@ -1248,6 +1361,7 @@ static int replace_preds(struct ftrace_event_call *call, char *operand1 = NULL, *operand2 = NULL; struct filter_pred *pred; struct postfix_elt *elt; + struct pred_stack stack = { }; /* init to NULL */ int err; int n_preds = 0; @@ -1262,9 +1376,12 @@ static int replace_preds(struct ftrace_event_call *call, return err; if (!dry_run) { - err = __alloc_preds(filter, n_preds); + err = __alloc_pred_stack(&stack, n_preds); if (err) return err; + err = __alloc_preds(filter, n_preds); + if (err) + goto fail; } n_preds = 0; @@ -1276,14 +1393,16 @@ static int replace_preds(struct ftrace_event_call *call, operand2 = elt->operand; else { parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); - return -EINVAL; + err = -EINVAL; + goto fail; } continue; } if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); - return -ENOSPC; + err = -ENOSPC; + goto fail; } if (elt->op == OP_AND || elt->op == OP_OR) { @@ -1293,22 +1412,44 @@ static int replace_preds(struct ftrace_event_call *call, if (!operand1 || !operand2) { parse_error(ps, FILT_ERR_MISSING_FIELD, 0); - return -EINVAL; + err = -EINVAL; + goto fail; } pred = create_pred(elt->op, operand1, operand2); add_pred: - if (!pred) - return -ENOMEM; - err = filter_add_pred(ps, call, filter, pred, dry_run); + if (!pred) { + err = -ENOMEM; + goto fail; + } + err = filter_add_pred(ps, call, filter, pred, &stack, dry_run); filter_free_pred(pred); if (err) - return err; + goto fail; operand1 = operand2 = NULL; } - return 0; + if (!dry_run) { + /* We should have one item left on the stack */ + pred = __pop_pred_stack(&stack); + if (!pred) + return -EINVAL; + /* This item is where we start from in matching */ + filter->root = pred; + /* Make sure the stack is empty */ + pred = __pop_pred_stack(&stack); + if (WARN_ON(pred)) { + err = -EINVAL; + filter->root = NULL; + goto fail; + } + } + + err = 0; +fail: + 
__free_pred_stack(&stack); + return err; } static int replace_system_preds(struct event_subsystem *system, -- cgit v0.10.2 From 55719274188f13cff9e3bd11fdd4c0e7617cd03d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 23:12:05 -0500 Subject: tracing/filter: Optimize short ciruit check The test if we should break out early for OR and AND operations can be optimized by comparing the current result with (pred->op == OP_OR) That is if the result is true and the op is an OP_OR, or if the result is false and the op is not an OP_OR (thus an OP_AND) we can break out early in either case. Otherwise we continue processing. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1039049..0a3e050 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -426,9 +426,15 @@ int filter_match_preds(struct event_filter *filter, void *rec) pred->parent, &move); continue; case MOVE_UP_FROM_LEFT: - /* Check for short circuits */ - if ((match && pred->op == OP_OR) || - (!match && pred->op == OP_AND)) { + /* + * Check for short circuits. + * + * Optimization: !!match == (pred->op == OP_OR) + * is the same as: + * if ((match && pred->op == OP_OR) || + * (!match && pred->op == OP_AND)) + */ + if (!!match == (pred->op == OP_OR)) { if (pred == root) break; pred = get_pred_parent(pred, preds, -- cgit v0.10.2 From ec126cac23945de12eb2d103374e1f7ee97c5595 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 23:14:25 -0500 Subject: tracing/filter: Check the created pred tree Since the filter walks a tree to determine if a match is made or not, if the tree was incorrectly created, it could cause an infinite loop. Add a check to walk the entire tree before assigning it as a filter to make sure the tree is correct. 
Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 0a3e050..91c9cdc 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1358,6 +1358,68 @@ static int count_preds(struct filter_parse_state *ps) return n_preds; } +/* + * The tree is walked at filtering of an event. If the tree is not correctly + * built, it may cause an infinite loop. Check here that the tree does + * indeed terminate. + */ +static int check_pred_tree(struct event_filter *filter, + struct filter_pred *root) +{ + struct filter_pred *preds; + struct filter_pred *pred; + enum move_type move = MOVE_DOWN; + int count = 0; + int done = 0; + int max; + + /* + * The max that we can hit a node is three times. + * Once going down, once coming up from left, and + * once coming up from right. This is more than enough + * since leafs are only hit a single time. + */ + max = 3 * filter->n_preds; + + preds = filter->preds; + if (!preds) + return -EINVAL; + pred = root; + + do { + if (WARN_ON(count++ > max)) + return -EINVAL; + + switch (move) { + case MOVE_DOWN: + if (pred->left != FILTER_PRED_INVALID) { + pred = &preds[pred->left]; + continue; + } + /* A leaf at the root is just a leaf in the tree */ + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + case MOVE_UP_FROM_LEFT: + pred = &preds[pred->right]; + move = MOVE_DOWN; + continue; + case MOVE_UP_FROM_RIGHT: + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + } + done = 1; + } while (!done); + + /* We are fine. 
*/ + return 0; +} + static int replace_preds(struct ftrace_event_call *call, struct event_filter *filter, struct filter_parse_state *ps, @@ -1366,6 +1428,7 @@ static int replace_preds(struct ftrace_event_call *call, { char *operand1 = NULL, *operand2 = NULL; struct filter_pred *pred; + struct filter_pred *root; struct postfix_elt *elt; struct pred_stack stack = { }; /* init to NULL */ int err; @@ -1442,7 +1505,7 @@ add_pred: if (!pred) return -EINVAL; /* This item is where we start from in matching */ - filter->root = pred; + root = pred; /* Make sure the stack is empty */ pred = __pop_pred_stack(&stack); if (WARN_ON(pred)) { @@ -1450,6 +1513,13 @@ add_pred: filter->root = NULL; goto fail; } + err = check_pred_tree(filter, root); + if (err) + goto fail; + + /* We don't set root until we know it works */ + barrier(); + filter->root = root; } err = 0; -- cgit v0.10.2 From 43cd414552d8137157e926e46361678ea867e476 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 23:16:51 -0500 Subject: tracing/filter: Optimize filter by folding the tree There are many cases that a filter will contain multiple ORs or ANDs together near the leafs. Walking up and down the tree to get to the next compare can be a waste. If there are several ORs or ANDs together, fold them into a single pred and allocate an array of the conditions that they check. This will speed up the filter by linearly walking an array and can still break out if a short circuit condition is met. 
Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index bba34a7..d754330 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -678,6 +678,7 @@ struct event_subsystem { #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) +#define FILTER_PRED_FOLD (1 << 15) struct filter_pred; struct regex; @@ -704,7 +705,16 @@ struct filter_pred { filter_pred_fn_t fn; u64 val; struct regex regex; - char *field_name; + /* + * Leaf nodes use field_name, ops is used by AND and OR + * nodes. The field_name is always freed when freeing a pred. + * We can overload field_name for ops and have it freed + * as well. + */ + union { + char *field_name; + unsigned short *ops; + }; int offset; int not; int op; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 91c9cdc..2403ce5 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -381,6 +381,42 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, return pred; } +/* + * A series of AND or ORs where found together. Instead of + * climbing up and down the tree branches, an array of the + * ops were made in order of checks. We can just move across + * the array and short circuit if needed. + */ +static int process_ops(struct filter_pred *preds, + struct filter_pred *op, void *rec) +{ + struct filter_pred *pred; + int type; + int match; + int i; + + /* + * Micro-optimization: We set type to true if op + * is an OR and false otherwise (AND). 
Then we + * just need to test if the match is equal to + * the type, and if it is, we can short circuit the + * rest of the checks: + * + * if ((match && op->op == OP_OR) || + * (!match && op->op == OP_AND)) + * return match; + */ + type = op->op == OP_OR; + + for (i = 0; i < op->val; i++) { + pred = &preds[op->ops[i]]; + match = pred->fn(pred, rec); + if (!!match == type) + return match; + } + return match; +} + /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct event_filter *filter, void *rec) { @@ -414,11 +450,16 @@ int filter_match_preds(struct event_filter *filter, void *rec) case MOVE_DOWN: /* only AND and OR have children */ if (pred->left != FILTER_PRED_INVALID) { - /* keep going to leaf node */ - pred = &preds[pred->left]; - continue; - } - match = pred->fn(pred, rec); + /* If ops is set, then it was folded. */ + if (!pred->ops) { + /* keep going to down the left side */ + pred = &preds[pred->left]; + continue; + } + /* We can treat folded ops as a leaf node */ + match = process_ops(preds, pred, rec); + } else + match = pred->fn(pred, rec); /* If this pred is the only pred */ if (pred == root) break; @@ -659,17 +700,34 @@ static int filter_set_pred(struct event_filter *filter, left = __pop_pred_stack(stack); if (!left || !right) return -EINVAL; - dest->left = left->index; - dest->right = right->index; - left->parent = dest->index; + /* + * If both children can be folded + * and they are the same op as this op or a leaf, + * then this op can be folded. 
+ */ + if (left->index & FILTER_PRED_FOLD && + (left->op == dest->op || + left->left == FILTER_PRED_INVALID) && + right->index & FILTER_PRED_FOLD && + (right->op == dest->op || + right->left == FILTER_PRED_INVALID)) + dest->index |= FILTER_PRED_FOLD; + + dest->left = left->index & ~FILTER_PRED_FOLD; + dest->right = right->index & ~FILTER_PRED_FOLD; + left->parent = dest->index & ~FILTER_PRED_FOLD; right->parent = dest->index | FILTER_PRED_IS_RIGHT; - } else + } else { /* * Make dest->left invalid to be used as a quick * way to know this is a leaf node. */ dest->left = FILTER_PRED_INVALID; + /* All leafs allow folding the parent ops. */ + dest->index |= FILTER_PRED_FOLD; + } + return __push_pred_stack(stack, dest); } @@ -1420,6 +1478,158 @@ static int check_pred_tree(struct event_filter *filter, return 0; } +static int count_leafs(struct filter_pred *preds, struct filter_pred *root) +{ + struct filter_pred *pred; + enum move_type move = MOVE_DOWN; + int count = 0; + int done = 0; + + pred = root; + + do { + switch (move) { + case MOVE_DOWN: + if (pred->left != FILTER_PRED_INVALID) { + pred = &preds[pred->left]; + continue; + } + /* A leaf at the root is just a leaf in the tree */ + if (pred == root) + return 1; + count++; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + case MOVE_UP_FROM_LEFT: + pred = &preds[pred->right]; + move = MOVE_DOWN; + continue; + case MOVE_UP_FROM_RIGHT: + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + } + done = 1; + } while (!done); + + return count; +} + +static int fold_pred(struct filter_pred *preds, struct filter_pred *root) +{ + struct filter_pred *pred; + enum move_type move = MOVE_DOWN; + int count = 0; + int children; + int done = 0; + + /* No need to keep the fold flag */ + root->index &= ~FILTER_PRED_FOLD; + + /* If the root is a leaf then do nothing */ + if (root->left == FILTER_PRED_INVALID) + return 0; + + /* count the children */ + children = 
count_leafs(preds, &preds[root->left]); + children += count_leafs(preds, &preds[root->right]); + + root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL); + if (!root->ops) + return -ENOMEM; + + root->val = children; + + pred = root; + do { + switch (move) { + case MOVE_DOWN: + if (pred->left != FILTER_PRED_INVALID) { + pred = &preds[pred->left]; + continue; + } + if (WARN_ON(count == children)) + return -EINVAL; + pred->index &= ~FILTER_PRED_FOLD; + root->ops[count++] = pred->index; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + case MOVE_UP_FROM_LEFT: + pred = &preds[pred->right]; + move = MOVE_DOWN; + continue; + case MOVE_UP_FROM_RIGHT: + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + } + done = 1; + } while (!done); + + return 0; +} + +/* + * To optimize the processing of the ops, if we have several "ors" or + * "ands" together, we can put them in an array and process them all + * together speeding up the filter logic. 
+ */ +static int fold_pred_tree(struct event_filter *filter, + struct filter_pred *root) +{ + struct filter_pred *preds; + struct filter_pred *pred; + enum move_type move = MOVE_DOWN; + int done = 0; + int err; + + preds = filter->preds; + if (!preds) + return -EINVAL; + pred = root; + + do { + switch (move) { + case MOVE_DOWN: + if (pred->index & FILTER_PRED_FOLD) { + err = fold_pred(preds, pred); + if (err) + return err; + /* Folded nodes are like leafs */ + } else if (pred->left != FILTER_PRED_INVALID) { + pred = &preds[pred->left]; + continue; + } + + /* A leaf at the root is just a leaf in the tree */ + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + case MOVE_UP_FROM_LEFT: + pred = &preds[pred->right]; + move = MOVE_DOWN; + continue; + case MOVE_UP_FROM_RIGHT: + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, &move); + continue; + } + done = 1; + } while (!done); + + return 0; +} + static int replace_preds(struct ftrace_event_call *call, struct event_filter *filter, struct filter_parse_state *ps, @@ -1517,6 +1727,11 @@ add_pred: if (err) goto fail; + /* Optimize the tree */ + err = fold_pred_tree(filter, root); + if (err) + goto fail; + /* We don't set root until we know it works */ barrier(); filter->root = root; -- cgit v0.10.2 From 4a3d27e98a7f2682e96d6f863752e0424b00d691 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 23:19:49 -0500 Subject: tracing/filter: Move MAX_FILTER_PRED to local tracing directory The MAX_FILTER_PRED is only needed by the kernel/trace/*.c files. Move it to kernel/trace/trace.h. 
Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 47e3997..1a99e79 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -208,7 +208,6 @@ struct ftrace_event_call { #define PERF_MAX_TRACE_SIZE 2048 -#define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ extern void destroy_preds(struct ftrace_event_call *call); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d754330..fbff872 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -680,6 +680,8 @@ struct event_subsystem { #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) +#define MAX_FILTER_PRED 32 + struct filter_pred; struct regex; -- cgit v0.10.2 From bf93f9ed3a2cb89eb7e58851139d3be375b98027 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 27 Jan 2011 23:21:34 -0500 Subject: tracing/filter: Increase the max preds to 2^14 Now that the filter logic no longer needs to save the pred results on the stack, we can increase the max number of preds we allow. As the preds are indexed by a short value, and we use the MSBs as flags, we can increase the max preds to 2^14 (16384) which should be way more than enough. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fbff872..856e73c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -680,7 +680,14 @@ struct event_subsystem { #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) -#define MAX_FILTER_PRED 32 +/* + * The max preds is the size of unsigned short with + * two flags at the MSBs. One bit is used for both the IS_RIGHT + * and FOLD flags. The other is reserved. + * + * 2^14 preds is way more than enough. 
+ */ +#define MAX_FILTER_PRED 16384 struct filter_pred; struct regex; -- cgit v0.10.2 From 75b8e98263fdb0bfbdeba60d4db463259f1fe8a2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Feb 2011 23:25:46 -0500 Subject: tracing/filter: Swap entire filter of events When creating a new filter, instead of allocating the filter to the event call first and then processing the filter, it is easier to process a temporary filter and then just swap it with the call filter. By doing this, it simplifies the code. A filter is allocated and processed, when it is done, it is swapped with the call filter, synchronize_sched() is called to make sure all callers are done with the old filter (filters are called with preemption disabled), and then the old filter is freed. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2403ce5..f5d335d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -425,10 +425,15 @@ int filter_match_preds(struct event_filter *filter, void *rec) struct filter_pred *preds; struct filter_pred *pred; struct filter_pred *root; - int n_preds = ACCESS_ONCE(filter->n_preds); + int n_preds; int done = 0; /* no filter is considered a match */ + if (!filter) + return 1; + + n_preds = filter->n_preds; + if (!n_preds) return 1; @@ -509,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos) static void remove_filter_string(struct event_filter *filter) { + if (!filter) + return; + kfree(filter->filter_string); filter->filter_string = NULL; } @@ -568,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps, void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) { - struct event_filter *filter = call->filter; + struct event_filter *filter; mutex_lock(&event_mutex); + filter = call->filter; if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else 
@@ -581,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s) { - struct event_filter *filter = system->filter; + struct event_filter *filter; mutex_lock(&event_mutex); + filter = system->filter; if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else @@ -745,26 +755,9 @@ static void __free_preds(struct event_filter *filter) filter->n_preds = 0; } -static void reset_preds(struct event_filter *filter) -{ - int n_preds = filter->n_preds; - int i; - - filter->n_preds = 0; - filter->root = NULL; - if (!filter->preds) - return; - - for (i = 0; i < n_preds; i++) - filter->preds[i].fn = filter_pred_none; -} - -static void filter_disable_preds(struct ftrace_event_call *call) +static void filter_disable(struct ftrace_event_call *call) { - struct event_filter *filter = call->filter; - call->flags &= ~TRACE_EVENT_FL_FILTERED; - reset_preds(filter); } static void __free_filter(struct event_filter *filter) @@ -777,11 +770,16 @@ static void __free_filter(struct event_filter *filter) kfree(filter); } +/* + * Called when destroying the ftrace_event_call. + * The call is being freed, so we do not need to worry about + * the call being currently used. This is for module code removing + * the tracepoints from within it. 
+ */ void destroy_preds(struct ftrace_event_call *call) { __free_filter(call->filter); call->filter = NULL; - call->flags &= ~TRACE_EVENT_FL_FILTERED; } static struct event_filter *__alloc_filter(void) @@ -789,11 +787,6 @@ static struct event_filter *__alloc_filter(void) struct event_filter *filter; filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!filter) - return ERR_PTR(-ENOMEM); - - filter->n_preds = 0; - return filter; } @@ -838,46 +831,28 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) return 0; } -static int init_filter(struct ftrace_event_call *call) -{ - if (call->filter) - return 0; - - call->flags &= ~TRACE_EVENT_FL_FILTERED; - call->filter = __alloc_filter(); - if (IS_ERR(call->filter)) - return PTR_ERR(call->filter); - - return 0; -} - -static int init_subsystem_preds(struct event_subsystem *system) +static void filter_free_subsystem_preds(struct event_subsystem *system) { struct ftrace_event_call *call; - int err; list_for_each_entry(call, &ftrace_events, list) { if (strcmp(call->class->system, system->name) != 0) continue; - err = init_filter(call); - if (err) - return err; + filter_disable(call); + remove_filter_string(call->filter); } - - return 0; } -static void filter_free_subsystem_preds(struct event_subsystem *system) +static void filter_free_subsystem_filters(struct event_subsystem *system) { struct ftrace_event_call *call; list_for_each_entry(call, &ftrace_events, list) { if (strcmp(call->class->system, system->name) != 0) continue; - - filter_disable_preds(call); - remove_filter_string(call->filter); + __free_filter(call->filter); + call->filter = NULL; } } @@ -1743,88 +1718,129 @@ fail: return err; } +struct filter_list { + struct list_head list; + struct event_filter *filter; +}; + static int replace_system_preds(struct event_subsystem *system, struct filter_parse_state *ps, char *filter_string) { struct ftrace_event_call *call; + struct filter_list *filter_item; + struct filter_list *tmp; + 
LIST_HEAD(filter_list); bool fail = true; int err; list_for_each_entry(call, &ftrace_events, list) { - struct event_filter *filter = call->filter; if (strcmp(call->class->system, system->name) != 0) continue; - /* try to see if the filter can be applied */ - err = replace_preds(call, filter, ps, filter_string, true); + /* + * Try to see if the filter can be applied + * (filter arg is ignored on dry_run) + */ + err = replace_preds(call, NULL, ps, filter_string, true); if (err) goto fail; } - /* set all filter pred counts to zero */ list_for_each_entry(call, &ftrace_events, list) { - struct event_filter *filter = call->filter; + struct event_filter *filter; if (strcmp(call->class->system, system->name) != 0) continue; - reset_preds(filter); - } + filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); + if (!filter_item) + goto fail_mem; - /* - * Since some of the preds may be used under preemption - * we need to wait for them to finish before we may - * reallocate them. - */ - synchronize_sched(); + list_add_tail(&filter_item->list, &filter_list); - list_for_each_entry(call, &ftrace_events, list) { - struct event_filter *filter = call->filter; + filter_item->filter = __alloc_filter(); + if (!filter_item->filter) + goto fail_mem; + filter = filter_item->filter; - if (strcmp(call->class->system, system->name) != 0) - continue; + /* Can only fail on no memory */ + err = replace_filter_string(filter, filter_string); + if (err) + goto fail_mem; - /* really apply the filter */ - filter_disable_preds(call); err = replace_preds(call, filter, ps, filter_string, false); - if (err) - filter_disable_preds(call); - else { + if (err) { + filter_disable(call); + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + append_filter_err(ps, filter); + } else call->flags |= TRACE_EVENT_FL_FILTERED; - replace_filter_string(filter, filter_string); - } + /* + * Regardless of if this returned an error, we still + * replace the filter for the call. 
+ */ + filter = call->filter; + call->filter = filter_item->filter; + filter_item->filter = filter; + fail = false; } if (fail) goto fail; + /* + * The calls can still be using the old filters. + * Do a synchronize_sched() to ensure all calls are + * done with them before we free them. + */ + synchronize_sched(); + list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { + __free_filter(filter_item->filter); + list_del(&filter_item->list); + kfree(filter_item); + } return 0; fail: + /* No call succeeded */ + list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { + list_del(&filter_item->list); + kfree(filter_item); + } parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); return -EINVAL; + fail_mem: + /* If any call succeeded, we still need to sync */ + if (!fail) + synchronize_sched(); + list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { + __free_filter(filter_item->filter); + list_del(&filter_item->list); + kfree(filter_item); + } + return -ENOMEM; } int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { - int err; struct filter_parse_state *ps; + struct event_filter *filter; + struct event_filter *tmp; + int err = 0; mutex_lock(&event_mutex); - err = init_filter(call); - if (err) - goto out_unlock; - if (!strcmp(strstrip(filter_string), "0")) { - filter_disable_preds(call); - reset_preds(call->filter); + filter_disable(call); + filter = call->filter; + if (!filter) + goto out_unlock; + call->filter = NULL; /* Make sure the filter is not being used */ synchronize_sched(); - __free_preds(call->filter); - remove_filter_string(call->filter); + __free_filter(filter); goto out_unlock; } @@ -1833,29 +1849,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) if (!ps) goto out_unlock; - filter_disable_preds(call); - replace_filter_string(call->filter, filter_string); + filter = __alloc_filter(); + if (!filter) { + kfree(ps); + goto out_unlock; + } + + replace_filter_string(filter, 
filter_string); parse_init(ps, filter_ops, filter_string); err = filter_parse(ps); if (err) { - append_filter_err(ps, call->filter); + append_filter_err(ps, filter); goto out; } - /* - * Make sure all the pred counts are zero so that - * no task is using it when we reallocate the preds array. - */ - reset_preds(call->filter); - synchronize_sched(); - - err = replace_preds(call, call->filter, ps, filter_string, false); - if (err) - append_filter_err(ps, call->filter); - else + err = replace_preds(call, filter, ps, filter_string, false); + if (err) { + filter_disable(call); + append_filter_err(ps, filter); + } else call->flags |= TRACE_EVENT_FL_FILTERED; out: + /* + * Always swap the call filter with the new filter + * even if there was an error. If there was an error + * in the filter, we disable the filter and show the error + * string + */ + tmp = call->filter; + call->filter = filter; + if (tmp) { + /* Make sure the call is done with the filter */ + synchronize_sched(); + __free_filter(tmp); + } filter_opstack_clear(ps); postfix_clear(ps); kfree(ps); @@ -1868,18 +1896,21 @@ out_unlock: int apply_subsystem_event_filter(struct event_subsystem *system, char *filter_string) { - int err; struct filter_parse_state *ps; + struct event_filter *filter; + int err = 0; mutex_lock(&event_mutex); - err = init_subsystem_preds(system); - if (err) - goto out_unlock; - if (!strcmp(strstrip(filter_string), "0")) { filter_free_subsystem_preds(system); remove_filter_string(system->filter); + filter = system->filter; + system->filter = NULL; + /* Ensure all filters are no longer used */ + synchronize_sched(); + filter_free_subsystem_filters(system); + __free_filter(filter); goto out_unlock; } @@ -1888,7 +1919,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system, if (!ps) goto out_unlock; - replace_filter_string(system->filter, filter_string); + filter = __alloc_filter(); + if (!filter) + goto out; + + replace_filter_string(filter, filter_string); + /* + * No event 
actually uses the system filter + * we can free it without synchronize_sched(). + */ + __free_filter(system->filter); + system->filter = filter; parse_init(ps, filter_ops, filter_string); err = filter_parse(ps); @@ -1945,7 +1986,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, goto out_unlock; filter = __alloc_filter(); - if (IS_ERR(filter)) { + if (!filter) { err = PTR_ERR(filter); goto out_unlock; } -- cgit v0.10.2 From 4defe682d81a4960b6840ee4ed1a36f9db77c7bd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 3 Feb 2011 23:29:06 -0500 Subject: tracing/filter: Remove synchronize_sched() from __alloc_preds() Because the filters are processed first and then activated (added to the call), we no longer need to worry about the preds of the filter in __alloc_preds() being used. As the filter that is allocating preds is not activated yet. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f5d335d..3249b4f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -795,33 +795,17 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) struct filter_pred *pred; int i; - if (filter->preds) { - if (filter->a_preds < n_preds) { - /* - * We need to reallocate. - * We should have already have zeroed out - * the pred count and called synchronized_sched() - * to make sure no one is using the preds. 
- */ - if (WARN_ON_ONCE(filter->n_preds)) { - /* We need to reset it now */ - filter->n_preds = 0; - synchronize_sched(); - } - __free_preds(filter); - } - } + if (filter->preds) + __free_preds(filter); + + filter->preds = + kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL); - if (!filter->preds) { - filter->preds = - kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL); - filter->a_preds = n_preds; - } if (!filter->preds) return -ENOMEM; - if (WARN_ON(filter->a_preds < n_preds)) - return -EINVAL; + filter->a_preds = n_preds; + filter->n_preds = 0; for (i = 0; i < n_preds; i++) { pred = &filter->preds[i]; -- cgit v0.10.2 From ba976970c79fd2fbfe1a4b3b6766a318f4eb9d4c Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:20 +1100 Subject: tracing/syscalls: Don't add events for unmapped syscalls FTRACE_SYSCALLS would create events for each and every system call, even if it had failed to map the system call's name with its number. This resulted in a number of events being created that would not behave as expected. This could happen, for example, on architectures whose symbol names are unusual and will not match the system call name. It could also happen with system calls which were mapped to sys_ni_syscall. This patch changes the default system call number in the metadata to -1. If the system call name from the metadata is not successfully mapped to a system call number during boot, then the event initialisation routine will now return an error, preventing the event from being created. 
Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-2-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 98664db..8e8968e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -158,6 +158,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __syscall_meta_##sname = { \ .name = "sys"#sname, \ + .syscall_nr = -1, /* Filled in at boot */ \ .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ @@ -175,6 +176,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __syscall_meta__##sname = { \ .name = "sys_"#sname, \ + .syscall_nr = -1, /* Filled in at boot */ \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ .exit_event = &event_exit__##sname, \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5c9fe08..a9ceabd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -424,6 +424,14 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) int init_syscall_trace(struct ftrace_event_call *call) { int id; + int num; + + num = ((struct syscall_metadata *)call->data)->syscall_nr; + if (num < 0 || num >= NR_syscalls) { + pr_debug("syscall %s metadata not mapped, disabling ftrace event\n", + ((struct syscall_metadata *)call->data)->name); + return -ENOSYS; + } if (set_syscall_print_fmt(call) < 0) return -ENOMEM; -- cgit v0.10.2 From 3773b389b6927595512558594d040c1edba46f36 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:21 +1100 Subject: tracing/syscalls: Convert redundant syscall_nr checks into WARN_ON With the ftrace events now checking if the syscall_nr is valid upon initialisation it should no longer be possible to register or unregister a syscall event without a valid syscall_nr since they should not be created. 
This adds a WARN_ON_ONCE in the register and unregister functions to locate potential regressions in the future. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-3-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a9ceabd..4230942 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -359,7 +359,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (num < 0 || num >= NR_syscalls) + if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) @@ -377,7 +377,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (num < 0 || num >= NR_syscalls) + if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) return; mutex_lock(&syscall_trace_lock); sys_refcount_enter--; @@ -393,7 +393,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (num < 0 || num >= NR_syscalls) + if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) @@ -411,7 +411,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; - if (num < 0 || num >= NR_syscalls) + if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls)) return; mutex_lock(&syscall_trace_lock); sys_refcount_exit--; -- cgit v0.10.2 From c763ba06bd9b5db2c46c36276c89103d92d2c604 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:22 +1100 Subject: tracing/syscalls: Make arch_syscall_addr weak Some architectures use non-trivial system call tables and will not work with the generic arch_syscall_addr code. 
For example, PowerPC64 uses a table of twin long longs. This patch makes the generic arch_syscall_addr weak to allow architectures with non-trivial system call tables to override it. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-4-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index dc52bd4..6fca17b 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -247,6 +247,9 @@ You need very few things to get the syscalls tracing in an arch. - Support the TIF_SYSCALL_TRACEPOINT thread flags. - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace in the ptrace syscalls tracing path. +- If the system call table on this arch is more complicated than a simple array + of addresses of the system calls, implement an arch_syscall_addr to return + the address of a given system call. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4230942..af83154 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -446,7 +446,7 @@ int init_syscall_trace(struct ftrace_event_call *call) return id; } -unsigned long __init arch_syscall_addr(int nr) +unsigned long __init __weak arch_syscall_addr(int nr) { return (unsigned long)sys_call_table[nr]; } -- cgit v0.10.2 From b2d55496818d64310b9f5486d4eea76ea614d7f8 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:23 +1100 Subject: tracing/syscalls: Allow arch specific syscall symbol matching Some architectures have unusual symbol names and the generic code to match the symbol name with the function name for the syscall metadata will fail. For example, symbols on PPC64 start with a period and the generic code will fail to match them. 
This patch moves the match logic out into a separate function which an arch can override by defining ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and implementing arch_syscall_match_sym_name. Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-5-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 6fca17b..79fcafc 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -250,6 +250,10 @@ You need very few things to get the syscalls tracing in an arch. - If the system call table on this arch is more complicated than a simple array of addresses of the system calls, implement an arch_syscall_addr to return the address of a given system call. +- If the symbol names of the system calls do not match the function names on + this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and + implement arch_syscall_match_sym_name with the appropriate logic to return + true if the function name corresponds with the symbol name. - Tag this arch as HAVE_SYSCALL_TRACEPOINTS. diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index af83154..86a23e7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -60,6 +60,19 @@ extern struct syscall_metadata *__stop_syscalls_metadata[]; static struct syscall_metadata **syscalls_metadata; +#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. 
+ */ + return !strcmp(sym + 3, name + 3); +} +#endif + static __init struct syscall_metadata * find_syscall_meta(unsigned long syscall) { @@ -73,13 +86,7 @@ find_syscall_meta(unsigned long syscall) kallsyms_lookup(syscall, NULL, NULL, NULL, str); for ( ; start < stop; start++) { - /* - * Only compare after the "sys" prefix. Archs that use - * syscall wrappers may have syscalls symbols aliases prefixed - * with "SyS" instead of "sys", leading to an unwanted - * mismatch. - */ - if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) + if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) return *start; } return NULL; -- cgit v0.10.2 From ae07f551c42d6e4162436ca452a199deac9dab4d Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 3 Feb 2011 14:27:25 +1100 Subject: tracing/syscalls: Early terminate search for sys_ni_syscall Many system calls are unimplemented and mapped to sys_ni_syscall, but at boot ftrace would still search through every syscall metadata entry for a match which wouldn't be there. This patch causes the search to terminate early if the system call is not mapped. 
Signed-off-by: Ian Munsie LKML-Reference: <1296703645-18718-7-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 86a23e7..ee7b5a0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -85,6 +85,9 @@ find_syscall_meta(unsigned long syscall) stop = __stop_syscalls_metadata; kallsyms_lookup(syscall, NULL, NULL, NULL, str); + if (arch_syscall_match_sym_name(str, "sys_ni_syscall")) + return NULL; + for ( ; start < stop; start++) { if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name)) return *start; -- cgit v0.10.2 From 5e38ca8f3ea423442eaafe1b7e206084aa38120a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Feb 2011 13:28:18 +0100 Subject: tracing: Add unstable sched clock note to the warning The "Delta way too big" warning might appear on a system with unstable sched clock right after the system is resumed and tracing was enabled during the suspend. Since it's not really a bug, and the unstable sched clock is working fast and reliable otherwise, Steven suggested to keep using the sched clock in any case and just to make note in the warning itself. Signed-off-by: Jiri Olsa LKML-Reference: <1296649698-6003-1-git-send-email-jolsa@redhat.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a..7739893 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2163,10 +2163,14 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp); + (unsigned long long)cpu_buffer->write_stamp, + sched_clock_stable ? 
"" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); add_timestamp = 1; } } -- cgit v0.10.2 From e3087b80aa0bceda9863f33307460f3ba79f2b15 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Feb 2011 15:01:39 -0200 Subject: perf annotate: Fix --stdio rendering The checks for not using a max_lines parameter were b0rked, problem introduced in 3653246. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1012841..6db4351 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -132,7 +132,7 @@ static int objdump_line__print(struct objdump_line *oline, if (percent < min_pcnt) return -1; - if (printed >= max_lines) + if (max_lines && printed >= max_lines) return 1; color = get_percent_color(percent); @@ -154,7 +154,7 @@ static int objdump_line__print(struct objdump_line *oline, color_fprintf(stdout, color, " %7.2f", percent); printf(" : "); color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line); - } else if (printed >= max_lines) + } else if (max_lines && printed >= max_lines) return 1; else { if (!*oline->line) -- cgit v0.10.2 From ce6f4fab4059cd72638a0cfa596a8ee2c79c1c8e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Feb 2011 13:27:39 -0200 Subject: perf annotate: Move locking to struct annotation Since we'll need it when implementing the live annotate TUI browser. This also simplifies things a bit by having the list head for the source code to be in the dynamically allocated part of struct annotation, that way we don't have to pass it around, it can be found from the struct symbol that is passed everywhere. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index ea6a116..4271829 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -62,7 +62,8 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) * All aggregated on the first sym_hist. */ struct annotation *notes = symbol__annotation(he->ms.sym); - if (notes->histograms == NULL && symbol__alloc_hist(he->ms.sym, 1) < 0) + if (notes->src == NULL && + symbol__alloc_hist(he->ms.sym, 1) < 0) return -ENOMEM; return hist_entry__inc_addr_samples(he, 0, al->addr); @@ -77,7 +78,8 @@ static int process_sample_event(union perf_event *event, { struct addr_location al; - if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, session, &al, sample, + symbol__annotate_init) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -111,7 +113,7 @@ static void hists__find_annotations(struct hists *self) goto find_next; notes = symbol__annotation(he->ms.sym); - if (notes->histograms == NULL) { + if (notes->src == NULL) { find_next: if (key == KEY_LEFT) nd = rb_prev(nd); @@ -142,11 +144,11 @@ find_next: nd = rb_next(nd); /* * Since we have a hist_entry per IP for the same - * symbol, free he->ms.sym->histogram to signal we already + * symbol, free he->ms.sym->src to signal we already * processed this symbol. 
*/ - free(notes->histograms); - notes->histograms = NULL; + free(notes->src); + notes->src = NULL; } } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index de06bf5..f403ace 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -123,7 +123,7 @@ static int perf_session__add_hist_entry(struct perf_session *session, * All aggregated on the first sym_hist. */ struct annotation *notes = symbol__annotation(he->ms.sym); - if (notes->histograms == NULL && + if (notes->src == NULL && symbol__alloc_hist(he->ms.sym, 1) < 0) err = -ENOMEM; else @@ -166,7 +166,8 @@ static int process_sample_event(union perf_event *event, struct addr_location al; struct perf_event_attr *attr; - if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) { + if (perf_event__preprocess_sample(event, session, &al, sample, + symbol__annotate_init) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b790673..7dbf22d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -139,7 +139,7 @@ static void sig_winch_handler(int sig __used) static int parse_source(struct sym_entry *syme) { struct symbol *sym; - struct sym_entry_source *source; + struct annotation *notes; struct map *map; int err = -1; @@ -152,39 +152,35 @@ static int parse_source(struct sym_entry *syme) /* * We can't annotate with just /proc/kallsyms */ - if (map->dso->origin == DSO__ORIG_KERNEL) + if (map->dso->origin == DSO__ORIG_KERNEL) { + pr_err("Can't annotate %s: No vmlinux file was found in the " + "path\n", sym->name); + sleep(1); return -1; - - if (syme->src == NULL) { - syme->src = zalloc(sizeof(*source)); - if (syme->src == NULL) - return -1; - pthread_mutex_init(&syme->src->lock, NULL); - INIT_LIST_HEAD(&syme->src->head); } - source = syme->src; - - if (symbol__annotation(sym)->histograms != NULL) { - 
pthread_mutex_lock(&source->lock); + notes = symbol__annotation(sym); + if (notes->src != NULL) { + pthread_mutex_lock(&notes->lock); goto out_assign; } - pthread_mutex_lock(&source->lock); + pthread_mutex_lock(&notes->lock); if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { pr_err("Not enough memory for annotating '%s' symbol!\n", sym->name); + sleep(1); goto out_unlock; } - err = symbol__annotate(sym, syme->map, &source->head, 0); + err = symbol__annotate(sym, syme->map, 0); if (err == 0) { out_assign: sym_filter_entry = syme; } out_unlock: - pthread_mutex_unlock(&source->lock); + pthread_mutex_unlock(&notes->lock); return err; } @@ -196,20 +192,27 @@ static void __zero_source_counters(struct sym_entry *syme) static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) { + struct annotation *notes; + struct symbol *sym; + if (syme != sym_filter_entry) return; - if (pthread_mutex_trylock(&syme->src->lock)) + sym = sym_entry__symbol(syme); + notes = symbol__annotation(sym); + + if (pthread_mutex_trylock(&notes->lock)) return; ip = syme->map->map_ip(syme->map, ip); - symbol__inc_addr_samples(sym_entry__symbol(syme), syme->map, counter, ip); + symbol__inc_addr_samples(sym, syme->map, counter, ip); - pthread_mutex_unlock(&syme->src->lock); + pthread_mutex_unlock(&notes->lock); } static void show_details(struct sym_entry *syme) { + struct annotation *notes; struct symbol *symbol; int more; @@ -217,24 +220,26 @@ static void show_details(struct sym_entry *syme) return; symbol = sym_entry__symbol(syme); - if (!syme->src || symbol__annotation(symbol)->histograms == NULL) - return; + notes = symbol__annotation(symbol); + + pthread_mutex_lock(&notes->lock); + + if (notes->src == NULL) + goto out_unlock; printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); - pthread_mutex_lock(&syme->src->lock); - more = symbol__annotate_printf(symbol, syme->map, &syme->src->head, - top.sym_evsel->idx, 0, 
sym_pcnt_filter, - top.print_entries); + more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx, + 0, sym_pcnt_filter, top.print_entries); if (top.zero) symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); else - symbol__annotate_decay_histogram(symbol, &syme->src->head, - top.sym_evsel->idx); - pthread_mutex_unlock(&syme->src->lock); + symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx); if (more != 0) printf("%d lines not displayed, maybe increase display entries [e]\n", more); +out_unlock: + pthread_mutex_unlock(&notes->lock); } static const char CONSOLE_CLEAR[] = ""; @@ -372,10 +377,8 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) /* zero counters of active symbol */ if (syme) { - pthread_mutex_lock(&syme->src->lock); __zero_source_counters(syme); *target = NULL; - pthread_mutex_unlock(&syme->src->lock); } fprintf(stdout, "\n%s: ", msg); @@ -554,10 +557,8 @@ static void handle_keypress(struct perf_session *session, int c) else { struct sym_entry *syme = sym_filter_entry; - pthread_mutex_lock(&syme->src->lock); sym_filter_entry = NULL; __zero_source_counters(syme); - pthread_mutex_unlock(&syme->src->lock); } break; case 'U': @@ -653,7 +654,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) syme = symbol__priv(sym); syme->map = map; - syme->src = NULL; + symbol__annotate_init(map, sym); if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { /* schedule initial sym_filter_entry setup */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 6db4351..c777bda 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -14,25 +14,39 @@ #include "symbol.h" #include "debug.h" #include "annotate.h" +#include <pthread.h> -int symbol__alloc_hist(struct symbol *sym, int nevents) +int symbol__annotate_init(struct map *map __used, struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); + pthread_mutex_init(&notes->lock, NULL); + return 0; +} - 
notes->sizeof_sym_hist = (sizeof(*notes->histograms) + +int symbol__alloc_hist(struct symbol *sym, int nevents) +{ + struct annotation *notes = symbol__annotation(sym); + size_t sizeof_sym_hist = (sizeof(struct sym_hist) + (sym->end - sym->start) * sizeof(u64)); - notes->histograms = calloc(nevents, notes->sizeof_sym_hist); - notes->nr_histograms = nevents; - return notes->histograms == NULL ? -1 : 0; + + notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist); + if (notes->src == NULL) + return -1; + notes->src->sizeof_sym_hist = sizeof_sym_hist; + notes->src->nr_histograms = nevents; + INIT_LIST_HEAD(&notes->src->source); + return 0; } void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - if (notes->histograms != NULL) - memset(notes->histograms, 0, - notes->nr_histograms * notes->sizeof_sym_hist); + pthread_mutex_lock(&notes->lock); + if (notes->src != NULL) + memset(notes->src->histograms, 0, + notes->src->nr_histograms * notes->src->sizeof_sym_hist); + pthread_mutex_unlock(&notes->lock); } int symbol__inc_addr_samples(struct symbol *sym, struct map *map, @@ -43,7 +57,7 @@ int symbol__inc_addr_samples(struct symbol *sym, struct map *map, struct sym_hist *h; notes = symbol__annotation(sym); - if (notes->histograms == NULL) + if (notes->src == NULL) return -ENOMEM; pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr)); @@ -95,8 +109,7 @@ struct objdump_line *objdump__get_next_ip_line(struct list_head *head, return NULL; } -static int objdump_line__print(struct objdump_line *oline, - struct list_head *head, struct symbol *sym, +static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, int evidx, u64 len, int min_pcnt, int printed, int max_lines) { @@ -109,10 +122,12 @@ static int objdump_line__print(struct objdump_line *oline, double percent = 0.0; const char *color; struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line =
notes->src_line; + struct source_line *src_line = notes->src->lines; struct sym_hist *h = annotation__histogram(notes, evidx); s64 offset = oline->offset; - struct objdump_line *next = objdump__get_next_ip_line(head, oline); + struct objdump_line *next; + + next = objdump__get_next_ip_line(&notes->src->source, oline); while (offset < (s64)len && (next == NULL || offset < next->offset)) { @@ -166,9 +181,10 @@ static int objdump_line__print(struct objdump_line *oline, return 0; } -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, FILE *file, - struct list_head *head, size_t privsize) +static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, + FILE *file, size_t privsize) { + struct annotation *notes = symbol__annotation(sym); struct objdump_line *objdump_line; char *line = NULL, *tmp, *tmp2, *c; size_t line_len; @@ -222,13 +238,12 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, FILE free(line); return -1; } - objdump__add_line(head, objdump_line); + objdump__add_line(&notes->src->source, objdump_line); return 0; } -int symbol__annotate(struct symbol *sym, struct map *map, - struct list_head *head, size_t privsize) +int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) { struct dso *dso = map->dso; char *filename = dso__build_id_filename(dso, NULL, 0); @@ -297,7 +312,7 @@ fallback: goto out_free_filename; while (!feof(file)) - if (symbol__parse_objdump_line(sym, map, file, head, privsize) < 0) + if (symbol__parse_objdump_line(sym, map, file, privsize) < 0) break; pclose(file); @@ -330,14 +345,14 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin static void symbol__free_source_line(struct symbol *sym, int len) { struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src_line; + struct source_line *src_line = notes->src->lines; int i; for (i = 0; i < len; i++) free(src_line[i].path); free(src_line); -
notes->src_line = NULL; + notes->src->lines = NULL; } /* Get the filename:line for the colored entries */ @@ -355,8 +370,8 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, if (!h->sum) return 0; - src_line = notes->src_line = calloc(len, sizeof(struct source_line)); - if (!notes->src_line) + src_line = notes->src->lines = calloc(len, sizeof(struct source_line)); + if (!notes->src->lines) return -1; start = map->unmap_ip(map, sym->start); @@ -436,12 +451,12 @@ static void symbol__annotate_hits(struct symbol *sym, int evidx) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum); } -int symbol__annotate_printf(struct symbol *sym, struct map *map, - struct list_head *head, int evidx, bool full_paths, - int min_pcnt, int max_lines) +int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, + bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; const char *filename = dso->long_name, *d_filename; + struct annotation *notes = symbol__annotation(sym); struct objdump_line *pos; int printed = 2; int more = 0; @@ -460,8 +475,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose) symbol__annotate_hits(sym, evidx); - list_for_each_entry(pos, head, node) { - switch (objdump_line__print(pos, head, sym, evidx, len, min_pcnt, + list_for_each_entry(pos, ¬es->src->source, node) { + switch (objdump_line__print(pos, sym, evidx, len, min_pcnt, printed, max_lines)) { case 0: ++printed; @@ -485,11 +500,10 @@ void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); - memset(h, 0, notes->sizeof_sym_hist); + memset(h, 0, notes->src->sizeof_sym_hist); } -void symbol__annotate_decay_histogram(struct symbol *sym, - struct list_head *head, int evidx) +void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) { struct annotation *notes = symbol__annotation(sym); 
struct sym_hist *h = annotation__histogram(notes, evidx); @@ -497,7 +511,7 @@ void symbol__annotate_decay_histogram(struct symbol *sym, h->sum = 0; - list_for_each_entry(pos, head, node) { + list_for_each_entry(pos, &notes->src->source, node) { if (pos->offset != -1) { h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8; h->sum += h->addr[pos->offset]; @@ -522,10 +536,9 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, struct dso *dso = map->dso; const char *filename = dso->long_name; struct rb_root source_line = RB_ROOT; - LIST_HEAD(head); u64 len; - if (symbol__annotate(sym, map, &head, 0) < 0) + if (symbol__annotate(sym, map, 0) < 0) return -1; len = sym->end - sym->start; @@ -536,12 +549,12 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, print_summary(&source_line, filename); } - symbol__annotate_printf(sym, map, &head, evidx, full_paths, + symbol__annotate_printf(sym, map, evidx, full_paths, min_pcnt, max_lines); if (print_lines) symbol__free_source_line(sym, len); - objdump_line_list__purge(&head); + objdump_line_list__purge(&symbol__annotation(sym)->src->source); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index bc08b36..b237c86 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -28,22 +28,29 @@ struct source_line { char *path; }; -/** struct annotation - symbols with hits have this attached as in sannotation +/** struct annotated_source - symbols with hits have this attached as in sannotation * * @histogram: Array of addr hit histograms per event being monitored - * @src_line: If 'print_lines' is specified, per source code line percentages + * @lines: If 'print_lines' is specified, per source code line percentages + * @source: source parsed from objdump -dS * - * src_line is allocated, percentages calculated and all sorted by percentage + * lines is allocated, percentages calculated and all sorted by percentage * when the annotation is about to
be presented, so the percentages are for * one of the entries in the histogram array, i.e. for the event/counter being * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate * returns. */ -struct annotation { - struct source_line *src_line; - struct sym_hist *histograms; +struct annotated_source { + struct list_head source; + struct source_line *lines; int nr_histograms; int sizeof_sym_hist; + struct sym_hist histograms[0]; +}; + +struct annotation { + pthread_mutex_t lock; + struct annotated_source *src; }; struct sannotation { @@ -53,7 +60,8 @@ struct sannotation { static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { - return ((void *)notes->histograms) + (notes->sizeof_sym_hist * idx); + return (((void *)&notes->src->histograms) + + (notes->src->sizeof_sym_hist * idx)); } static inline struct annotation *symbol__annotation(struct symbol *sym) @@ -67,14 +75,12 @@ int symbol__inc_addr_samples(struct symbol *sym, struct map *map, int symbol__alloc_hist(struct symbol *sym, int nevents); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__annotate(struct symbol *sym, struct map *map, - struct list_head *head, size_t privsize); -int symbol__annotate_printf(struct symbol *sym, struct map *map, - struct list_head *head, int evidx, bool full_paths, - int min_pcnt, int max_lines); +int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); +int symbol__annotate_init(struct map *map __used, struct symbol *sym); +int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, + bool full_paths, int min_pcnt, int max_lines); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); -void symbol__annotate_decay_histogram(struct symbol *sym, - struct list_head *head, int evidx); +void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void objdump_line_list__purge(struct list_head *head); int symbol__tty_annotate(struct symbol *sym, struct map *map,
int evidx, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index bac5ab6..3f43723 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -955,10 +955,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip) return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip); } -int hist_entry__annotate(struct hist_entry *he, struct list_head *head, - size_t privsize) +int hist_entry__annotate(struct hist_entry *he, size_t privsize) { - return symbol__annotate(he->ms.sym, he->ms.map, head, privsize); + return symbol__annotate(he->ms.sym, he->ms.map, privsize); } void hists__inc_nr_events(struct hists *self, u32 type) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2c6cdae..37c7908 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -78,8 +78,7 @@ size_t hists__fprintf(struct hists *self, struct hists *pair, bool show_displacement, FILE *fp); int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr); -int hist_entry__annotate(struct hist_entry *self, struct list_head *head, - size_t privsize); +int hist_entry__annotate(struct hist_entry *self, size_t privsize); void hists__filter_by_dso(struct hists *self, const struct dso *dso); void hists__filter_by_thread(struct hists *self, const struct thread *thread); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 62e3293..4f769f4 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -11,11 +11,6 @@ struct perf_evlist; struct perf_evsel; -struct sym_entry_source { - struct list_head head; - pthread_mutex_t lock; -}; - struct sym_entry { struct rb_node rb_node; struct list_head node; @@ -24,7 +19,6 @@ struct sym_entry { int skip; u8 origin; struct map *map; - struct sym_entry_source *src; unsigned long count[0]; }; diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 8d8a168..1aa3965 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ 
b/tools/perf/util/ui/browsers/annotate.c @@ -60,7 +60,6 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro } static double objdump_line__calc_percent(struct objdump_line *self, - struct list_head *head, struct symbol *sym, int evidx) { double percent = 0.0; @@ -69,11 +68,12 @@ static double objdump_line__calc_percent(struct objdump_line *self, int len = sym->end - sym->start; unsigned int hits = 0; struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src_line; + struct source_line *src_line = notes->src->lines; struct sym_hist *h = annotation__histogram(notes, evidx); s64 offset = self->offset; - struct objdump_line *next = objdump__get_next_ip_line(head, self); + struct objdump_line *next; + next = objdump__get_next_ip_line(&notes->src->source, self); while (offset < (s64)len && (next == NULL || offset < next->offset)) { if (src_line) { @@ -192,10 +192,10 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) { struct objdump_line *pos, *n; struct objdump_line_rb_node *rbpos; - LIST_HEAD(head); + struct annotation *notes = symbol__annotation(sym); struct annotate_browser browser = { .b = { - .entries = &head, + .entries = &notes->src->source, .refresh = ui_browser__list_head_refresh, .seek = ui_browser__list_head_seek, .write = annotate_browser__write, @@ -210,20 +210,20 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) if (map->dso->annotate_warned) return -1; - if (symbol__annotate(sym, map, &head, sizeof(*rbpos)) < 0) { + if (symbol__annotate(sym, map, sizeof(*rbpos)) < 0) { ui__error_window(ui_helpline__last_msg); return -1; } ui_helpline__push("Press <- or ESC to exit"); - list_for_each_entry(pos, &head, node) { + list_for_each_entry(pos, &notes->src->source, node) { size_t line_len = strlen(pos->line); if (browser.b.width < line_len) browser.b.width = line_len; rbpos = objdump_line__rb(pos); rbpos->idx = browser.b.nr_entries++; - rbpos->percent =
objdump_line__calc_percent(pos, &head, sym, evidx); + rbpos->percent = objdump_line__calc_percent(pos, sym, evidx); if (rbpos->percent < 0.01) continue; objdump__insert_line(&browser.entries, rbpos); @@ -238,7 +238,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) browser.b.width += 18; /* Percentage */ ret = annotate_browser__run(&browser); - list_for_each_entry_safe(pos, n, &head, node) { + list_for_each_entry_safe(pos, n, &notes->src->source, node) { list_del(&pos->node); objdump_line__free(pos); } -- cgit v0.10.2 From d5e3d747007fdb541e57ed72e020ff0b94db3470 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Feb 2011 15:29:25 -0200 Subject: perf annotate: Fix annotate context lines regression The live annotation done in 'perf top' needs to limit the context before lines that aren't filtered out by the min percent filter, if we don't do that, the screen in a tty often is not enough for showing what is interesting: lines with hits and a few source code lines before it.
Reported-by: Mike Galbraith Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7dbf22d..210c736 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -231,7 +231,7 @@ static void show_details(struct sym_entry *syme) printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx, - 0, sym_pcnt_filter, top.print_entries); + 0, sym_pcnt_filter, top.print_entries, 4); if (top.zero) symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx); else diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index c777bda..02976b8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -111,7 +111,8 @@ struct objdump_line *objdump__get_next_ip_line(struct list_head *head, static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, int evidx, u64 len, int min_pcnt, - int printed, int max_lines) + int printed, int max_lines, + struct objdump_line *queue) { static const char *prev_line; static const char *prev_color; @@ -150,6 +151,15 @@ static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, if (max_lines && printed >= max_lines) return 1; + if (queue != NULL) { + list_for_each_entry_from(queue, &notes->src->source, node) { + if (queue == oline) + break; + objdump_line__print(queue, sym, evidx, len, + 0, 0, 1, NULL); + } + } + color = get_percent_color(percent); /* @@ -172,6 +182,9 @@ static int objdump_line__print(struct objdump_line *oline, struct symbol *sym, } else if (max_lines && printed >= max_lines) return 1; else { + if (queue) + return -1; + if (!*oline->line) printf(" :\n"); else @@ -452,13 +465,14 @@ static void symbol__annotate_hits(struct symbol *sym, int evidx) } int symbol__annotate_printf(struct symbol *sym,
struct map *map, int evidx, - bool full_paths, int min_pcnt, int max_lines) + bool full_paths, int min_pcnt, int max_lines, + int context) { struct dso *dso = map->dso; const char *filename = dso->long_name, *d_filename; struct annotation *notes = symbol__annotation(sym); - struct objdump_line *pos; - int printed = 2; + struct objdump_line *pos, *queue = NULL; + int printed = 2, queue_len = 0; int more = 0; u64 len; @@ -476,10 +490,20 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, symbol__annotate_hits(sym, evidx); list_for_each_entry(pos, &notes->src->source, node) { + if (context && queue == NULL) { + queue = pos; + queue_len = 0; + } + switch (objdump_line__print(pos, sym, evidx, len, min_pcnt, - printed, max_lines)) { + printed, max_lines, queue)) { case 0: ++printed; + if (context) { + printed += queue_len; + queue = NULL; + queue_len = 0; + } break; case 1: /* filtered by max_lines */ @@ -487,7 +511,16 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx, break; case -1: default: - /* filtered by min_pcnt */ + /* + * Filtered by min_pcnt or non IP lines when + * context != 0 + */ + if (!context) + break; + if (queue_len == context) + queue = list_entry(queue->node.next, typeof(*queue), node); + else + ++queue_len; break; } } @@ -550,7 +583,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, } symbol__annotate_printf(sym, map, evidx, full_paths, - min_pcnt, max_lines); + min_pcnt, max_lines, 0); if (print_lines) symbol__free_source_line(sym, len); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b237c86..e848803 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -78,7 +78,8 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); int symbol__annotate_init(struct map *map __used, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym,
struct map *map, int evidx, - bool full_paths, int min_pcnt, int max_lines); + bool full_paths, int min_pcnt, int max_lines, + int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void objdump_line_list__purge(struct list_head *head); -- cgit v0.10.2 From cce1dac871f387d0f3da81440d85bd387d8fd5a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 24 Jan 2011 21:12:01 +0100 Subject: trivial: Fix Steven's Copyright typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OK, the copyright allows you to write a copy, still I think the lawyers prefer the correct spelling. Signed-off-by: Uwe Kleine-König LKML-Reference: <1295899921-11333-1-git-send-email-u.kleine-koenig@pengutronix.de> Signed-off-by: Steven Rostedt diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index fd81fc3..a4fe923 100644 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w # -# Copywrite 2005-2009 - Steven Rostedt +# Copyright 2005-2009 - Steven Rostedt # Licensed under the terms of the GNU GPL License version 2 # # It's simple enough to figure out how this works. diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index e1c62ee..ba7c63a 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl -w # -# Copywrite 2010 - Steven Rostedt , Red Hat Inc. +# Copyright 2010 - Steven Rostedt , Red Hat Inc. # Licensed under the terms of the GNU GPL License version 2 # -- cgit v0.10.2 From f4d5c029bd6731baac0937324cef0f746e7d5ea7 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 26 Jan 2011 16:49:00 +0800 Subject: tracing: Compile time initialization for event flags value Compile time initialization is better than runtime initialization. 
Remove many early_initcall()s and many trace_init_flags_##name()s. Acked-by: Frederic Weisbecker Signed-off-by: Lai Jiangshan LKML-Reference: <4D3FDFFC.6030304@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8e8968e..a17fcea 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -132,11 +132,11 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_enter, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ + .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) \ - *__event_enter_##sname = &event_enter_##sname; \ - __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY) + *__event_enter_##sname = &event_enter_##sname; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct syscall_metadata __syscall_meta_##sname; \ @@ -146,11 +146,11 @@ extern struct trace_event_functions exit_syscall_print_funcs; .class = &event_class_syscall_exit, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ + .flags = TRACE_EVENT_FL_CAP_ANY, \ }; \ static struct ftrace_event_call __used \ __attribute__((section("_ftrace_events"))) \ - *__event_exit_##sname = &event_exit_##sname; \ - __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY) + *__event_exit_##sname = &event_exit_##sname; #define SYSCALL_METADATA(sname, nb) \ SYSCALL_TRACE_ENTER_EVENT(sname); \ -- cgit v0.10.2 From 87d80de2800d087ea833cb79bc13f85ff34ed49f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Feb 2011 13:19:49 -0500 Subject: tracing: Remove obsolete sched_switch tracer The trace events sched_switch and sched_wakeup do the same thing as the stand alone sched_switch tracer does. It is no longer needed. 
Signed-off-by: Steven Rostedt diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 557c1ed..65eddb7 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -202,10 +202,6 @@ Here is the list of current tracers that may be configured. to draw a graph of function calls similar to C code source. - "sched_switch" - - Traces the context switches and wakeups between tasks. - "irqsoff" Traces the areas that disable interrupts and saves @@ -273,39 +269,6 @@ format, the function name that was traced "path_put" and the parent function that called this function "path_walk". The timestamp is the time at which the function was entered. -The sched_switch tracer also includes tracing of task wakeups -and context switches. - - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S - ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R - events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R - kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R - ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R - -Wake ups are represented by a "+" and the context switches are -shown as "==>". The format is: - - Context switches: - - Previous task Next Task - - :: ==> :: - - Wake ups: - - Current task Task waking up - - :: + :: - -The prio is the internal kernel priority, which is the inverse -of the priority that is usually displayed by user-space tools. -Zero represents the highest priority (99). Prio 100 starts the -"nice" priorities with 100 being equal to nice -20 and 139 being -nice 19. The prio "140" is reserved for the idle task which is -the lowest priority thread (pid 0). - - Latency trace format -------------------- @@ -491,79 +454,6 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] latencies, as described in "Latency trace format". -sched_switch ------------- - -This tracer simply records schedule switches. Here is an example -of how to use it. 
- - # echo sched_switch > current_tracer - # echo 1 > tracing_enabled - # sleep 1 - # echo 0 > tracing_enabled - # cat trace - -# tracer: sched_switch -# -# TASK-PID CPU# TIMESTAMP FUNCTION -# | | | | | - bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R - bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R - sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R - bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S - bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R - sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R - bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D - bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R - -0 [00] 240.132589: 0:140:R + 4:115:S - -0 [00] 240.132591: 0:140:R ==> 4:115:R - ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R - -0 [00] 240.132598: 0:140:R + 4:115:S - -0 [00] 240.132599: 0:140:R ==> 4:115:R - ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R - sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R - [...] - - -As we have discussed previously about this format, the header -shows the name of the trace and points to the options. The -"FUNCTION" is a misnomer since here it represents the wake ups -and context switches. - -The sched_switch file only lists the wake ups (represented with -'+') and context switches ('==>') with the previous task or -current task first followed by the next task or task waking up. -The format for both of these is PID:KERNEL-PRIO:TASK-STATE. -Remember that the KERNEL-PRIO is the inverse of the actual -priority with zero (0) being the highest priority and the nice -values starting at 100 (nice -20). Below is a quick chart to map -the kernel priority to user land priorities. 
- - Kernel Space User Space - =============================================================== - 0(high) to 98(low) user RT priority 99(high) to 1(low) - with SCHED_RR or SCHED_FIFO - --------------------------------------------------------------- - 99 sched_priority is not used in scheduling - decisions(it must be specified as 0) - --------------------------------------------------------------- - 100(high) to 139(low) user nice -20(high) to 19(low) - --------------------------------------------------------------- - 140 idle task priority - --------------------------------------------------------------- - -The task states are: - - R - running : wants to run, may not actually be running - S - sleep : process is waiting to be woken up (handles signals) - D - disk sleep (uninterruptible sleep) : process must be woken up - (ignores signals) - T - stopped : process suspended - t - traced : process is being traced (with something like gdb) - Z - zombie : process waiting to be cleaned up - X - unknown - - ftrace_enabled -------------- diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 8f758d0..7e62c0a 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr) ctx_trace = tr; } -static void stop_sched_trace(struct trace_array *tr) -{ - tracing_stop_sched_switch_record(); -} - -static int sched_switch_trace_init(struct trace_array *tr) -{ - ctx_trace = tr; - tracing_reset_online_cpus(tr); - tracing_start_sched_switch_record(); - return 0; -} - -static void sched_switch_trace_reset(struct trace_array *tr) -{ - if (sched_ref) - stop_sched_trace(tr); -} - -static void sched_switch_trace_start(struct trace_array *tr) -{ - sched_stopped = 0; -} - -static void sched_switch_trace_stop(struct trace_array *tr) -{ - sched_stopped = 1; -} - -static struct tracer sched_switch_trace __read_mostly = -{ - .name = "sched_switch", - .init = 
sched_switch_trace_init, - .reset = sched_switch_trace_reset, - .start = sched_switch_trace_start, - .stop = sched_switch_trace_stop, - .wait_pipe = poll_wait_pipe, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_sched_switch, -#endif -}; - -__init static int init_sched_switch_trace(void) -{ - return register_tracer(&sched_switch_trace); -} -device_initcall(init_sched_switch_trace); - -- cgit v0.10.2 From 6752ab4a9c30d5411b2dfdb251a3f1cb18aae487 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Feb 2011 13:54:06 -0500 Subject: tracing: Deprecate tracing_enabled for tracing_on tracing_enabled should not be used, it is heavy weight and does not do much in helping lower the overhead. tracing_on should be used instead. Warn users to use tracing_on when tracing_enabled is used as it will soon be removed from the tracing directory. Signed-off-by: Steven Rostedt diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 65eddb7..67f1cc4 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files: tracers listed here can be configured by echoing their name into current_tracer. - tracing_enabled: + tracing_on: - This sets or displays whether the current_tracer - is activated and tracing or not. Echo 0 into this - file to disable the tracer or 1 to enable it. + This sets or displays whether writing to the trace + ring buffer is enabled. Echo 0 into this file to disable + the tracer or 1 to enable it. trace: @@ -497,10 +497,10 @@ an example: # echo irqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: irqsoff # @@ -605,10 +605,10 @@ is much like the irqsoff tracer. 
# echo preemptoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: preemptoff # @@ -753,10 +753,10 @@ tracers. # echo preemptirqsoff > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # ls -ltr [...] - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: preemptirqsoff # @@ -916,9 +916,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under # echo wakeup > current_tracer # echo latency-format > trace_options # echo 0 > tracing_max_latency - # echo 1 > tracing_enabled + # echo 1 > tracing_on # chrt -f 5 sleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: wakeup # @@ -1030,9 +1030,9 @@ ftrace_enabled is set; otherwise this tracer is a nop. # sysctl kernel.ftrace_enabled=1 # echo function > current_tracer - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: function # @@ -1070,7 +1070,7 @@ int trace_fd; [...] int main(int argc, char *argv[]) { [...] - trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); + trace_fd = open(tracing_file("tracing_on"), O_WRONLY); [...] if (condition_hit()) { write(trace_fd, "0", 1); @@ -1521,9 +1521,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt: # echo sys_nanosleep hrtimer_interrupt \ > set_ftrace_filter # echo function > current_tracer - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: ftrace # @@ -1769,9 +1769,9 @@ different. The trace is live. 
# echo function > current_tracer # cat trace_pipe > /tmp/trace.out & [1] 4153 - # echo 1 > tracing_enabled + # echo 1 > tracing_on # usleep 1 - # echo 0 > tracing_enabled + # echo 0 > tracing_on # cat trace # tracer: function # diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dc53ecb..8dc8da6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2710,6 +2710,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf, mutex_lock(&trace_types_lock); if (tracer_enabled ^ val) { + + /* Only need to warn if this is used to change the state */ + WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on"); + if (val) { tracer_enabled = 1; if (current_trace->start) -- cgit v0.10.2 From 0849327d13a0bd7f6512b7c21f4b3e79efb2076d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Feb 2011 12:09:54 -0200 Subject: perf report: Fix initialization of annotate symbol priv area We only allocate it when in TUI mode. In --stdio mode unconditionally initializing this area leads to memory corruption.
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f403ace..f9a99a1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -44,6 +44,7 @@ static const char default_pretty_printing_style[] = "normal"; static const char *pretty_printing_style = default_pretty_printing_style; static char callchain_default_opt[] = "fractal,0.5"; +static symbol_filter_t annotate_init; static struct hists *perf_session__hists_findnew(struct perf_session *self, u64 event_stream, u32 type, @@ -167,7 +168,7 @@ static int process_sample_event(union perf_event *event, struct perf_event_attr *attr; if (perf_event__preprocess_sample(event, session, &al, sample, - symbol__annotate_init) < 0) { + annotate_init) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -520,6 +521,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) */ if (use_browser > 0) { symbol_conf.priv_size = sizeof(struct annotation); + annotate_init = symbol__annotate_init; /* * For searching by name on the "Browse map details". * providing it only in verbose mode not to bloat too -- cgit v0.10.2 From 868baf07b1a259f5f3803c1dc2777b6c358f83cf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Feb 2011 21:26:13 -0500 Subject: ftrace: Fix memory leak with function graph and cpu hotplug When the function graph tracer starts, it needs to make a special stack for each task to save the real return values of the tasks. All running tasks have this stack created, as well as any new tasks. On CPU hot plug, the new idle task will allocate a stack as well when init_idle() is called. The problem is that cpu hotplug does not create a new idle_task. Instead it uses the idle task that existed when the cpu went down.
ftrace_graph_init_task() will add a new ret_stack to the task that is given to it. Because a clone will make the task have a stack of its parent it does not check if the task's ret_stack is already NULL or not. When the CPU hotplug code starts a CPU up again, it will allocate a new stack even though one already existed for it. The solution is to treat the idle_task specially. In fact, the function_graph code already does, just not at init_idle(). Instead of using the ftrace_graph_init_task() for the idle task, which that function expects the task to be a clone, have a separate ftrace_graph_init_idle_task(). Also, we will create a per_cpu ret_stack that is used by the idle task. When we call ftrace_graph_init_idle_task() it will check if the idle task's ret_stack is NULL, if it is, then it will assign it the per_cpu ret_stack. Reported-by: Benjamin Herrenschmidt Suggested-by: Peter Zijlstra Cc: Stable Tree Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dcd6a7c..ca29e03 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -428,6 +428,7 @@ extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); +extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu); static inline int task_curr_ret_stack(struct task_struct *t) { @@ -451,6 +452,7 @@ static inline void unpause_graph_tracing(void) static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } +static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { } static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4..fbe86cb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5571,7 +5571,7 @@ void __cpuinit init_idle(struct task_struct 
*idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; - ftrace_graph_init_task(idle); + ftrace_graph_init_idle_task(idle, cpu); } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f3dadae..888b611 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3328,7 +3328,7 @@ static int start_graph_tracing(void) /* The cpu_boot init_task->ret_stack will never be freed */ for_each_online_cpu(cpu) { if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_task(idle_task(cpu)); + ftrace_graph_init_idle_task(idle_task(cpu), cpu); } do { @@ -3418,6 +3418,49 @@ void unregister_ftrace_graph(void) mutex_unlock(&ftrace_lock); } +static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); + +static void +graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) +{ + atomic_set(&t->tracing_graph_pause, 0); + atomic_set(&t->trace_overrun, 0); + t->ftrace_timestamp = 0; + /* make curr_ret_stack visable before we add the ret_stack */ + smp_wmb(); + t->ret_stack = ret_stack; +} + +/* + * Allocate a return stack for the idle task. May be the first + * time through, or it may be done by CPU hotplug online. + */ +void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) +{ + t->curr_ret_stack = -1; + /* + * The idle task has no parent, it either has its own + * stack or no stack at all. 
+ */ + if (t->ret_stack) + WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); + + if (ftrace_graph_active) { + struct ftrace_ret_stack *ret_stack; + + ret_stack = per_cpu(idle_ret_stack, cpu); + if (!ret_stack) { + ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!ret_stack) + return; + per_cpu(idle_ret_stack, cpu) = ret_stack; + } + graph_init_task(t, ret_stack); + } +} + /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { @@ -3433,12 +3476,7 @@ void ftrace_graph_init_task(struct task_struct *t) GFP_KERNEL); if (!ret_stack) return; - atomic_set(&t->tracing_graph_pause, 0); - atomic_set(&t->trace_overrun, 0); - t->ftrace_timestamp = 0; - /* make curr_ret_stack visable before we add the ret_stack */ - smp_wmb(); - t->ret_stack = ret_stack; + graph_init_task(t, ret_stack); } } -- cgit v0.10.2 From 0de4b34d466bae571b50f41c7296b85248205e35 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 14 Feb 2011 14:48:07 +0900 Subject: tracing/kprobe: Fix NULL pointer deref check Add NULL check for avoiding NULL pointer deref. This bug has been introduced by: 1ff511e35ed8: tracing/kprobes: Add bitfield type which causes a null pointer dereference bug when kprobe-tracer parses an argument without type. 
Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Peter Zijlstra LKML-Reference: <20110214054807.8919.69740.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Ingo Molnar Reported-by: Arnaldo Carvalho de Melo diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ccdc542..8435b43 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -935,7 +935,7 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp, parg->offset = tp->size; tp->size += parg->type->size; ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); - if (ret >= 0) + if (ret >= 0 && t != NULL) ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch); if (ret >= 0) { parg->fetch_size.fn = get_fetch_size_function(parg->type, -- cgit v0.10.2 From d41d5a01631af821d3a3447e6613a316f5ee6c25 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Feb 2011 17:02:20 +0100 Subject: cgroup: Fix cgroup_subsys::exit callback Make the ::exit method act like ::attach, it is after all very nearly the same thing. The bug had no effect on correctness - fixing it is an optimization for the scheduler. Also, later perf-cgroups patches rely on it. 
Signed-off-by: Peter Zijlstra Acked-by: Paul Menage LKML-Reference: <1297160655.13327.92.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ce104e3..38117d9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -474,7 +474,8 @@ struct cgroup_subsys { struct cgroup *old_cgrp, struct task_struct *tsk, bool threadgroup); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); - void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); + void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup *old_cgrp, struct task_struct *task); int (*populate)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b24d702..f6495f3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4230,20 +4230,8 @@ void cgroup_post_fork(struct task_struct *child) */ void cgroup_exit(struct task_struct *tsk, int run_callbacks) { - int i; struct css_set *cg; - - if (run_callbacks && need_forkexit_callback) { - /* - * modular subsystems can't use callbacks, so no need to lock - * the subsys array - */ - for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { - struct cgroup_subsys *ss = subsys[i]; - if (ss->exit) - ss->exit(ss, tsk); - } - } + int i; /* * Unlink from the css_set task list if necessary. 
@@ -4261,7 +4249,24 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) task_lock(tsk); cg = tsk->cgroups; tsk->cgroups = &init_css_set; + + if (run_callbacks && need_forkexit_callback) { + /* + * modular subsystems can't use callbacks, so no need to lock + * the subsys array + */ + for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + if (ss->exit) { + struct cgroup *old_cgrp = + rcu_dereference_raw(cg->subsys[i])->cgroup; + struct cgroup *cgrp = task_cgroup(tsk, i); + ss->exit(ss, cgrp, old_cgrp, tsk); + } + } + } task_unlock(tsk); + if (cg) put_css_set_taskexit(cg); } diff --git a/kernel/sched.c b/kernel/sched.c index e142e92..79e611c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -606,9 +606,6 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; struct cgroup_subsys_state *css; - if (p->flags & PF_EXITING) - return &root_task_group; - css = task_subsys_state_check(p, cpu_cgroup_subsys_id, lockdep_is_held(&task_rq(p)->lock)); tg = container_of(css, struct task_group, css); @@ -8863,7 +8860,8 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } static void -cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) +cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup *old_cgrp, struct task_struct *task) { /* * cgroup_exit() is called in the copy_process() failure path. -- cgit v0.10.2 From e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 14 Feb 2011 11:20:01 +0200 Subject: perf: Add cgroup support This kernel patch adds the ability to filter monitoring based on container groups (cgroups). This is for use in per-cpu mode only. The cgroup to monitor is passed as a file descriptor in the pid argument to the syscall. The file descriptor must be opened to the cgroup name in the cgroup filesystem. 
For instance, if the cgroup name is foo and cgroupfs is mounted in /cgroup, then the file descriptor is opened to /cgroup/foo. Cgroup mode is activated by passing PERF_FLAG_PID_CGROUP in the flags argument to the syscall. For instance to measure in cgroup foo on CPU1 assuming cgroupfs is mounted under /cgroup: struct perf_event_attr attr; int cgroup_fd, fd; cgroup_fd = open("/cgroup/foo", O_RDONLY); fd = perf_event_open(&attr, cgroup_fd, 1, -1, PERF_FLAG_PID_CGROUP); close(cgroup_fd); Signed-off-by: Stephane Eranian [ added perf_cgroup_{exit,attach} ] Signed-off-by: Peter Zijlstra LKML-Reference: <4d590250.114ddf0a.689e.4482@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 38117d9..e654fa2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -627,6 +627,7 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg, /* Get id and depth of css */ unsigned short css_id(struct cgroup_subsys_state *css); unsigned short css_depth(struct cgroup_subsys_state *css); +struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); #else /* !CONFIG_CGROUPS */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ccefff0..cdbfcb8 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -65,4 +65,8 @@ SUBSYS(net_cls) SUBSYS(blkio) #endif +#ifdef CONFIG_CGROUP_PERF +SUBSYS(perf) +#endif + /* */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index dda5b0a..38c8b25 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -464,6 +464,7 @@ enum perf_callchain_context { #define PERF_FLAG_FD_NO_GROUP (1U << 0) #define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */ #ifdef __KERNEL__ /* @@ -471,6 +472,7 @@ enum perf_callchain_context { */ #ifdef CONFIG_PERF_EVENTS +# include # include # include #endif @@ -716,6 +718,22 @@ struct swevent_hlist { #define 
PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 +#ifdef CONFIG_CGROUP_PERF +/* + * perf_cgroup_info keeps track of time_enabled for a cgroup. + * This is a per-cpu dynamically allocated data structure. + */ +struct perf_cgroup_info { + u64 time; + u64 timestamp; +}; + +struct perf_cgroup { + struct cgroup_subsys_state css; + struct perf_cgroup_info *info; /* timing info, one per cpu */ +}; +#endif + /** * struct perf_event - performance event kernel representation: */ @@ -832,6 +850,11 @@ struct perf_event { struct event_filter *filter; #endif +#ifdef CONFIG_CGROUP_PERF + struct perf_cgroup *cgrp; /* cgroup event is attach to */ + int cgrp_defer_enabled; +#endif + #endif /* CONFIG_PERF_EVENTS */ }; @@ -886,6 +909,7 @@ struct perf_event_context { u64 generation; int pin_count; struct rcu_head rcu_head; + int nr_cgroups; /* cgroup events present */ }; /* @@ -905,6 +929,9 @@ struct perf_cpu_context { struct list_head rotation_list; int jiffies_interval; struct pmu *active_pmu; +#ifdef CONFIG_CGROUP_PERF + struct perf_cgroup *cgrp; +#endif }; struct perf_output_handle { @@ -1040,11 +1067,11 @@ have_event: __perf_sw_event(event_id, nr, nmi, regs, addr); } -extern atomic_t perf_task_events; +extern atomic_t perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *task) { - COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); + COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task)); } static inline @@ -1052,7 +1079,7 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); - COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); + COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next)); } extern void perf_event_mmap(struct vm_area_struct *vma); diff --git a/init/Kconfig b/init/Kconfig index be788c0..20d6bd9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -683,6 +683,16 @@ config 
CGROUP_MEM_RES_CTLR_SWAP_ENABLED select this option (if, for some reason, they need to disable it then noswapaccount does the trick). +config CGROUP_PERF + bool "Enable perf_event per-cpu per-container group (cgroup) monitoring" + depends on PERF_EVENTS && CGROUPS + help + This option extends the per-cpu mode to restrict monitoring to + threads which belong to the cgroup specificied and run on the + designated cpu. + + Say N if unsure. + menuconfig CGROUP_SCHED bool "Group CPU scheduler" depends on EXPERIMENTAL diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f6495f3..95362d1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4818,6 +4818,29 @@ css_get_next(struct cgroup_subsys *ss, int id, return ret; } +/* + * get corresponding css from file open on cgroupfs directory + */ +struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) +{ + struct cgroup *cgrp; + struct inode *inode; + struct cgroup_subsys_state *css; + + inode = f->f_dentry->d_inode; + /* check in cgroup filesystem dir */ + if (inode->i_op != &cgroup_dir_inode_operations) + return ERR_PTR(-EBADF); + + if (id < 0 || id >= CGROUP_SUBSYS_COUNT) + return ERR_PTR(-EINVAL); + + /* get cgroup */ + cgrp = __d_cgrp(f->f_dentry); + css = cgrp->subsys[id]; + return css ? 
css : ERR_PTR(-ENOENT); +} + #ifdef CONFIG_CGROUP_DEBUG static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, struct cgroup *cont) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3d3f282..65dcdc7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -111,13 +111,23 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) return data.ret; } +#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ + PERF_FLAG_FD_OUTPUT |\ + PERF_FLAG_PID_CGROUP) + enum event_type_t { EVENT_FLEXIBLE = 0x1, EVENT_PINNED = 0x2, EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, }; -atomic_t perf_task_events __read_mostly; +/* + * perf_sched_events : >0 events exist + * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu + */ +atomic_t perf_sched_events __read_mostly; +static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); + static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; @@ -148,7 +158,11 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, enum event_type_t event_type); static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type); + enum event_type_t event_type, + struct task_struct *task); + +static void update_context_time(struct perf_event_context *ctx); +static u64 perf_event_time(struct perf_event *event); void __weak perf_event_print_debug(void) { } @@ -162,6 +176,338 @@ static inline u64 perf_clock(void) return local_clock(); } +static inline struct perf_cpu_context * +__get_cpu_context(struct perf_event_context *ctx) +{ + return this_cpu_ptr(ctx->pmu->pmu_cpu_context); +} + +#ifdef CONFIG_CGROUP_PERF + +static inline struct perf_cgroup * +perf_cgroup_from_task(struct task_struct *task) +{ + return container_of(task_subsys_state(task, perf_subsys_id), + struct perf_cgroup, css); +} + +static inline bool +perf_cgroup_match(struct perf_event *event) +{ + struct perf_event_context *ctx = 
event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + + return !event->cgrp || event->cgrp == cpuctx->cgrp; +} + +static inline void perf_get_cgroup(struct perf_event *event) +{ + css_get(&event->cgrp->css); +} + +static inline void perf_put_cgroup(struct perf_event *event) +{ + css_put(&event->cgrp->css); +} + +static inline void perf_detach_cgroup(struct perf_event *event) +{ + perf_put_cgroup(event); + event->cgrp = NULL; +} + +static inline int is_cgroup_event(struct perf_event *event) +{ + return event->cgrp != NULL; +} + +static inline u64 perf_cgroup_event_time(struct perf_event *event) +{ + struct perf_cgroup_info *t; + + t = per_cpu_ptr(event->cgrp->info, event->cpu); + return t->time; +} + +static inline void __update_cgrp_time(struct perf_cgroup *cgrp) +{ + struct perf_cgroup_info *info; + u64 now; + + now = perf_clock(); + + info = this_cpu_ptr(cgrp->info); + + info->time += now - info->timestamp; + info->timestamp = now; +} + +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +{ + struct perf_cgroup *cgrp_out = cpuctx->cgrp; + if (cgrp_out) + __update_cgrp_time(cgrp_out); +} + +static inline void update_cgrp_time_from_event(struct perf_event *event) +{ + struct perf_cgroup *cgrp = perf_cgroup_from_task(current); + /* + * do not update time when cgroup is not active + */ + if (!event->cgrp || cgrp != event->cgrp) + return; + + __update_cgrp_time(event->cgrp); +} + +static inline void +perf_cgroup_set_timestamp(struct task_struct *task, u64 now) +{ + struct perf_cgroup *cgrp; + struct perf_cgroup_info *info; + + if (!task) + return; + + cgrp = perf_cgroup_from_task(task); + info = this_cpu_ptr(cgrp->info); + info->timestamp = now; +} + +#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ +#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */ + +/* + * reschedule events based on the cgroup constraint of task. 
+ * + * mode SWOUT : schedule out everything + * mode SWIN : schedule in based on cgroup for next + */ +void perf_cgroup_switch(struct task_struct *task, int mode) +{ + struct perf_cpu_context *cpuctx; + struct pmu *pmu; + unsigned long flags; + + /* + * disable interrupts to avoid geting nr_cgroup + * changes via __perf_event_disable(). Also + * avoids preemption. + */ + local_irq_save(flags); + + /* + * we reschedule only in the presence of cgroup + * constrained events. + */ + rcu_read_lock(); + + list_for_each_entry_rcu(pmu, &pmus, entry) { + + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + perf_pmu_disable(cpuctx->ctx.pmu); + + /* + * perf_cgroup_events says at least one + * context on this CPU has cgroup events. + * + * ctx->nr_cgroups reports the number of cgroup + * events for a context. + */ + if (cpuctx->ctx.nr_cgroups > 0) { + + if (mode & PERF_CGROUP_SWOUT) { + cpu_ctx_sched_out(cpuctx, EVENT_ALL); + /* + * must not be done before ctxswout due + * to event_filter_match() in event_sched_out() + */ + cpuctx->cgrp = NULL; + } + + if (mode & PERF_CGROUP_SWIN) { + /* set cgrp before ctxsw in to + * allow event_filter_match() to not + * have to pass task around + */ + cpuctx->cgrp = perf_cgroup_from_task(task); + cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); + } + } + + perf_pmu_enable(cpuctx->ctx.pmu); + } + + rcu_read_unlock(); + + local_irq_restore(flags); +} + +static inline void perf_cgroup_sched_out(struct task_struct *task) +{ + perf_cgroup_switch(task, PERF_CGROUP_SWOUT); +} + +static inline void perf_cgroup_sched_in(struct task_struct *task) +{ + perf_cgroup_switch(task, PERF_CGROUP_SWIN); +} + +static inline int perf_cgroup_connect(int fd, struct perf_event *event, + struct perf_event_attr *attr, + struct perf_event *group_leader) +{ + struct perf_cgroup *cgrp; + struct cgroup_subsys_state *css; + struct file *file; + int ret = 0, fput_needed; + + file = fget_light(fd, &fput_needed); + if (!file) + return -EBADF; + + css = cgroup_css_from_dir(file, 
perf_subsys_id); + if (IS_ERR(css)) + return PTR_ERR(css); + + cgrp = container_of(css, struct perf_cgroup, css); + event->cgrp = cgrp; + + /* + * all events in a group must monitor + * the same cgroup because a task belongs + * to only one perf cgroup at a time + */ + if (group_leader && group_leader->cgrp != cgrp) { + perf_detach_cgroup(event); + ret = -EINVAL; + } else { + /* must be done before we fput() the file */ + perf_get_cgroup(event); + } + fput_light(file, fput_needed); + return ret; +} + +static inline void +perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) +{ + struct perf_cgroup_info *t; + t = per_cpu_ptr(event->cgrp->info, event->cpu); + event->shadow_ctx_time = now - t->timestamp; +} + +static inline void +perf_cgroup_defer_enabled(struct perf_event *event) +{ + /* + * when the current task's perf cgroup does not match + * the event's, we need to remember to call the + * perf_mark_enable() function the first time a task with + * a matching perf cgroup is scheduled in. 
+ */ + if (is_cgroup_event(event) && !perf_cgroup_match(event)) + event->cgrp_defer_enabled = 1; +} + +static inline void +perf_cgroup_mark_enabled(struct perf_event *event, + struct perf_event_context *ctx) +{ + struct perf_event *sub; + u64 tstamp = perf_event_time(event); + + if (!event->cgrp_defer_enabled) + return; + + event->cgrp_defer_enabled = 0; + + event->tstamp_enabled = tstamp - event->total_time_enabled; + list_for_each_entry(sub, &event->sibling_list, group_entry) { + if (sub->state >= PERF_EVENT_STATE_INACTIVE) { + sub->tstamp_enabled = tstamp - sub->total_time_enabled; + sub->cgrp_defer_enabled = 0; + } + } +} +#else /* !CONFIG_CGROUP_PERF */ + +static inline bool +perf_cgroup_match(struct perf_event *event) +{ + return true; +} + +static inline void perf_detach_cgroup(struct perf_event *event) +{} + +static inline int is_cgroup_event(struct perf_event *event) +{ + return 0; +} + +static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event) +{ + return 0; +} + +static inline void update_cgrp_time_from_event(struct perf_event *event) +{ +} + +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +{ +} + +static inline void perf_cgroup_sched_out(struct task_struct *task) +{ +} + +static inline void perf_cgroup_sched_in(struct task_struct *task) +{ +} + +static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, + struct perf_event_attr *attr, + struct perf_event *group_leader) +{ + return -EINVAL; +} + +static inline void +perf_cgroup_set_timestamp(struct task_struct *task, u64 now) +{ +} + +void +perf_cgroup_switch(struct task_struct *task, struct task_struct *next) +{ +} + +static inline void +perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) +{ +} + +static inline u64 perf_cgroup_event_time(struct perf_event *event) +{ + return 0; +} + +static inline void +perf_cgroup_defer_enabled(struct perf_event *event) +{ +} + +static inline void +perf_cgroup_mark_enabled(struct perf_event 
*event, + struct perf_event_context *ctx) +{ +} +#endif + void perf_pmu_disable(struct pmu *pmu) { int *count = this_cpu_ptr(pmu->pmu_disable_count); @@ -343,6 +689,10 @@ static void update_context_time(struct perf_event_context *ctx) static u64 perf_event_time(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; + + if (is_cgroup_event(event)) + return perf_cgroup_event_time(event); + return ctx ? ctx->time : 0; } @@ -357,9 +707,20 @@ static void update_event_times(struct perf_event *event) if (event->state < PERF_EVENT_STATE_INACTIVE || event->group_leader->state < PERF_EVENT_STATE_INACTIVE) return; - - if (ctx->is_active) + /* + * in cgroup mode, time_enabled represents + * the time the event was enabled AND active + * tasks were in the monitored cgroup. This is + * independent of the activity of the context as + * there may be a mix of cgroup and non-cgroup events. + * + * That is why we treat cgroup events differently + * here. + */ + if (is_cgroup_event(event)) run_end = perf_event_time(event); + else if (ctx->is_active) + run_end = ctx->time; else run_end = event->tstamp_stopped; @@ -371,6 +732,7 @@ static void update_event_times(struct perf_event *event) run_end = perf_event_time(event); event->total_time_running = run_end - event->tstamp_running; + } /* @@ -419,6 +781,17 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) list_add_tail(&event->group_entry, list); } + if (is_cgroup_event(event)) { + ctx->nr_cgroups++; + /* + * one more event: + * - that has cgroup constraint on event->cpu + * - that may need work on context switch + */ + atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); + jump_label_inc(&perf_sched_events); + } + list_add_rcu(&event->event_entry, &ctx->event_list); if (!ctx->nr_events) perf_pmu_rotate_start(ctx->pmu); @@ -545,6 +918,12 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) event->attach_state &= ~PERF_ATTACH_CONTEXT; + if (is_cgroup_event(event)) { + 
ctx->nr_cgroups--; + atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); + jump_label_dec(&perf_sched_events); + } + ctx->nr_events--; if (event->attr.inherit_stat) ctx->nr_stat--; @@ -616,7 +995,8 @@ out: static inline int event_filter_match(struct perf_event *event) { - return event->cpu == -1 || event->cpu == smp_processor_id(); + return (event->cpu == -1 || event->cpu == smp_processor_id()) + && perf_cgroup_match(event); } static void @@ -634,7 +1014,7 @@ event_sched_out(struct perf_event *event, */ if (event->state == PERF_EVENT_STATE_INACTIVE && !event_filter_match(event)) { - delta = ctx->time - event->tstamp_stopped; + delta = tstamp - event->tstamp_stopped; event->tstamp_running += delta; event->tstamp_stopped = tstamp; } @@ -678,12 +1058,6 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } -static inline struct perf_cpu_context * -__get_cpu_context(struct perf_event_context *ctx) -{ - return this_cpu_ptr(ctx->pmu->pmu_cpu_context); -} - /* * Cross CPU call to remove a performance event * @@ -783,6 +1157,7 @@ static int __perf_event_disable(void *info) */ if (event->state >= PERF_EVENT_STATE_INACTIVE) { update_context_time(ctx); + update_cgrp_time_from_event(event); update_group_times(event); if (event == event->group_leader) group_sched_out(event, cpuctx, ctx); @@ -851,6 +1226,41 @@ retry: raw_spin_unlock_irq(&ctx->lock); } +static void perf_set_shadow_time(struct perf_event *event, + struct perf_event_context *ctx, + u64 tstamp) +{ + /* + * use the correct time source for the time snapshot + * + * We could get by without this by leveraging the + * fact that to get to this function, the caller + * has most likely already called update_context_time() + * and update_cgrp_time_xx() and thus both timestamp + * are identical (or very close). Given that tstamp is, + * already adjusted for cgroup, we could say that: + * tstamp - ctx->timestamp + * is equivalent to + * tstamp - cgrp->timestamp. 
+ * + * Then, in perf_output_read(), the calculation would + * work with no changes because: + * - event is guaranteed scheduled in + * - no scheduled out in between + * - thus the timestamp would be the same + * + * But this is a bit hairy. + * + * So instead, we have an explicit cgroup call to remain + * within the time time source all along. We believe it + * is cleaner and simpler to understand. + */ + if (is_cgroup_event(event)) + perf_cgroup_set_shadow_time(event, tstamp); + else + event->shadow_ctx_time = tstamp - ctx->timestamp; +} + #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -891,7 +1301,7 @@ event_sched_in(struct perf_event *event, event->tstamp_running += tstamp - event->tstamp_stopped; - event->shadow_ctx_time = tstamp - ctx->timestamp; + perf_set_shadow_time(event, ctx, tstamp); if (!is_software_event(event)) cpuctx->active_oncpu++; @@ -1012,7 +1422,8 @@ static void add_event_to_ctx(struct perf_event *event, event->tstamp_stopped = tstamp; } -static void perf_event_context_sched_in(struct perf_event_context *ctx); +static void perf_event_context_sched_in(struct perf_event_context *ctx, + struct task_struct *tsk); /* * Cross CPU call to install and enable a performance event @@ -1033,11 +1444,17 @@ static int __perf_install_in_context(void *info) * which do context switches with IRQs enabled. */ if (ctx->task && !cpuctx->task_ctx) - perf_event_context_sched_in(ctx); + perf_event_context_sched_in(ctx, ctx->task); raw_spin_lock(&ctx->lock); ctx->is_active = 1; update_context_time(ctx); + /* + * update cgrp time only if current cgrp + * matches event->cgrp. 
Must be done before + * calling add_event_to_ctx() + */ + update_cgrp_time_from_event(event); add_event_to_ctx(event, ctx); @@ -1175,10 +1592,19 @@ static int __perf_event_enable(void *info) if (event->state >= PERF_EVENT_STATE_INACTIVE) goto unlock; + + /* + * set current task's cgroup time reference point + */ + perf_cgroup_set_timestamp(current, perf_clock()); + __perf_event_mark_enabled(event, ctx); - if (!event_filter_match(event)) + if (!event_filter_match(event)) { + if (is_cgroup_event(event)) + perf_cgroup_defer_enabled(event); goto unlock; + } /* * If the event is in a group and isn't the group leader, @@ -1307,6 +1733,7 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (likely(!ctx->nr_events)) goto out; update_context_time(ctx); + update_cgrp_time_from_cpuctx(cpuctx); if (!ctx->nr_active) goto out; @@ -1496,6 +1923,14 @@ void __perf_event_task_sched_out(struct task_struct *task, for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); + + /* + * if cgroup events exist on this CPU, then we need + * to check if we have to switch out PMU state. 
+ * cgroup event are system-wide mode only + */ + if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + perf_cgroup_sched_out(task); } static void task_ctx_sched_out(struct perf_event_context *ctx, @@ -1534,6 +1969,10 @@ ctx_pinned_sched_in(struct perf_event_context *ctx, if (!event_filter_match(event)) continue; + /* may need to reset tstamp_enabled */ + if (is_cgroup_event(event)) + perf_cgroup_mark_enabled(event, ctx); + if (group_can_go_on(event, cpuctx, 1)) group_sched_in(event, cpuctx, ctx); @@ -1566,6 +2005,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, if (!event_filter_match(event)) continue; + /* may need to reset tstamp_enabled */ + if (is_cgroup_event(event)) + perf_cgroup_mark_enabled(event, ctx); + if (group_can_go_on(event, cpuctx, can_add_hw)) { if (group_sched_in(event, cpuctx, ctx)) can_add_hw = 0; @@ -1576,15 +2019,19 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, static void ctx_sched_in(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, - enum event_type_t event_type) + enum event_type_t event_type, + struct task_struct *task) { + u64 now; + raw_spin_lock(&ctx->lock); ctx->is_active = 1; if (likely(!ctx->nr_events)) goto out; - ctx->timestamp = perf_clock(); - + now = perf_clock(); + ctx->timestamp = now; + perf_cgroup_set_timestamp(task, now); /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. 
@@ -1601,11 +2048,12 @@ out: } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, - enum event_type_t event_type) + enum event_type_t event_type, + struct task_struct *task) { struct perf_event_context *ctx = &cpuctx->ctx; - ctx_sched_in(ctx, cpuctx, event_type); + ctx_sched_in(ctx, cpuctx, event_type, task); } static void task_ctx_sched_in(struct perf_event_context *ctx, @@ -1617,11 +2065,12 @@ static void task_ctx_sched_in(struct perf_event_context *ctx, if (cpuctx->task_ctx == ctx) return; - ctx_sched_in(ctx, cpuctx, event_type); + ctx_sched_in(ctx, cpuctx, event_type, NULL); cpuctx->task_ctx = ctx; } -static void perf_event_context_sched_in(struct perf_event_context *ctx) +static void perf_event_context_sched_in(struct perf_event_context *ctx, + struct task_struct *task) { struct perf_cpu_context *cpuctx; @@ -1637,9 +2086,9 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx) */ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - ctx_sched_in(ctx, cpuctx, EVENT_PINNED); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); + ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); cpuctx->task_ctx = ctx; @@ -1672,8 +2121,15 @@ void __perf_event_task_sched_in(struct task_struct *task) if (likely(!ctx)) continue; - perf_event_context_sched_in(ctx); + perf_event_context_sched_in(ctx, task); } + /* + * if cgroup events exist on this CPU, then we need + * to check if we have to switch in PMU state. 
+ * cgroup events are system-wide mode only + */ + if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + perf_cgroup_sched_in(task); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -1873,7 +2329,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) if (ctx) rotate_ctx(ctx); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); if (ctx) task_ctx_sched_in(ctx, EVENT_FLEXIBLE); @@ -1952,7 +2408,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); - perf_event_context_sched_in(ctx); + perf_event_context_sched_in(ctx, ctx->task); out: local_irq_restore(flags); } @@ -1977,8 +2433,10 @@ static void __perf_event_read(void *info) return; raw_spin_lock(&ctx->lock); - if (ctx->is_active) + if (ctx->is_active) { update_context_time(ctx); + update_cgrp_time_from_event(event); + } update_event_times(event); if (event->state == PERF_EVENT_STATE_ACTIVE) event->pmu->read(event); @@ -2009,8 +2467,10 @@ static u64 perf_event_read(struct perf_event *event) * (e.g., thread is blocked), in that case * we cannot update context time */ - if (ctx->is_active) + if (ctx->is_active) { update_context_time(ctx); + update_cgrp_time_from_event(event); + } update_event_times(event); raw_spin_unlock_irqrestore(&ctx->lock, flags); } @@ -2395,7 +2855,7 @@ static void free_event(struct perf_event *event) if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_dec(&perf_task_events); + jump_label_dec(&perf_sched_events); if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.comm) @@ -2411,6 +2871,9 @@ static void free_event(struct perf_event *event) event->buffer = NULL; } + if (is_cgroup_event(event)) + perf_detach_cgroup(event); + if (event->destroy) event->destroy(event); @@ -5300,6 +5763,7 @@ static void task_clock_event_read(struct perf_event *event) if (!in_nmi()) {
update_context_time(event->ctx); + update_cgrp_time_from_event(event); time = event->ctx->time; } else { u64 now = perf_clock(); @@ -5725,7 +6189,7 @@ done: if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_inc(&perf_task_events); + jump_label_inc(&perf_sched_events); if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.comm) @@ -5900,7 +6364,7 @@ SYSCALL_DEFINE5(perf_event_open, int err; /* for future expandability... */ - if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) + if (flags & ~PERF_FLAG_ALL) return -EINVAL; err = perf_copy_attr(attr_uptr, &attr); @@ -5917,6 +6381,15 @@ SYSCALL_DEFINE5(perf_event_open, return -EINVAL; } + /* + * In cgroup mode, the pid argument is used to pass the fd + * opened to the cgroup directory in cgroupfs. The cpu argument + * designates the cpu on which to monitor threads from that + * cgroup. + */ + if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1)) + return -EINVAL; + event_fd = get_unused_fd_flags(O_RDWR); if (event_fd < 0) return event_fd; @@ -5934,7 +6407,7 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } - if (pid != -1) { + if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) { task = find_lively_task_by_vpid(pid); if (IS_ERR(task)) { err = PTR_ERR(task); @@ -5948,6 +6421,12 @@ SYSCALL_DEFINE5(perf_event_open, goto err_task; } + if (flags & PERF_FLAG_PID_CGROUP) { + err = perf_cgroup_connect(pid, event, &attr, group_leader); + if (err) + goto err_alloc; + } + /* * Special case software events and allow them to be part of * any hardware group. 
@@ -6808,3 +7287,92 @@ unlock: return ret; } device_initcall(perf_event_sysfs_init); + +#ifdef CONFIG_CGROUP_PERF +static struct cgroup_subsys_state *perf_cgroup_create( + struct cgroup_subsys *ss, struct cgroup *cont) +{ + struct perf_cgroup *jc; + struct perf_cgroup_info *t; + int c; + + jc = kmalloc(sizeof(*jc), GFP_KERNEL); + if (!jc) + return ERR_PTR(-ENOMEM); + + memset(jc, 0, sizeof(*jc)); + + jc->info = alloc_percpu(struct perf_cgroup_info); + if (!jc->info) { + kfree(jc); + return ERR_PTR(-ENOMEM); + } + + for_each_possible_cpu(c) { + t = per_cpu_ptr(jc->info, c); + t->time = 0; + t->timestamp = 0; + } + return &jc->css; +} + +static void perf_cgroup_destroy(struct cgroup_subsys *ss, + struct cgroup *cont) +{ + struct perf_cgroup *jc; + jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), + struct perf_cgroup, css); + free_percpu(jc->info); + kfree(jc); +} + +static int __perf_cgroup_move(void *info) +{ + struct task_struct *task = info; + perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); + return 0; +} + +static void perf_cgroup_move(struct task_struct *task) +{ + task_function_call(task, __perf_cgroup_move, task); +} + +static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup *old_cgrp, struct task_struct *task, + bool threadgroup) +{ + perf_cgroup_move(task); + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &task->thread_group, thread_group) { + perf_cgroup_move(c); + } + rcu_read_unlock(); + } +} + +static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup *old_cgrp, struct task_struct *task) +{ + /* + * cgroup_exit() is called in the copy_process() failure path. + * Ignore this case since the task hasn't ran yet, this avoids + * trying to poke a half freed task state from generic code. 
+ */ + if (!(task->flags & PF_EXITING)) + return; + + perf_cgroup_move(task); +} + +struct cgroup_subsys perf_subsys = { + .name = "perf_event", + .subsys_id = perf_subsys_id, + .create = perf_cgroup_create, + .destroy = perf_cgroup_destroy, + .exit = perf_cgroup_exit, + .attach = perf_cgroup_attach, +}; +#endif /* CONFIG_CGROUP_PERF */ -- cgit v0.10.2 From 023695d96ee06f36cf5014e286edcd623e9fb847 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 14 Feb 2011 11:20:01 +0200 Subject: perf tool: Add cgroup support This patch adds the ability to filter monitoring based on container groups (cgroups) for both perf stat and perf record. It is possible to monitor multiple cgroup in parallel. There is one cgroup per event. The cgroups to monitor are passed via a new -G option followed by a comma separated list of cgroup names. The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool finds the corresponding directory in the cgroup filesystem and opens it. It then passes that file descriptor to the kernel. Example: $ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1 Performance counter stats for 'sleep 1': 2,368,667,414 cycles test1 2,369,661,459 cycles cycles test2 1.001856890 seconds time elapsed Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e032716..5a520f8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -137,6 +137,17 @@ Do not update the builid cache. This saves some overhead in situations where the information in the perf.data file (which includes buildids) is sufficient. +-G name,...:: +--cgroup name,...:: +monitor only in the container (cgroup) called "name". This option is available only +in per-cpu mode. The cgroup filesystem must be mounted. 
All threads belonging to +container "name" are monitored when they run on the monitored CPUs. Multiple cgroups +can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup +to first event, second cgroup to second event and so on. It is possible to provide +an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have +corresponding events, i.e., they always refer to events defined earlier on the command +line. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index b6da7af..918cc38 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -83,6 +83,17 @@ This option is only valid in system-wide mode. print counts using a CSV-style output to make it easy to import directly into spreadsheets. Columns are separated by the string specified in SEP. +-G name:: +--cgroup name:: +monitor only in the container (cgroup) called "name". This option is available only +in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to +container "name" are monitored when they run on the monitored CPUs. Multiple cgroups +can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup +to first event, second cgroup to second event and so on. It is possible to provide +an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have +corresponding events, i.e., they always refer to events defined earlier on the command +line. 
+ EXAMPLES -------- diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 94f73ab..bc4d9bf 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -442,6 +442,7 @@ LIB_H += util/pstack.h LIB_H += util/cpumap.h LIB_H += util/top.h LIB_H += $(ARCH_INCLUDE) +LIB_H += util/cgroup.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -496,6 +497,7 @@ LIB_OBJS += $(OUTPUT)util/probe-event.o LIB_OBJS += $(OUTPUT)util/util.o LIB_OBJS += $(OUTPUT)util/xyarray.o LIB_OBJS += $(OUTPUT)util/cpumap.o +LIB_OBJS += $(OUTPUT)util/cgroup.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 12e0e41..a4aaadc 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -807,6 +807,9 @@ const struct option record_options[] = { "do not update the buildid cache"), OPT_BOOLEAN('B', "no-buildid", &no_buildid, "do not collect buildids in perf.data"), + OPT_CALLBACK('G', "cgroup", &evsel_list, "name", + "monitor event in cgroup name only", + parse_cgroups), OPT_END() }; @@ -835,6 +838,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) write_mode = WRITE_FORCE; } + if (nr_cgroups && !system_wide) { + fprintf(stderr, "cgroup monitoring only available in" + " system-wide mode\n"); + usage_with_options(record_usage, record_options); + } + symbol__init(); if (no_buildid_cache || no_buildid) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 806a999..21c0252 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -390,6 +390,9 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); + if (evsel->cgrp) + fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name); + if (csv_output) return; @@ -420,6 +423,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); 
+ if (evsel->cgrp) + fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name); + if (csv_output) return; @@ -460,9 +466,17 @@ static void print_counter_aggr(struct perf_evsel *counter) int scaled = counter->counts->scaled; if (scaled == -1) { - fprintf(stderr, "%*s%s%-24s\n", + fprintf(stderr, "%*s%s%*s", csv_output ? 0 : 18, - "", csv_sep, event_name(counter)); + "", + csv_sep, + csv_output ? 0 : -24, + event_name(counter)); + + if (counter->cgrp) + fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name); + + fputc('\n', stderr); return; } @@ -487,7 +501,6 @@ static void print_counter_aggr(struct perf_evsel *counter) fprintf(stderr, " (scaled from %.2f%%)", 100 * avg_running / avg_enabled); } - fprintf(stderr, "\n"); } @@ -505,14 +518,18 @@ static void print_counter(struct perf_evsel *counter) ena = counter->counts->cpu[cpu].ena; run = counter->counts->cpu[cpu].run; if (run == 0 || ena == 0) { - fprintf(stderr, "CPU%*d%s%*s%s%-24s", + fprintf(stderr, "CPU%*d%s%*s%s%*s", csv_output ? 0 : -4, evsel_list->cpus->map[cpu], csv_sep, csv_output ? 0 : 18, "", csv_sep, + csv_output ? 
0 : -24, event_name(counter)); - fprintf(stderr, "\n"); + if (counter->cgrp) + fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name); + + fputc('\n', stderr); continue; } @@ -529,7 +546,7 @@ static void print_counter(struct perf_evsel *counter) 100.0 * run / ena); } } - fprintf(stderr, "\n"); + fputc('\n', stderr); } } @@ -642,6 +659,9 @@ static const struct option options[] = { "disable CPU count aggregation"), OPT_STRING('x', "field-separator", &csv_sep, "separator", "print counts with custom separator"), + OPT_CALLBACK('G', "cgroup", &evsel_list, "name", + "monitor event in cgroup name only", + parse_cgroups), OPT_END() }; @@ -682,9 +702,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (run_count <= 0) usage_with_options(stat_usage, options); - /* no_aggr is for system-wide only */ - if (no_aggr && !system_wide) + /* no_aggr, cgroup are for system-wide only */ + if ((no_aggr || nr_cgroups) && !system_wide) { + fprintf(stderr, "both cgroup and no-aggregation " + "modes only available in system-wide mode\n"); + usage_with_options(stat_usage, options); + } /* Set attrs and nr_counters if no event is selected and !null_run */ if (!null_run && !evsel_list->nr_entries) { diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c new file mode 100644 index 0000000..9fea755 --- /dev/null +++ b/tools/perf/util/cgroup.c @@ -0,0 +1,178 @@ +#include "util.h" +#include "../perf.h" +#include "parse-options.h" +#include "evsel.h" +#include "cgroup.h" +#include "debugfs.h" /* MAX_PATH, STR() */ +#include "evlist.h" + +int nr_cgroups; + +static int +cgroupfs_find_mountpoint(char *buf, size_t maxlen) +{ + FILE *fp; + char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1]; + char *token, *saved_ptr; + int found = 0; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return -1; + + /* + * in order to handle split hierarchy, we need to scan /proc/mounts + * and inspect every cgroupfs mount point to find one that has + * perf_event 
subsystem + */ + while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %" + STR(MAX_PATH)"s %*d %*d\n", + mountpoint, type, tokens) == 3) { + + if (!strcmp(type, "cgroup")) { + + token = strtok_r(tokens, ",", &saved_ptr); + + while (token != NULL) { + if (!strcmp(token, "perf_event")) { + found = 1; + break; + } + token = strtok_r(NULL, ",", &saved_ptr); + } + } + if (found) + break; + } + fclose(fp); + if (!found) + return -1; + + if (strlen(mountpoint) < maxlen) { + strcpy(buf, mountpoint); + return 0; + } + return -1; +} + +static int open_cgroup(char *name) +{ + char path[MAX_PATH+1]; + char mnt[MAX_PATH+1]; + int fd; + + + if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1)) + return -1; + + snprintf(path, MAX_PATH, "%s/%s", mnt, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + fprintf(stderr, "no access to cgroup %s\n", path); + + return fd; +} + +static int add_cgroup(struct perf_evlist *evlist, char *str) +{ + struct perf_evsel *counter; + struct cgroup_sel *cgrp = NULL; + int n; + /* + * check if cgrp is already defined, if so we reuse it + */ + list_for_each_entry(counter, &evlist->entries, node) { + cgrp = counter->cgrp; + if (!cgrp) + continue; + if (!strcmp(cgrp->name, str)) + break; + + cgrp = NULL; + } + + if (!cgrp) { + cgrp = zalloc(sizeof(*cgrp)); + if (!cgrp) + return -1; + + cgrp->name = str; + + cgrp->fd = open_cgroup(str); + if (cgrp->fd == -1) { + free(cgrp); + return -1; + } + } + + /* + * find corresponding event + * if add cgroup N, then need to find event N + */ + n = 0; + list_for_each_entry(counter, &evlist->entries, node) { + if (n == nr_cgroups) + goto found; + n++; + } + if (cgrp->refcnt == 0) + free(cgrp); + + return -1; +found: + cgrp->refcnt++; + counter->cgrp = cgrp; + return 0; +} + +void close_cgroup(struct cgroup_sel *cgrp) +{ + if (!cgrp) + return; + + /* XXX: not reentrant */ + if (--cgrp->refcnt == 0) { + close(cgrp->fd); + free(cgrp->name); + free(cgrp); + } +} + +int parse_cgroups(const struct option *opt __used, 
const char *str, + int unset __used) +{ + struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; + const char *p, *e, *eos = str + strlen(str); + char *s; + int ret; + + if (list_empty(&evlist->entries)) { + fprintf(stderr, "must define events before cgroups\n"); + return -1; + } + + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + /* allow empty cgroups, i.e., skip */ + if (e - str) { + /* termination added */ + s = strndup(str, e - str); + if (!s) + return -1; + ret = add_cgroup(evlist, s); + if (ret) { + free(s); + return -1; + } + } + /* nr_cgroups is increased even for empty cgroups */ + nr_cgroups++; + if (!p) + break; + str = p+1; + } + return 0; +} diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h new file mode 100644 index 0000000..89acd6d --- /dev/null +++ b/tools/perf/util/cgroup.h @@ -0,0 +1,17 @@ +#ifndef __CGROUP_H__ +#define __CGROUP_H__ + +struct option; + +struct cgroup_sel { + char *name; + int fd; + int refcnt; +}; + + +extern int nr_cgroups; /* number of explicit cgroups defined */ +extern void close_cgroup(struct cgroup_sel *cgrp); +extern int parse_cgroups(const struct option *opt, const char *str, int unset); + +#endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 211063e..c974e08 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -85,6 +85,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) void perf_evsel__delete(struct perf_evsel *evsel) { perf_evsel__exit(evsel); + close_cgroup(evsel->cgrp); free(evsel); } @@ -163,21 +164,32 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads, bool group, bool inherit) { int cpu, thread; + unsigned long flags = 0; + int pid = -1; if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) return -1; + if (evsel->cgrp) { + flags = PERF_FLAG_PID_CGROUP; + pid = evsel->cgrp->fd; + } + for (cpu = 0; cpu < cpus->nr; cpu++) { int group_fd =
-1; evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit; for (thread = 0; thread < threads->nr; thread++) { + + if (!evsel->cgrp) + pid = threads->map[thread]; + FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, - threads->map[thread], + pid, cpus->map[cpu], - group_fd, 0); + group_fd, flags); if (FD(evsel, cpu, thread) < 0) goto out_close; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index eecdc3a..1d3d5a3 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -6,6 +6,7 @@ #include "../../../include/linux/perf_event.h" #include "types.h" #include "xyarray.h" +#include "cgroup.h" struct perf_counts_values { union { @@ -45,6 +46,7 @@ struct perf_evsel { struct perf_counts *counts; int idx; void *priv; + struct cgroup_sel *cgrp; }; struct cpu_map; -- cgit v0.10.2 From d45dd923fcc620c948bd1eda16cc61426ac31646 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 2 Feb 2011 17:40:56 +0100 Subject: perf, x86: Use helper function in x86_pmu_enable_all() Use helper function in x86_pmu_enable_all() to minimize access to x86_pmu.eventsel in the fast path. The counter's msr address is now calculated using struct hw_perf_event. Later we add code that calculates the msr addresses with a table lookup which shouldn't be done in the fast path. 
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1296664860-10886-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4d98789..70d6d8f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -642,21 +642,24 @@ static void x86_pmu_disable(struct pmu *pmu) x86_pmu.disable_all(); } +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, + u64 enable_mask) +{ + wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); +} + static void x86_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - u64 val; + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; if (!test_bit(idx, cpuc->active_mask)) continue; - val = event->hw.config; - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu.eventsel + idx, val); + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } } @@ -915,12 +918,6 @@ static void x86_pmu_enable(struct pmu *pmu) x86_pmu.enable_all(added); } -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, - u64 enable_mask) -{ - wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); -} - static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; -- cgit v0.10.2 From 41bf498949a263fa0b2d32524b89d696ac330e94 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 2 Feb 2011 17:40:57 +0100 Subject: perf, x86: Calculate perfctr msr addresses in helper functions This patch adds helper functions to calculate perfctr msr addresses. We need this to later add support for AMD family 15h cpus. For this we have to change the algorithms to generate the perfctr's msr addresses. 
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1296664860-10886-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 70d6d8f..ee40c1ad 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -321,6 +321,16 @@ again: return new_raw_count; } +static inline unsigned int x86_pmu_config_addr(int index) +{ + return x86_pmu.eventsel + index; +} + +static inline unsigned int x86_pmu_event_addr(int index) +{ + return x86_pmu.perfctr + index; +} + static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); @@ -331,12 +341,12 @@ static bool reserve_pmc_hardware(void) int i; for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) + if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) goto perfctr_fail; } for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) + if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) goto eventsel_fail; } @@ -344,13 +354,13 @@ static bool reserve_pmc_hardware(void) eventsel_fail: for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); + release_evntsel_nmi(x86_pmu_config_addr(i)); i = x86_pmu.num_counters; perfctr_fail: for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); + release_perfctr_nmi(x86_pmu_event_addr(i)); return false; } @@ -360,8 +370,8 @@ static void release_pmc_hardware(void) int i; for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); + release_perfctr_nmi(x86_pmu_event_addr(i)); + release_evntsel_nmi(x86_pmu_config_addr(i)); } } @@ -382,7 +392,7 @@ static bool check_hw_exists(void) * complain and bail. 
*/ for (i = 0; i < x86_pmu.num_counters; i++) { - reg = x86_pmu.eventsel + i; + reg = x86_pmu_config_addr(i); ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; @@ -407,8 +417,8 @@ static bool check_hw_exists(void) * that don't trap on the MSR access and always return 0s. */ val = 0xabcdUL; - ret = checking_wrmsrl(x86_pmu.perfctr, val); - ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); + ret = checking_wrmsrl(x86_pmu_event_addr(0), val); + ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); if (ret || val != val_new) goto msr_fail; @@ -617,11 +627,11 @@ static void x86_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(x86_pmu.eventsel + idx, val); + rdmsrl(x86_pmu_config_addr(idx), val); if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu.eventsel + idx, val); + wrmsrl(x86_pmu_config_addr(idx), val); } } @@ -1110,8 +1120,8 @@ void perf_event_print_debug(void) pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); + rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); + rdmsrl(x86_pmu_event_addr(idx), pmc_count); prev_left = per_cpu(pmc_prev_left[idx], cpu); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 008835c..084b383 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -691,8 +691,8 @@ static void intel_pmu_reset(void) printk("clearing PMU state on CPU#%d\n", smp_processor_id()); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); + checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); } for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 
0ull); -- cgit v0.10.2 From 69d8e1e8ac0a7d829f1c0fd5bd07eb3022d9a1a0 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 2 Feb 2011 17:40:58 +0100 Subject: perf, x86: Add new AMD family 15h msrs to perfctr reservation code This patch allows the reservation of perfctrs with new msr addresses introduced for AMD cpu family 15h (0xc0010200/0xc0010201, etc). Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1296664860-10886-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d5a2366..966512b 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) /* returns the bit offset of the performance counter register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: + if (msr >= MSR_F15H_PERF_CTR) + return (msr - MSR_F15H_PERF_CTR) >> 1; return msr - MSR_K7_PERFCTR0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) @@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) /* returns the bit offset of the event selection register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: + if (msr >= MSR_F15H_PERF_CTL) + return (msr - MSR_F15H_PERF_CTL) >> 1; return msr - MSR_K7_EVNTSEL0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) -- cgit v0.10.2 From 73d6e52206a20354738418625cedc244cbfd5023 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 2 Feb 2011 17:40:59 +0100 Subject: perf, x86: Store perfctr msr addresses in config_base/event_base Instead of storing the base addresses we can store the counter's msr addresses directly in config_base/event_base of struct hw_perf_event. This avoids recalculating the address with each msr access. The addresses are configured one time. 
We also need this change to later modify the address calculation. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1296664860-10886-5-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ee40c1ad..3161943 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -298,7 +298,7 @@ x86_perf_event_update(struct perf_event *event) */ again: prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(hwc->event_base + idx, new_raw_count); + rdmsrl(hwc->event_base, new_raw_count); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -655,7 +655,7 @@ static void x86_pmu_disable(struct pmu *pmu) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { - wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); + wrmsrl(hwc->config_base, hwc->config | enable_mask); } static void x86_pmu_enable_all(int added) @@ -834,15 +834,10 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->event_base = 0; } else if (hwc->idx >= X86_PMC_IDX_FIXED) { hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that event_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: - */ - hwc->event_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0; } else { - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu_config_addr(hwc->idx); + hwc->event_base = x86_pmu_event_addr(hwc->idx); } } @@ -932,7 +927,7 @@ static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base + hwc->idx, hwc->config); + wrmsrl(hwc->config_base, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -985,7 +980,7 @@ x86_perf_event_set_period(struct perf_event *event) */ local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Due to erratum on certan cpu we need @@ -993,7 +988,7 @@ x86_perf_event_set_period(struct perf_event *event) * is updated properly */ if (x86_pmu.perfctr_second_write) { - wrmsrl(hwc->event_base + idx, + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); } diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ff751a9..3769ac82 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) u64 v; /* an official way for overflow indication */ - rdmsrl(hwc->config_base + hwc->idx, v); + rdmsrl(hwc->config_base, v); if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); + wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); return 1; } @@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * state we need to clear P4_CCCR_OVF, otherwise interrupt get * asserted again and again */ - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (void)checking_wrmsrl(hwc->config_base, (u64)(p4_config_unpack_cccr(hwc->config)) & 
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } @@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event) p4_pmu_enable_pebs(hwc->config); (void)checking_wrmsrl(escr_addr, escr_conf); - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (void)checking_wrmsrl(hwc->config_base, (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 34ba07b..20c097e 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); + (void)checking_wrmsrl(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) @@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); + (void)checking_wrmsrl(hwc->config_base, val); } static __initconst const struct x86_pmu p6_pmu = { -- cgit v0.10.2 From 4979d2729af22f6ce8faa325fc60a85a2c2daa02 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 2 Feb 2011 17:36:12 +0100 Subject: perf, x86: Add support for AMD family 15h core counters This patch adds support for AMD family 15h core counters. There are major changes compared to family 10h. First, there is a new perfctr msr range for up to 6 counters. Northbridge counters are separate now. This patch only adds support for core counters. Second, certain events may only be scheduled on certain counters. For this we need to extend the event scheduling and constraints. We use cpu feature flags to calculate family 15h msr address offsets. This way we later can implement a faster ALTERNATIVE() version for this. 
Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <20110215135210.GB5874@erda.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 220e2ea..91f3e087 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -160,6 +160,7 @@ #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) +#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3161943..10bfe24 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -321,14 +321,22 @@ again: return new_raw_count; } +/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ +static inline int x86_pmu_addr_offset(int index) +{ + if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) + return index << 1; + return index; +} + static inline unsigned int x86_pmu_config_addr(int index) { - return x86_pmu.eventsel + index; + return x86_pmu.eventsel + x86_pmu_addr_offset(index); } static inline unsigned int x86_pmu_event_addr(int index) { - return x86_pmu.perfctr + index; + return x86_pmu.perfctr + x86_pmu_addr_offset(index); } static atomic_t active_events; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 
67e2202..461f62b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event) /* * AMD64 events are detected based on their event codes. */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + static inline int amd_is_nb_event(struct hw_perf_event *hwc) { return (hwc->config & 0xe0) == 0xe0; @@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = { .cpu_dead = amd_pmu_cpu_dead, }; +/* AMD Family 15h */ + +#define AMD_EVENT_TYPE_MASK 0x000000F0ULL + +#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL +#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL +#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL +#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL +#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL +#define AMD_EVENT_EX_LS 0x000000C0ULL +#define AMD_EVENT_DE 0x000000D0ULL +#define AMD_EVENT_NB 0x000000E0ULL ... 
0x000000F0ULL + +/* + * AMD family 15h event code/PMC mappings: + * + * type = event_code & 0x0F0: + * + * 0x000 FP PERF_CTL[5:3] + * 0x010 FP PERF_CTL[5:3] + * 0x020 LS PERF_CTL[5:0] + * 0x030 LS PERF_CTL[5:0] + * 0x040 DC PERF_CTL[5:0] + * 0x050 DC PERF_CTL[5:0] + * 0x060 CU PERF_CTL[2:0] + * 0x070 CU PERF_CTL[2:0] + * 0x080 IC/DE PERF_CTL[2:0] + * 0x090 IC/DE PERF_CTL[2:0] + * 0x0A0 --- + * 0x0B0 --- + * 0x0C0 EX/LS PERF_CTL[5:0] + * 0x0D0 DE PERF_CTL[2:0] + * 0x0E0 NB NB_PERF_CTL[3:0] + * 0x0F0 NB NB_PERF_CTL[3:0] + * + * Exceptions: + * + * 0x003 FP PERF_CTL[3] + * 0x00B FP PERF_CTL[3] + * 0x00D FP PERF_CTL[3] + * 0x023 DE PERF_CTL[2:0] + * 0x02D LS PERF_CTL[3] + * 0x02E LS PERF_CTL[3,0] + * 0x043 CU PERF_CTL[2:0] + * 0x045 CU PERF_CTL[2:0] + * 0x046 CU PERF_CTL[2:0] + * 0x054 CU PERF_CTL[2:0] + * 0x055 CU PERF_CTL[2:0] + * 0x08F IC PERF_CTL[0] + * 0x187 DE PERF_CTL[0] + * 0x188 DE PERF_CTL[0] + * 0x0DB EX PERF_CTL[5:0] + * 0x0DC LS PERF_CTL[5:0] + * 0x0DD LS PERF_CTL[5:0] + * 0x0DE LS PERF_CTL[5:0] + * 0x0DF LS PERF_CTL[5:0] + * 0x1D6 EX PERF_CTL[5:0] + * 0x1D8 EX PERF_CTL[5:0] + */ + +static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); +static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); +static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); +static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); +static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); +static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); + +static struct event_constraint * +amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + unsigned int event_code = amd_get_event_code(&event->hw); + + switch (event_code & AMD_EVENT_TYPE_MASK) { + case AMD_EVENT_FP: + switch (event_code) { + case 0x003: + case 0x00B: + case 0x00D: + return &amd_f15_PMC3; + default: + return &amd_f15_PMC53; + } + case AMD_EVENT_LS: + case AMD_EVENT_DC: 
+ case AMD_EVENT_EX_LS: + switch (event_code) { + case 0x023: + case 0x043: + case 0x045: + case 0x046: + case 0x054: + case 0x055: + return &amd_f15_PMC20; + case 0x02D: + return &amd_f15_PMC3; + case 0x02E: + return &amd_f15_PMC30; + default: + return &amd_f15_PMC50; + } + case AMD_EVENT_CU: + case AMD_EVENT_IC_DE: + case AMD_EVENT_DE: + switch (event_code) { + case 0x08F: + case 0x187: + case 0x188: + return &amd_f15_PMC0; + case 0x0DB ... 0x0DF: + case 0x1D6: + case 0x1D8: + return &amd_f15_PMC50; + default: + return &amd_f15_PMC20; + } + case AMD_EVENT_NB: + /* not yet implemented */ + return &emptyconstraint; + default: + return &emptyconstraint; + } +} + +static __initconst const struct x86_pmu amd_pmu_f15h = { + .name = "AMD Family 15h", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = amd_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_F15H_PERF_CTL, + .perfctr = MSR_F15H_PERF_CTR, + .event_map = amd_pmu_event_map, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_counters = 6, + .cntval_bits = 48, + .cntval_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints_f15h, + /* nortbridge counters not yet implemented: */ +#if 0 + .put_event_constraints = amd_put_event_constraints, + + .cpu_prepare = amd_pmu_cpu_prepare, + .cpu_starting = amd_pmu_cpu_starting, + .cpu_dead = amd_pmu_cpu_dead, +#endif +}; + static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) return -ENODEV; - x86_pmu = amd_pmu; + /* + * If core performance counter extensions exists, it must be + * family 15h, otherwise fail. See x86_pmu_addr_offset(). 
+ */ + switch (boot_cpu_data.x86) { + case 0x15: + if (!cpu_has_perfctr_core) + return -ENODEV; + x86_pmu = amd_pmu_f15h; + break; + default: + if (cpu_has_perfctr_core) + return -ENODEV; + x86_pmu = amd_pmu; + break; + } /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, -- cgit v0.10.2 From 163ec4354a5135c6c38c3f4a9b46a31900ebdf48 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Feb 2011 11:22:34 +0100 Subject: perf: Optimize throttling code By pre-computing the maximum number of samples per tick we can avoid a multiplication and a conditional since MAX_INTERRUPTS > max_samples_per_tick. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 38c8b25..8ceb5a6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1110,6 +1110,10 @@ extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; +extern int perf_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + static inline bool perf_paranoid_tracepoint_raw(void) { return sysctl_perf_event_paranoid > -1; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 65dcdc7..e03be08 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -150,7 +150,24 @@ int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ /* * max perf event sample rate */ -int sysctl_perf_event_sample_rate __read_mostly = 100000; +#define DEFAULT_MAX_SAMPLE_RATE 100000 +int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE; +static int max_samples_per_tick __read_mostly = + DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ); + +int perf_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (ret || !write) + 
return ret; + + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); + + return 0; +} static atomic64_t perf_event_id; @@ -4941,26 +4958,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, if (unlikely(!is_sampling_event(event))) return 0; - if (!throttle) { - hwc->interrupts++; - } else { - if (hwc->interrupts != MAX_INTERRUPTS) { - hwc->interrupts++; - if (HZ * hwc->interrupts > - (u64)sysctl_perf_event_sample_rate) { - hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(event, 0); - ret = 1; - } - } else { - /* - * Keep re-disabling events even though on the previous - * pass we disabled it - just in case we raced with a - * sched-in and the event got enabled again: - */ + if (unlikely(hwc->interrupts >= max_samples_per_tick)) { + if (throttle) { + hwc->interrupts = MAX_INTERRUPTS; + perf_log_throttle(event, 0); ret = 1; } - } + } else + hwc->interrupts++; if (event->attr.freq) { u64 now = perf_clock(); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0f1bd83..daef911 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -948,7 +948,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_perf_event_sample_rate, .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = perf_proc_update_handler, }, #endif #ifdef CONFIG_KMEMCHECK -- cgit v0.10.2 From ba3dd36c6775264ee6e7354ba1aabcd6e86d7298 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Feb 2011 12:41:46 +0100 Subject: perf: Optimize hrtimer events There is no need to re-initialize the hrtimer every time we start it, so don't do that (shaves a few cycles). Also, since we know hrtimers run at a fixed rate (nanoseconds) we can pre-compute the desired frequency at which they tick. This avoids us having to go through the whole adaptive frequency feedback logic (shaves another few cycles). 
Signed-off-by: Peter Zijlstra LKML-Reference: <1297448589.5226.47.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index e03be08..a0a6987 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5602,6 +5602,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) u64 period; event = container_of(hrtimer, struct perf_event, hw.hrtimer); + + if (event->state != PERF_EVENT_STATE_ACTIVE) + return HRTIMER_NORESTART; + event->pmu->read(event); perf_sample_data_init(&data, 0); @@ -5628,9 +5632,6 @@ static void perf_swevent_start_hrtimer(struct perf_event *event) if (!is_sampling_event(event)) return; - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - period = local64_read(&hwc->period_left); if (period) { if (period < 0) @@ -5657,6 +5658,30 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) } } +static void perf_swevent_init_hrtimer(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!is_sampling_event(event)) + return; + + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + + /* + * Since hrtimers have a fixed rate, we can do a static freq->period + * mapping and avoid the whole period adjust feedback stuff. 
+ */ + if (event->attr.freq) { + long freq = event->attr.sample_freq; + + event->attr.sample_period = NSEC_PER_SEC / freq; + hwc->sample_period = event->attr.sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + event->attr.freq = 0; + } +} + /* * Software event: cpu wall time clock */ @@ -5709,6 +5734,8 @@ static int cpu_clock_event_init(struct perf_event *event) if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) return -ENOENT; + perf_swevent_init_hrtimer(event); + return 0; } @@ -5787,6 +5814,8 @@ static int task_clock_event_init(struct perf_event *event) if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) return -ENOENT; + perf_swevent_init_hrtimer(event); + return 0; } -- cgit v0.10.2 From 5c35d69fb60b1dc49595f5b9a2c7158283e9eaf3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Feb 2011 11:38:43 -0200 Subject: perf ui: Serialize screen updates The ui operations so far were used by just one thread, but 'perf top --tui' now has two threads updating the screen, so we need to use a mutex to avoid garbling the screen. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index bc4d9bf..ffd1047 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -637,6 +637,7 @@ else LIB_H += util/ui/libslang.h LIB_H += util/ui/progress.h LIB_H += util/ui/util.h + LIB_H += util/ui/ui.h endif endif diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c index 8bc010e..60d6c81 100644 --- a/tools/perf/util/ui/browser.c +++ b/tools/perf/util/ui/browser.c @@ -1,4 +1,5 @@ #include "libslang.h" +#include "ui.h" #include #include #include @@ -178,6 +179,7 @@ int ui_browser__show(struct ui_browser *self, const char *title, if (self->sb == NULL) return -1; + pthread_mutex_lock(&ui__lock); SLsmg_gotorc(0, 0); ui_browser__set_color(self, NEWT_COLORSET_ROOT); slsmg_write_nstring(title, self->width); @@ -188,25 +190,30 @@ int ui_browser__show(struct ui_browser *self, const char *title, va_start(ap, helpline); ui_helpline__vpush(helpline, ap); va_end(ap); + pthread_mutex_unlock(&ui__lock); return 0; } void ui_browser__hide(struct ui_browser *self) { + pthread_mutex_lock(&ui__lock); newtFormDestroy(self->form); self->form = NULL; ui_helpline__pop(); + pthread_mutex_unlock(&ui__lock); } int ui_browser__refresh(struct ui_browser *self) { int row; + pthread_mutex_lock(&ui__lock); newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); row = self->refresh(self); ui_browser__set_color(self, HE_COLORSET_NORMAL); SLsmg_fill_region(self->y + row, self->x, self->height - row, self->width, ' '); + pthread_mutex_unlock(&ui__lock); return 0; } diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/util/ui/helpline.c index 8d79daa..f36d2ff5 100644 --- a/tools/perf/util/ui/helpline.c +++ b/tools/perf/util/ui/helpline.c @@ -5,6 +5,7 @@ #include "../debug.h" #include "helpline.h" +#include "ui.h" void 
ui_helpline__pop(void) { @@ -55,7 +56,8 @@ int ui_helpline__show_help(const char *format, va_list ap) int ret; static int backlog; - ret = vsnprintf(ui_helpline__last_msg + backlog, + pthread_mutex_lock(&ui__lock); + ret = vsnprintf(ui_helpline__last_msg + backlog, sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; @@ -64,6 +66,7 @@ int ui_helpline__show_help(const char *format, va_list ap) newtRefresh(); backlog = 0; } + pthread_mutex_unlock(&ui__lock); return ret; } diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c index fbf1a14..ee46d67 100644 --- a/tools/perf/util/ui/setup.c +++ b/tools/perf/util/ui/setup.c @@ -6,6 +6,9 @@ #include "../debug.h" #include "browser.h" #include "helpline.h" +#include "ui.h" + +pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; static void newt_suspend(void *d __used) { diff --git a/tools/perf/util/ui/ui.h b/tools/perf/util/ui/ui.h new file mode 100644 index 0000000..d264e05 --- /dev/null +++ b/tools/perf/util/ui/ui.h @@ -0,0 +1,8 @@ +#ifndef _PERF_UI_H_ +#define _PERF_UI_H_ 1 + +#include + +extern pthread_mutex_t ui__lock; + +#endif /* _PERF_UI_H_ */ -- cgit v0.10.2 From 289c082044643e55f65c6a16bb3621cf3f35a454 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Feb 2011 13:56:28 -0200 Subject: perf annotate: Check if offset is less than symbol size Just like done on symbol__inc_addr_samples to catch misparsed offsets from objdump. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 02976b8..70ec422 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -541,11 +541,12 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); struct objdump_line *pos; + int len = sym->end - sym->start; h->sum = 0; list_for_each_entry(pos, &notes->src->source, node) { - if (pos->offset != -1) { + if (pos->offset != -1 && pos->offset < len) { h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8; h->sum += h->addr[pos->offset]; } -- cgit v0.10.2 From b99976e2d277c963138e090ae17bf835f8a07680 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Feb 2011 13:59:14 -0200 Subject: perf annotate browser: Use the percent color for the whole line Not just for the percentage number, to see the hot lines more easily.
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 1aa3965..cfb5a27 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -44,8 +44,6 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro struct objdump_line_rb_node *olrb = objdump_line__rb(ol); ui_browser__set_percent_color(self, olrb->percent, current_entry); slsmg_printf(" %7.2f ", olrb->percent); - if (!current_entry) - ui_browser__set_color(self, HE_COLORSET_CODE); } else { ui_browser__set_percent_color(self, 0, current_entry); slsmg_write_nstring(" ", 9); @@ -57,6 +55,9 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro slsmg_write_nstring(" ", width - 18); else slsmg_write_nstring(ol->line, width - 18); + + if (!current_entry) + ui_browser__set_color(self, HE_COLORSET_CODE); } static double objdump_line__calc_percent(struct objdump_line *self, -- cgit v0.10.2 From 4187e262bc90369ba581ee28ec74ed416618889e Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 9 Feb 2011 17:11:00 -0800 Subject: perf tools: Update Makefile with some help The perf makefile is nicely complete except for a) an uninstall option b) a 'make help' description This patch implements b) it also comments out other non-working makefile targets Signed-off-by: Jesse Brandeburg LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index bd498d4..4626a39 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -178,8 +178,8 @@ install-pdf: pdf $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) -install-html: html - '$(SHELL_PATH_SQ)' 
./install-webdoc.sh $(DESTDIR)$(htmldir) +#install-html: html +# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) ../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE @@ -288,15 +288,16 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ mv $@+ $@ -install-webdoc : html - '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) +# UNIMPLEMENTED +#install-webdoc : html +# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) -quick-install: quick-install-man +# quick-install: quick-install-man -quick-install-man: - '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) +# quick-install-man: +# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) -quick-install-html: - '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) +#quick-install-html: +# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) .PHONY: .FORCE-PERF-VERSION-FILE diff --git a/tools/perf/Makefile b/tools/perf/Makefile index ffd1047..7c75f1d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -1102,6 +1102,36 @@ $(sort $(dir $(DIRECTORY_DEPS))): $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) +help: + @echo 'Perf make targets:' + @echo ' doc - make *all* documentation (see below)' + @echo ' man - make manpage documentation (access with man )' + @echo ' html - make html documentation' + @echo ' info - make GNU info documentation (access with info )' + @echo ' pdf - make pdf documentation' + @echo ' TAGS - use etags to make tag information for source browsing' + @echo ' tags - use ctags to make tag information for source browsing' + @echo ' cscope - use cscope to make interactive browsing database' + @echo '' + @echo 'Perf install targets:' + @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' + @echo ' HINT: use "make prefix= " 
to install to a particular' + @echo ' path like make prefix=/usr/local install install-doc' + @echo ' install - install compiled binaries' + @echo ' install-doc - install *all* documentation' + @echo ' install-man - install manpage documentation' + @echo ' install-html - install html documentation' + @echo ' install-info - install GNU info documentation' + @echo ' install-pdf - install pdf documentation' + @echo '' + @echo ' quick-install-doc - alias for quick-install-man' + @echo ' quick-install-man - install the documentation quickly' + @echo ' quick-install-html - install the html documentation quickly' + @echo '' + @echo 'Perf maintainer targets:' + @echo ' distclean - alias to clean' + @echo ' clean - clean all binary objects and build output' + doc: $(MAKE) -C Documentation all -- cgit v0.10.2 From e116dfa1c357da49f55e1555767ec991225a8321 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Feb 2011 18:08:10 +0900 Subject: perf probe: Support function@filename syntax for --line Since "perf probe --add" supports function@filename syntax, --line option should also support it. Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org LKML-Reference: <20110210090810.1809.26913.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 81c3220..02bafce 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -16,7 +16,7 @@ or or 'perf probe' --list or -'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' +'perf probe' [options] --line='LINE' or 'perf probe' [options] --vars='PROBEPOINT' @@ -128,13 +128,14 @@ LINE SYNTAX ----------- Line range is described by following syntax. 
- "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" + "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" FUNC specifies the function name of showing lines. 'RLN' is the start line number from function entry line, and 'RLN2' is the end line number. As same as probe syntax, 'SRC' means the source file path, 'ALN' is start line number, and 'ALN2' is end line number in the file. It is also possible to specify how -many lines to show by using 'NUM'. +many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good +for searching a specific function when several functions share same name. So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. LAZY MATCHING diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 9d237e3..cbd7650 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -595,11 +595,11 @@ static int parse_line_num(char **ptr, int *val, const char *what) * The line range syntax is described by: * * SRC[:SLN[+NUM|-ELN]] - * FNC[:SLN[+NUM|-ELN]] + * FNC[@SRC][:SLN[+NUM|-ELN]] */ int parse_line_range_desc(const char *arg, struct line_range *lr) { - char *range, *name = strdup(arg); + char *range, *file, *name = strdup(arg); int err; if (!name) @@ -649,7 +649,16 @@ int parse_line_range_desc(const char *arg, struct line_range *lr) } } - if (strchr(name, '.')) + file = strchr(name, '@'); + if (file) { + *file = '\0'; + lr->file = strdup(++file); + if (lr->file == NULL) { + err = -ENOMEM; + goto err; + } + lr->function = name; + } else if (strchr(name, '.')) lr->file = name; else lr->function = name; -- cgit v0.10.2 From 8737ebdea02315eaffaebb3b73d55f2f726a4fe0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Feb 2011 18:08:16 +0900 Subject: perf probe: Show filename which contains target function Show filename which contains a target function with the function name on "--lines" mode, because perf-probe just 
shows the first function even if there are many same-name functions. Originally adopted by Franck Bui-Huu's patch which shows file name instead of function name. I've just modified it to show both of function name and file name, because of completeness of output. E.g.) $ perf probe -L t_show 0 static int t_show(struct seq_file *m, void *v) 1 { 2 struct ftrace_iterator *iter = m->private; ... $ perf probe -L t_show@trace/trace.c 0 static int t_show(struct seq_file *m, void *v) 1 { struct tracer *t = v; ... Original-patch-by: Franck Bui-Huu Cc: 2nddept-manager@sdl.hitachi.co.jp Cc: Franck Bui-Huu Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20110210090816.1809.43426.stgit@ltc236.sdl.hitachi.co.jp> Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index cbd7650..0e3ea13 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -409,7 +409,7 @@ int show_line_range(struct line_range *lr, const char *module) setup_pager(); if (lr->function) - fprintf(stdout, "<%s:%d>\n", lr->function, + fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path, lr->start - lr->offset); else fprintf(stdout, "<%s:%d>\n", lr->path, lr->start); -- cgit v0.10.2 From 4498062e72fd55b2a9a4ac1b44fab8cb44ad5367 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Feb 2011 10:07:42 -0200 Subject: perf python: Add cgroup.c to setup.py to get it building again The 023695d cset added a new file, util/cgroup.c, that is referenced from util/evsel.c, so it needs to be present in util/setup.py so that the python shared object binding works, fixing this: [root@emilia linux]# export PYTHONPATH=~acme/git/build/perf/python/ [root@emilia linux]# ./tools/perf/python/twatch.py Traceback (most recent call last): File "./tools/perf/python/twatch.py", line 16, in import perf ImportError: /home/acme/git/build/perf/python/perf.so: undefined symbol: 
close_cgroup Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1947b04..e24ffad 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -5,7 +5,7 @@ from distutils.core import setup, Extension perf = Extension('perf', sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', - 'util/util.c', 'util/xyarray.c'], + 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'], include_dirs = ['util/include'], extra_compile_args = ['-fno-strict-aliasing', '-Wno-write-strings']) -- cgit v0.10.2 From f0c55bcf4aa41b4b1dbee826513b1acb01bf65e1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 16 Feb 2011 15:10:01 +0200 Subject: perf: make perf stat print user provided full event names This patch changes the way perf stat prints event names at the end of a run. Until now, it was trying to reconstruct the event name from its encoding. The problem is that it would only print generic events without their modifiers (u, k, pp). This patch saves the event name as passed by the user in the evsel struct and uses it to print the final event name. This would also work in case perf is linked with a library (such as libpfm4) which provides full PMU event tables. $ perf stat -e cycles:u,cycles:k date Wed Feb 16 14:58:52 CET 2011 Performance counter stats for 'date': 568600 cycles:u 2779715 cycles:k 0.001908182 seconds time elapsed Cc: Arun Sharma Cc: David S. 
Miller Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian LKML-Reference: <4d5bdc64.98a1df0a.7aa3.06c2@mx.google.com> Signed-off-by: Stephane Eranian [ committer note: Fixed a merge problem with 023695d "Add cgroup support" ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c974e08..63cadaf 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -86,6 +86,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) { perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); + free(evsel->name); free(evsel); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1d3d5a3..f6fc8f6 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -37,6 +37,12 @@ struct perf_sample_id { struct perf_evsel *evsel; }; +/** struct perf_evsel - event selector + * + * @name - Can be set to retain the original event name passed by the user, + * so that when showing results in tools such as 'perf stat', we + * show the name used, not some alias.
+ */ struct perf_evsel { struct list_head node; struct perf_event_attr attr; @@ -45,6 +51,7 @@ struct perf_evsel { struct xyarray *id; struct perf_counts *counts; int idx; + char *name; void *priv; struct cgroup_sel *cgrp; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index cf082da..80a3dd5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -268,6 +268,9 @@ const char *event_name(struct perf_evsel *evsel) u64 config = evsel->attr.config; int type = evsel->attr.type; + if (evsel->name) + return evsel->name; + return __event_name(type, config); } @@ -782,8 +785,10 @@ int parse_events(const struct option *opt, const char *str, int unset __used) struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; struct perf_event_attr attr; enum event_result ret; + const char *ostr; for (;;) { + ostr = str; memset(&attr, 0, sizeof(attr)); ret = parse_event_symbols(opt, &str, &attr); if (ret == EVT_FAILED) @@ -798,6 +803,11 @@ int parse_events(const struct option *opt, const char *str, int unset __used) if (evsel == NULL) return -1; perf_evlist__add(evlist, evsel); + + evsel->name = calloc(str - ostr + 1, 1); + if (!evsel->name) + return -1; + strncpy(evsel->name, ostr, str - ostr); } if (*str == 0) -- cgit v0.10.2 From fec9cbd15b9e99bab9bc50f1ed7e20a1087d7c6d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Feb 2011 10:37:23 -0200 Subject: perf hists: Print number of samples, not the period sum So that we match the header where we state the number of events with the "Samples" column when using 'perf report -n/--show-nr-samples': [root@emilia ~]# perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.111 MB perf.data (~4860 samples) ] [root@emilia ~]# perf report --stdio --show-nr-samples # Events: 11 cycles # # Overhead Samples Command Shared Object Symbol # ........ .......... ........... .................. 
............................ # 16.65% 1 sleep [kernel.kallsyms] [k] unmap_vmas 16.10% 1 perf libpthread-2.12.so [.] __pthread_cleanup_push_defer 15.79% 2 perf [kernel.kallsyms] [k] format_decode 12.88% 1 kworker/1:2 [kernel.kallsyms] [k] cache_reap 10.69% 1 swapper [kernel.kallsyms] [k] _raw_spin_lock 7.55% 1 sleep [kernel.kallsyms] [k] prepare_exec_creds 6.00% 1 perf [jbd2] [k] start_this_handle 5.29% 1 perf [kernel.kallsyms] [k] seq_read 4.75% 1 perf [kernel.kallsyms] [k] get_pid_task 4.30% 1 perf [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore # # (For a higher level overview, try: perf report --sort comm,dso) # [root@emilia ~]# Reported-by: Stephane Eranian Acked-by: Stephane Eranian Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3f43723..da2899e8 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -591,6 +591,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, { struct sort_entry *se; u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; + u64 nr_events; const char *sep = symbol_conf.field_sep; int ret; @@ -599,6 +600,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, if (pair_hists) { period = self->pair ? self->pair->period : 0; + nr_events = self->pair ? self->pair->nr_events : 0; total = pair_hists->stats.total_period; period_sys = self->pair ? self->pair->period_sys : 0; period_us = self->pair ? self->pair->period_us : 0; @@ -606,6 +608,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, period_guest_us = self->pair ? 
self->pair->period_guest_us : 0; } else { period = self->period; + nr_events = self->nr_events; total = session_total; period_sys = self->period_sys; period_us = self->period_us; @@ -646,9 +649,9 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size, if (symbol_conf.show_nr_samples) { if (sep) - ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); + ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); else - ret += snprintf(s + ret, size - ret, "%11" PRIu64, period); + ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events); } if (pair_hists) { -- cgit v0.10.2 From 712a4b6049724278121d56aba683151d86c8c35a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Feb 2011 12:18:42 -0200 Subject: perf record: Delay setting the header writing atexit call While testing the --filter option I noticed that we were writing lots of unneeded stuff to the perf.data header when the filter ioctl fails, so move the atexit(atexit_header) call to after we create the counters successfully. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a4aaadc..db4cd1e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -538,11 +538,6 @@ static int __cmd_record(int argc, const char **argv) if (have_tracepoints(&evsel_list->entries)) perf_header__set_feat(&session->header, HEADER_TRACE_INFO); - /* - * perf_session__delete(session) will be called at atexit_header() - */ - atexit(atexit_header); - if (forks) { child_pid = fork(); if (child_pid < 0) { @@ -601,6 +596,11 @@ static int __cmd_record(int argc, const char **argv) perf_session__set_sample_type(session, sample_type); + /* + * perf_session__delete(session) will be called at atexit_header() + */ + atexit(atexit_header); + if (pipe_output) { err = perf_header__write_pipe(output); if (err < 0) -- cgit v0.10.2 From 74cfc17dc1a69c37ce6c8a76c1ce84bcb796eb0e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Feb 2011 14:40:46 -0200 Subject: perf report: Tell the user when a perf.data file has no samples [root@emilia ~]# perf report --stdio The perf.data file has no samples! [root@emilia ~]# The TUI shows a popup warning message with the same message. 
Reported-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f9a99a1..dddcc7e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -350,6 +350,12 @@ static int __cmd_report(void) perf_session__fprintf_dsos(session, stdout); next = rb_first(&session->hists_tree); + + if (next == NULL) { + ui__warning("The %s file has no samples!\n", input_name); + goto out_delete; + } + while (next) { struct hists *hists; -- cgit v0.10.2 From 668b8788f497b2386402daeca583d6300240d41d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Feb 2011 15:38:58 -0200 Subject: perf list: Allow filtering list of events The man page has the details, here are some examples: [root@emilia ~]# perf list *fault* *:*wait* List of pre-defined events (to be used in -e): page-faults OR faults [Software event] minor-faults [Software event] major-faults [Software event] alignment-faults [Software event] emulation-faults [Software event] radeon:radeon_fence_wait_begin [Tracepoint event] radeon:radeon_fence_wait_end [Tracepoint event] writeback:wbc_writeback_wait [Tracepoint event] writeback:wbc_balance_dirty_wait [Tracepoint event] writeback:writeback_congestion_wait [Tracepoint event] writeback:writeback_wait_iff_congested [Tracepoint event] sched:sched_wait_task [Tracepoint event] sched:sched_process_wait [Tracepoint event] sched:sched_stat_wait [Tracepoint event] sched:sched_stat_iowait [Tracepoint event] syscalls:sys_enter_epoll_wait [Tracepoint event] syscalls:sys_exit_epoll_wait [Tracepoint event] syscalls:sys_enter_epoll_pwait [Tracepoint event] syscalls:sys_exit_epoll_pwait [Tracepoint event] syscalls:sys_enter_rt_sigtimedwait [Tracepoint event] syscalls:sys_exit_rt_sigtimedwait [Tracepoint event] 
syscalls:sys_enter_waitid [Tracepoint event] syscalls:sys_exit_waitid [Tracepoint event] syscalls:sys_enter_wait4 [Tracepoint event] syscalls:sys_exit_wait4 [Tracepoint event] syscalls:sys_enter_waitpid [Tracepoint event] syscalls:sys_exit_waitpid [Tracepoint event] [root@emilia ~]# Suggested-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 399751b..7a527f7 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' +'perf list' [hw|sw|cache|tracepoint|event_glob] DESCRIPTION ----------- @@ -63,7 +63,26 @@ details. Some of them are referenced in the SEE ALSO section below. OPTIONS ------- -None + +Without options all known events will be listed. + +To limit the list use: + +. 'hw' or 'hardware' to list hardware events such as cache-misses, etc. + +. 'sw' or 'software' to list software events such as context switches, etc. + +. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc. + +. 'tracepoint' to list all tracepoint events, alternatively use + 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched, + block, etc. + +. If none of the above is matched, it will apply the supplied glob to all + events, printing the ones that match. + +One or more types can be used at the same time, listing the events for the +types specified. 
SEE ALSO -------- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index d88c696..6313b6e 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -5,6 +5,7 @@ * * Copyright (C) 2009, Thomas Gleixner * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar + * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo */ #include "builtin.h" @@ -13,9 +14,47 @@ #include "util/parse-events.h" #include "util/cache.h" -int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) +int cmd_list(int argc, const char **argv, const char *prefix __used) { setup_pager(); - print_events(); + + if (argc == 1) + print_events(NULL); + else { + int i; + + for (i = 1; i < argc; ++i) { + if (i > 1) + putchar('\n'); + if (strncmp(argv[i], "tracepoint", 10) == 0) + print_tracepoint_events(NULL, NULL); + else if (strcmp(argv[i], "hw") == 0 || + strcmp(argv[i], "hardware") == 0) + print_events_type(PERF_TYPE_HARDWARE); + else if (strcmp(argv[i], "sw") == 0 || + strcmp(argv[i], "software") == 0) + print_events_type(PERF_TYPE_SOFTWARE); + else if (strcmp(argv[i], "cache") == 0 || + strcmp(argv[i], "hwcache") == 0) + print_hwcache_events(NULL); + else { + char *sep = strchr(argv[i], ':'), *s; + int sep_idx; + + if (sep == NULL) { + print_events(argv[i]); + continue; + } + sep_idx = sep - argv[i]; + s = strdup(argv[i]); + if (s == NULL) + return -1; + + s[sep_idx] = '\0'; + print_tracepoint_events(s, s + sep_idx + 1); + free(s); + } + } + } return 0; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 80a3dd5..54a7e26 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -858,7 +858,7 @@ static const char * const event_type_descriptors[] = { * Print the events from /tracing/events */ -static void print_tracepoint_events(void) +void print_tracepoint_events(const char *subsys_glob, const char *event_glob) { DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, 
evt_dirent; @@ -873,6 +873,9 @@ static void print_tracepoint_events(void) return; for_each_subsystem(sys_dir, sys_dirent, sys_next) { + if (subsys_glob != NULL && + !strglobmatch(sys_dirent.d_name, subsys_glob)) + continue; snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_dirent.d_name); @@ -881,6 +884,10 @@ static void print_tracepoint_events(void) continue; for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + if (event_glob != NULL && + !strglobmatch(evt_dirent.d_name, event_glob)) + continue; + snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); printf(" %-42s [%s]\n", evt_path, @@ -932,13 +939,61 @@ int is_valid_tracepoint(const char *event_string) return 0; } +void print_events_type(u8 type) +{ + struct event_symbol *syms = event_symbols; + unsigned int i; + char name[64]; + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { + if (type != syms->type) + continue; + + if (strlen(syms->alias)) + snprintf(name, sizeof(name), "%s OR %s", + syms->symbol, syms->alias); + else + snprintf(name, sizeof(name), "%s", syms->symbol); + + printf(" %-42s [%s]\n", name, + event_type_descriptors[type]); + } +} + +int print_hwcache_events(const char *event_glob) +{ + unsigned int type, op, i, printed = 0; + + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + char *name = event_cache_name(type, op, i); + + if (event_glob != NULL && + !strglobmatch(name, event_glob)) + continue; + + printf(" %-42s [%s]\n", name, + event_type_descriptors[PERF_TYPE_HW_CACHE]); + ++printed; + } + } + } + + return printed; +} + /* * Print the help text for the event symbols: */ -void print_events(void) +void print_events(const char *event_glob) { struct event_symbol *syms = event_symbols; - unsigned int i, type, op, prev_type = -1; + unsigned int 
i, type, prev_type = -1, printed = 0, ntypes_printed = 0; char name[40]; printf("\n"); @@ -947,8 +1002,16 @@ void print_events(void) for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { type = syms->type; - if (type != prev_type) + if (type != prev_type && printed) { printf("\n"); + printed = 0; + ntypes_printed++; + } + + if (event_glob != NULL && + !(strglobmatch(syms->symbol, event_glob) || + (syms->alias && strglobmatch(syms->alias, event_glob)))) + continue; if (strlen(syms->alias)) sprintf(name, "%s OR %s", syms->symbol, syms->alias); @@ -958,22 +1021,17 @@ void print_events(void) event_type_descriptors[type]); prev_type = type; + ++printed; } - printf("\n"); - for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!is_cache_op_valid(type, op)) - continue; - - for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - printf(" %-42s [%s]\n", - event_cache_name(type, op, i), - event_type_descriptors[PERF_TYPE_HW_CACHE]); - } - } + if (ntypes_printed) { + printed = 0; + printf("\n"); } + print_hwcache_events(event_glob); + + if (event_glob != NULL) + return; printf("\n"); printf(" %-42s [%s]\n", @@ -986,7 +1044,7 @@ void print_events(void) event_type_descriptors[PERF_TYPE_BREAKPOINT]); printf("\n"); - print_tracepoint_events(); + print_tracepoint_events(NULL, NULL); exit(129); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index cf7e94a..212f88e 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -28,7 +28,10 @@ extern int parse_filter(const struct option *opt, const char *str, int unset); #define EVENTS_HELP_MAX (128*1024) -extern void print_events(void); +void print_events(const char *event_glob); +void print_events_type(u8 type); +void print_tracepoint_events(const char *subsys_glob, const char *event_glob); +int print_hwcache_events(const char *event_glob); extern int is_valid_tracepoint(const 
char *event_string); extern char debugfs_path[]; -- cgit v0.10.2 From e9345aab675382176740bc8a2c6d3caf1510e46d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Feb 2011 08:09:49 +0100 Subject: Revert "tracing: Add unstable sched clock note to the warning" This reverts commit 5e38ca8f3ea423442eaafe1b7e206084aa38120a. Breaks the build of several !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures. Cc: Jiri Olsa Cc: Steven Rostedt Message-ID: <20110217171823.GB17058@elte.hu> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7739893..bd1c35a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2163,14 +2163,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, delta = diff; if (unlikely(test_time_stamp(delta))) { WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp, - sched_clock_stable ? "" : - "If you just came from a suspend/resume,\n" - "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n"); + (unsigned long long)cpu_buffer->write_stamp); add_timestamp = 1; } } -- cgit v0.10.2 From 02ca752e4181e219e243cd61a60dd1da47251f11 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 17 Feb 2011 15:51:40 +0000 Subject: x86: Remove die_nmi() With no caller left, the function and the DIE_NMIWATCHDOG enumerator can both go away. 
Signed-off-by: Jan Beulich Cc: Peter Zijlstra Cc: Don Zickus LKML-Reference: <4D5D521C0200007800032702@vpn.id2.novell.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index ca242d3..518bbbb 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -13,7 +13,6 @@ enum die_val { DIE_PANIC, DIE_NMI, DIE_DIE, - DIE_NMIWATCHDOG, DIE_KERNELDEBUG, DIE_TRAP, DIE_GPF, diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c76f5b9..07f4601 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -7,7 +7,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index df20723..220a1c1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } -void notrace __kprobes -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - /* - * We are in trouble anyway, lets at least try - * to get a message out. 
- */ - flags = oops_begin(); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - oops_end(flags, regs, 0); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - static int __init oops_setup(char *s) { if (!s) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index a413000..7c64c42 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMIWATCHDOG: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup: */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - return NOTIFY_STOP; - } - /* Enter debugger: */ - break; - case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) -- cgit v0.10.2 From 006cdc32618e09ffe228a7a86af044f3cc0dd714 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 2 Feb 2011 13:01:41 -0600 Subject: perf tools: Makefile: Remove vestigial git-specific cruft This commit squashes several commits that remove: NO_SYMLINK_HEAD NO_SVN_TESTS NO_FAST_WORKING_DIRECTORY USE_STDEV SHA1/SSL cruft makefile rules Signed-off-by: Michael Witten LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7c75f1d..544367c 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -21,9 +21,6 @@ endif # Define FREAD_READS_DIRECTORIES if your are on a system which succeeds # when attempting to read from an fopen'ed directory. # -# Define NO_OPENSSL environment variable if you do not have OpenSSL. -# This also implies MOZILLA_SHA1. -# # Define CURLDIR=/foo/bar if your curl header and library files are in # /foo/bar/include and /foo/bar/lib directories. # @@ -56,13 +53,6 @@ endif # # Define NO_SYS_SELECT_H if you don't have sys/select.h. 
# -# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link. -# Enable it on Windows. By default, symrefs are still used. -# -# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability -# tests. These tests take up a significant amount of the total test time -# but are not needed unless you plan to talk to SVN repos. -# # Define NO_FINK if you are building on Darwin/Mac OS X, have Fink # installed in /sw, but don't want PERF to link against any libraries # installed there. If defined you may specify your own (or Fink's) @@ -75,19 +65,6 @@ endif # specify your own (or DarwinPort's) include directories and # library directories by defining CFLAGS and LDFLAGS appropriately. # -# Define PPC_SHA1 environment variable when running make to make use of -# a bundled SHA1 routine optimized for PowerPC. -# -# Define ARM_SHA1 environment variable when running make to make use of -# a bundled SHA1 routine optimized for ARM. -# -# Define MOZILLA_SHA1 environment variable when running make to make use of -# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast -# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default -# choice) has very fast version optimized for i586. -# -# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). -# # Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). # # Define NEEDS_SOCKET if linking with libc is not enough (SunOS, @@ -100,9 +77,6 @@ endif # Define NO_PREAD if you have a problem with pread() system call (e.g. # cygwin.dll before v1.5.22). # -# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is -# generally faster on your platform than accessing the working directory. -# # Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support # the executable mode bit, but doesn't really do so. # @@ -134,9 +108,6 @@ endif # Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" # available. 
This automatically turns USE_NSEC off. # -# Define USE_STDEV below if you want perf to care about the underlying device -# change being considered an inode change from the update-index perspective. -# # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks # field that counts the on-disk footprint in 512-byte blocks. # @@ -771,9 +742,6 @@ ifdef FREAD_READS_DIRECTORIES COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES COMPAT_OBJS += $(OUTPUT)compat/fopen.o endif -ifdef NO_SYMLINK_HEAD - BASIC_CFLAGS += -DNO_SYMLINK_HEAD -endif ifdef NO_STRCASESTR COMPAT_CFLAGS += -DNO_STRCASESTR COMPAT_OBJS += $(OUTPUT)compat/strcasestr.o @@ -813,9 +781,6 @@ ifdef NO_PREAD COMPAT_CFLAGS += -DNO_PREAD COMPAT_OBJS += $(OUTPUT)compat/pread.o endif -ifdef NO_FAST_WORKING_DIRECTORY - BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY -endif ifdef NO_TRUSTABLE_FILEMODE BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE endif @@ -851,23 +816,6 @@ ifdef NO_DEFLATE_BOUND BASIC_CFLAGS += -DNO_DEFLATE_BOUND endif -ifdef PPC_SHA1 - SHA1_HEADER = "ppc/sha1.h" - LIB_OBJS += $(OUTPUT)ppc/sha1.o ppc/sha1ppc.o -else -ifdef ARM_SHA1 - SHA1_HEADER = "arm/sha1.h" - LIB_OBJS += $(OUTPUT)arm/sha1.o $(OUTPUT)arm/sha1_arm.o -else -ifdef MOZILLA_SHA1 - SHA1_HEADER = "mozilla-sha1/sha1.h" - LIB_OBJS += $(OUTPUT)mozilla-sha1/sha1.o -else - SHA1_HEADER = - EXTLIBS += $(LIB_4_CRYPTO) -endif -endif -endif ifdef NO_PERL_MAKEMAKER export NO_PERL_MAKEMAKER endif @@ -930,7 +878,6 @@ endif # Shell quote (do not use $(call) to accommodate ancient setups); -SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER)) ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) @@ -948,8 +895,7 @@ PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) -BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ - $(COMPAT_CFLAGS) +BASIC_CFLAGS += $(COMPAT_CFLAGS) LIB_OBJS += $(COMPAT_OBJS) ALL_CFLAGS += $(BASIC_CFLAGS) @@ -1048,9 +994,6 @@ 
$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS '-DPREFIX="$(prefix_SQ)"' \ $< -$(OUTPUT)builtin-init-db.o: builtin-init-db.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< - $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< @@ -1089,7 +1032,6 @@ $(OUTPUT)perf-%$X: %.o $(PERFLIBS) $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) $(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) -builtin-revert.o wt-status.o: wt-status.h # we compile into subdirectories. if the target directory is not the source directory, they might not exists. So # we depend the various files onto their directories. @@ -1192,8 +1134,6 @@ all:: $(TEST_PROGRAMS) # However, the environment gets quite big, and some programs have problems # with that. -export NO_SVN_TESTS - check: $(OUTPUT)common-cmds.h if sparse; \ then \ -- cgit v0.10.2 From 8796cb9d7dc028945af4b2ea858ae8f8f2ecbe8c Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 2 Feb 2011 11:57:41 -0600 Subject: perf tools: Makefile: Remove platform-specific cruft While it makes sense that this tool could be used on other platforms at least to parse data, there doesn't appear to be any real support for such usage. 
This commit squashes several commits that remove: SNPRINTF_RETURNS_BOGUS FREAD_READS_DIRECTORIES NO_D_{INO,TYPE}_IN_DIRENT NO_STRCASESTR NO_MEMMEM NO_STRTOUMAX and NO_STRTOULL NO_SETENV NO_UNSETENV NO_MKDTEMP NEEDS_LIBICONV NEEDS_SOCKET NO_MMAP NO_PTHREADS NO_PREAD NO_TRUSTABLE_FILEMODE NO_IPV6 and NO_SOCKADDR_STORAGE NO_ICONV and OLD_ICONV NO_NSEC, USE_NSEC, and USE_ST_TIMESPEC NO_ST_BLOCKS_IN_STRUCT_STAT NO_FINK and NO_DARWIN_PORTS NO_SYS_SELECT_H NO_HSTRERROR DIR_HAS_BSD_GROUP_SEMANTICS and FORCE_DIR_SET_GID NEEDS_NSL, NO_UINTMAX_T, NO_INET_{N,P}TON COMPAT_{CFLAGS,OBJS} Executable extension `X' Signed-off-by: Michael Witten LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 544367c..53c1e93 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -14,103 +14,23 @@ endif # Define V=1 to have a more verbose compile. # Define V=2 to have an even more verbose compile. # -# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() -# or vsnprintf() return -1 instead of number of characters which would -# have been written to the final string if enough space had been available. -# -# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds -# when attempting to read from an fopen'ed directory. -# # Define CURLDIR=/foo/bar if your curl header and library files are in # /foo/bar/include and /foo/bar/lib directories. # # Define EXPATDIR=/foo/bar if your expat header and library files are in # /foo/bar/include and /foo/bar/lib directories. # -# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. -# -# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks -# d_type in struct dirent (latest Cygwin -- will be fixed soonish). -# # Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.) # do not support the 'size specifiers' introduced by C99, namely ll, hh, # j, z, t. 
(representing long long int, char, intmax_t, size_t, ptrdiff_t). # some C compilers supported these specifiers prior to C99 as an extension. # -# Define NO_STRCASESTR if you don't have strcasestr. -# -# Define NO_MEMMEM if you don't have memmem. -# -# Define NO_STRTOUMAX if you don't have strtoumax in the C library. -# If your compiler also does not support long long or does not have -# strtoull, define NO_STRTOULL. -# -# Define NO_SETENV if you don't have setenv in the C library. -# -# Define NO_UNSETENV if you don't have unsetenv in the C library. -# -# Define NO_MKDTEMP if you don't have mkdtemp in the C library. -# -# Define NO_SYS_SELECT_H if you don't have sys/select.h. -# -# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink -# installed in /sw, but don't want PERF to link against any libraries -# installed there. If defined you may specify your own (or Fink's) -# include directories and library directories by defining CFLAGS -# and LDFLAGS appropriately. -# -# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, -# have DarwinPorts installed in /opt/local, but don't want PERF to -# link against any libraries installed there. If defined you may -# specify your own (or DarwinPort's) include directories and -# library directories by defining CFLAGS and LDFLAGS appropriately. -# -# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). -# -# Define NEEDS_SOCKET if linking with libc is not enough (SunOS, -# Patrick Mauritz). -# -# Define NO_MMAP if you want to avoid mmap. -# -# Define NO_PTHREADS if you do not have or do not want to use Pthreads. -# -# Define NO_PREAD if you have a problem with pread() system call (e.g. -# cygwin.dll before v1.5.22). -# -# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support -# the executable mode bit, but doesn't really do so. -# -# Define NO_IPV6 if you lack IPv6 support and getaddrinfo(). 
-# -# Define NO_SOCKADDR_STORAGE if your platform does not have struct -# sockaddr_storage. -# -# Define NO_ICONV if your libc does not properly support iconv. -# -# Define OLD_ICONV if your library has an old iconv(), where the second -# (input buffer pointer) parameter is declared with type (const char **). -# # Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. # # Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" # that tells runtime paths to dynamic libraries; # "-Wl,-rpath=/path/lib" is used instead. # -# Define USE_NSEC below if you want perf to care about sub-second file mtimes -# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and -# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely -# randomly break unless your underlying filesystem supports those sub-second -# times (my ext3 doesn't). -# -# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of -# "st_ctim" -# -# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" -# available. This automatically turns USE_NSEC off. -# -# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks -# field that counts the on-disk footprint in 512-byte blocks. -# # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 # # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. @@ -282,8 +202,6 @@ BASIC_LDFLAGS = # Guard against environment variables BUILTIN_OBJS = BUILT_INS = -COMPAT_CFLAGS = -COMPAT_OBJS = LIB_H = LIB_OBJS = PYRF_OBJS = @@ -329,7 +247,7 @@ LANG_BINDINGS = ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) # what 'all' will build but not install in perfexecdir -OTHER_PROGRAMS = $(OUTPUT)perf$X +OTHER_PROGRAMS = $(OUTPUT)perf # Set paths to tools early so that they can be used for version tests. 
ifndef SHELL_PATH @@ -538,22 +456,6 @@ endif # NO_DWARF -include arch/$(ARCH)/Makefile -ifeq ($(uname_S),Darwin) - ifndef NO_FINK - ifeq ($(shell test -d /sw/lib && echo y),y) - BASIC_CFLAGS += -I/sw/include - BASIC_LDFLAGS += -L/sw/lib - endif - endif - ifndef NO_DARWIN_PORTS - ifeq ($(shell test -d /opt/local/lib && echo y),y) - BASIC_CFLAGS += -I/opt/local/include - BASIC_LDFLAGS += -L/opt/local/lib - endif - endif - PTHREAD_LIBS = -endif - ifneq ($(OUTPUT),) BASIC_CFLAGS += -I$(OUTPUT) endif @@ -707,110 +609,9 @@ ifndef CC_LD_DYNPATH endif endif -ifdef NEEDS_SOCKET - EXTLIBS += -lsocket -endif -ifdef NEEDS_NSL - EXTLIBS += -lnsl -endif -ifdef NO_D_TYPE_IN_DIRENT - BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT -endif -ifdef NO_D_INO_IN_DIRENT - BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT -endif -ifdef NO_ST_BLOCKS_IN_STRUCT_STAT - BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT -endif -ifdef USE_NSEC - BASIC_CFLAGS += -DUSE_NSEC -endif -ifdef USE_ST_TIMESPEC - BASIC_CFLAGS += -DUSE_ST_TIMESPEC -endif -ifdef NO_NSEC - BASIC_CFLAGS += -DNO_NSEC -endif ifdef NO_C99_FORMAT BASIC_CFLAGS += -DNO_C99_FORMAT endif -ifdef SNPRINTF_RETURNS_BOGUS - COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS - COMPAT_OBJS += $(OUTPUT)compat/snprintf.o -endif -ifdef FREAD_READS_DIRECTORIES - COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES - COMPAT_OBJS += $(OUTPUT)compat/fopen.o -endif -ifdef NO_STRCASESTR - COMPAT_CFLAGS += -DNO_STRCASESTR - COMPAT_OBJS += $(OUTPUT)compat/strcasestr.o -endif -ifdef NO_STRTOUMAX - COMPAT_CFLAGS += -DNO_STRTOUMAX - COMPAT_OBJS += $(OUTPUT)compat/strtoumax.o -endif -ifdef NO_STRTOULL - COMPAT_CFLAGS += -DNO_STRTOULL -endif -ifdef NO_SETENV - COMPAT_CFLAGS += -DNO_SETENV - COMPAT_OBJS += $(OUTPUT)compat/setenv.o -endif -ifdef NO_MKDTEMP - COMPAT_CFLAGS += -DNO_MKDTEMP - COMPAT_OBJS += $(OUTPUT)compat/mkdtemp.o -endif -ifdef NO_UNSETENV - COMPAT_CFLAGS += -DNO_UNSETENV - COMPAT_OBJS += $(OUTPUT)compat/unsetenv.o -endif -ifdef NO_SYS_SELECT_H - BASIC_CFLAGS += -DNO_SYS_SELECT_H -endif 
-ifdef NO_MMAP - COMPAT_CFLAGS += -DNO_MMAP - COMPAT_OBJS += $(OUTPUT)compat/mmap.o -else - ifdef USE_WIN32_MMAP - COMPAT_CFLAGS += -DUSE_WIN32_MMAP - COMPAT_OBJS += $(OUTPUT)compat/win32mmap.o - endif -endif -ifdef NO_PREAD - COMPAT_CFLAGS += -DNO_PREAD - COMPAT_OBJS += $(OUTPUT)compat/pread.o -endif -ifdef NO_TRUSTABLE_FILEMODE - BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE -endif -ifdef NO_IPV6 - BASIC_CFLAGS += -DNO_IPV6 -endif -ifdef NO_UINTMAX_T - BASIC_CFLAGS += -Duintmax_t=uint32_t -endif -ifdef NO_SOCKADDR_STORAGE -ifdef NO_IPV6 - BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in -else - BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6 -endif -endif -ifdef NO_INET_NTOP - LIB_OBJS += $(OUTPUT)compat/inet_ntop.o -endif -ifdef NO_INET_PTON - LIB_OBJS += $(OUTPUT)compat/inet_pton.o -endif - -ifdef NO_ICONV - BASIC_CFLAGS += -DNO_ICONV -endif - -ifdef OLD_ICONV - BASIC_CFLAGS += -DOLD_ICONV -endif ifdef NO_DEFLATE_BOUND BASIC_CFLAGS += -DNO_DEFLATE_BOUND @@ -819,14 +620,6 @@ endif ifdef NO_PERL_MAKEMAKER export NO_PERL_MAKEMAKER endif -ifdef NO_HSTRERROR - COMPAT_CFLAGS += -DNO_HSTRERROR - COMPAT_OBJS += $(OUTPUT)compat/hstrerror.o -endif -ifdef NO_MEMMEM - COMPAT_CFLAGS += -DNO_MEMMEM - COMPAT_OBJS += $(OUTPUT)compat/memmem.o -endif ifdef INTERNAL_QSORT COMPAT_CFLAGS += -DINTERNAL_QSORT COMPAT_OBJS += $(OUTPUT)compat/qsort.o @@ -835,9 +628,6 @@ ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif -ifdef DIR_HAS_BSD_GROUP_SEMANTICS - COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS -endif ifdef NO_EXTERNAL_GREP BASIC_CFLAGS += -DNO_EXTERNAL_GREP endif @@ -895,9 +685,6 @@ PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) -BASIC_CFLAGS += $(COMPAT_CFLAGS) -LIB_OBJS += $(COMPAT_OBJS) - ALL_CFLAGS += $(BASIC_CFLAGS) ALL_CFLAGS += $(ARCH_CFLAGS) ALL_LDFLAGS += $(BASIC_LDFLAGS) @@ -910,9 +697,6 @@ export TAR INSTALL DESTDIR SHELL_PATH SHELL = $(SHELL_PATH) all:: shell_compatibility_test $(ALL_PROGRAMS) 
$(LANG_BINDINGS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS -ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) -endif all:: @@ -921,15 +705,15 @@ please_set_SHELL_PATH_to_a_more_modern_shell: shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell -strip: $(PROGRAMS) $(OUTPUT)perf$X - $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf$X +strip: $(PROGRAMS) $(OUTPUT)perf + $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ -$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) +$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ $(BUILTIN_OBJS) $(LIBS) -o $@ @@ -1027,11 +811,11 @@ $(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/tra $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< -$(OUTPUT)perf-%$X: %.o $(PERFLIBS) +$(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) # we compile into subdirectories. if the target directory is not the source directory, they might not exists. So # we depend the various files onto their directories. 
@@ -1168,7 +952,7 @@ export perfexec_instdir install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) $(OUTPUT)perf$X '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' @@ -1267,7 +1051,7 @@ distclean: clean clean: $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive} - $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X + $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf $(RM) $(TEST_PROGRAMS) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(RM) -r $(PERF_TARNAME) .doc-tmp-dir diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index e833f26..fc78428 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -70,9 +70,7 @@ #include #include #include -#ifndef NO_SYS_SELECT_H #include -#endif #include #include #include @@ -83,10 +81,6 @@ #include "types.h" #include -#ifndef NO_ICONV -#include -#endif - extern const char *graph_line; extern const char *graph_dotted_line; extern char buildid_dir[]; @@ -236,26 +230,6 @@ static inline int sane_case(int x, int high) return x; } -#ifndef DIR_HAS_BSD_GROUP_SEMANTICS -# define FORCE_DIR_SET_GID S_ISGID -#else -# define FORCE_DIR_SET_GID 0 -#endif - -#ifdef NO_NSEC -#undef USE_NSEC -#define ST_CTIME_NSEC(st) 0 -#define ST_MTIME_NSEC(st) 0 -#else -#ifdef USE_ST_TIMESPEC -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) -#else -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) -#endif -#endif - int mkdir_p(char *path, mode_t mode); int copyfile(const char *from, const char *to); -- cgit v0.10.2 From 
0a54fb63600b745e060d24879ed5194382a466c5 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 2 Feb 2011 12:04:27 -0600 Subject: perf tools: Makefile: Remove tool-specific cruft This commit squashes several commits that remove: NO_C99_FORMAT CURLDIR and EXPATDIR NO_DEFLATE_BOUND CC_LD_DYNPATH and NO_R_TO_GCC_LINKER NO_PERL_MAKEMAKER INTERNAL_QSORT NO_EXTERNAL_GREP NO_PERL SCRIPT_PERL PERL_PATH_SQ Signed-off-by: Michael Witten LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 53c1e93..fde196f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -14,40 +14,10 @@ endif # Define V=1 to have a more verbose compile. # Define V=2 to have an even more verbose compile. # -# Define CURLDIR=/foo/bar if your curl header and library files are in -# /foo/bar/include and /foo/bar/lib directories. -# -# Define EXPATDIR=/foo/bar if your expat header and library files are in -# /foo/bar/include and /foo/bar/lib directories. -# -# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.) -# do not support the 'size specifiers' introduced by C99, namely ll, hh, -# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t). -# some C compilers supported these specifiers prior to C99 as an extension. -# -# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. -# -# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" -# that tells runtime paths to dynamic libraries; -# "-Wl,-rpath=/path/lib" is used instead. -# # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 # # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. # -# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's -# MakeMaker (e.g. using ActiveState under Cygwin). -# -# Define NO_PERL if you do not want Perl scripts or libraries at all. 
-# -# Define INTERNAL_QSORT to use Git's implementation of qsort(), which -# is a simplified version of the merge sort used in glibc. This is -# recommended if Git triggers O(n^2) behavior in your platform's qsort(). -# -# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call -# your external grep (e.g., if your system lacks grep, if its grep is -# broken, or spawning external process is slower than built-in grep perf has). -# # Define LDFLAGS=-static to build a static binary. # # Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. @@ -205,7 +175,6 @@ BUILT_INS = LIB_H = LIB_OBJS = PYRF_OBJS = -SCRIPT_PERL = SCRIPT_SH = TEST_PROGRAMS = @@ -221,10 +190,7 @@ $(OUTPUT)python/perf.so: $(PYRF_OBJS) # No Perl scripts right now: # -# SCRIPT_PERL += perf-add--interactive.perl - -SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ - $(patsubst %.perl,%,$(SCRIPT_PERL)) +SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) # Empty... EXTRA_PROGRAMS = @@ -599,43 +565,10 @@ else endif endif -ifndef CC_LD_DYNPATH - ifdef NO_R_TO_GCC_LINKER - # Some gcc does not accept and pass -R to the linker to specify - # the runtime dynamic library path. 
- CC_LD_DYNPATH = -Wl,-rpath, - else - CC_LD_DYNPATH = -R - endif -endif - -ifdef NO_C99_FORMAT - BASIC_CFLAGS += -DNO_C99_FORMAT -endif - -ifdef NO_DEFLATE_BOUND - BASIC_CFLAGS += -DNO_DEFLATE_BOUND -endif - -ifdef NO_PERL_MAKEMAKER - export NO_PERL_MAKEMAKER -endif -ifdef INTERNAL_QSORT - COMPAT_CFLAGS += -DINTERNAL_QSORT - COMPAT_OBJS += $(OUTPUT)compat/qsort.o -endif ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif -ifdef NO_EXTERNAL_GREP - BASIC_CFLAGS += -DNO_EXTERNAL_GREP -endif - -ifeq ($(PERL_PATH),) -NO_PERL=NoThanks -endif - QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir QUIET_SUBDIR1 = @@ -681,7 +614,6 @@ htmldir_SQ = $(subst ','\'',$(htmldir)) prefix_SQ = $(subst ','\'',$(prefix)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) -PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) @@ -744,7 +676,6 @@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ - -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ $@.sh > $(OUTPUT)$@+ && \ @@ -761,7 +692,6 @@ configure: configure.ac # These can record PERF_VERSION $(OUTPUT)perf.o perf.spec \ $(patsubst %.sh,%,$(SCRIPT_SH)) \ - $(patsubst %.perl,%,$(SCRIPT_PERL)) \ : $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS @@ -903,7 +833,6 @@ $(OUTPUT)PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ - @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ ### Testing rules -- cgit v0.10.2 From a3d1ee10d1bf4520af3d44c1aa6cd46956ec4fd7 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Wed, 2 Feb 2011 14:22:08 -0600 Subject: perf tools: 
Makefile: Remove various and sundry cruft This commit squashes several commits that remove: unnecessary uname calls `sh -c' BUILT_INS and QUIET_BUILT_IN They have no effect, and the `fixup-builtins' and `check-builtins.sh' scripts don't even exist. RUNTIME_PREFIX It's currently never anything but unset, and it's apparently only meaningful when Microsoft Windows is the operating system (according to the source for git). TEST_PROGRAMS EXTRA_PROGRAMS unused SHELL_PATH_SQ portions unused test for V=2 useless exports Only when `V' is undefined (that is, only when the value of `V' is empty) is `export V' performed, which just has the effect of placing the empty-valued variable `V' in the environment. The only other script to make use of `V' is `Documentation/Makefile', which only checks whether `V' is undefined (that is, whether the value of `V' is empty); hence, the `export V' has no effect whatsoever. Similarly, `export QUIET_GEN' is useless because it will only have a non-empty value when `V' has an empty-value, and when `V' has an empty-value, `QUIET_GEN' is always explicitly set in every script in which it is used. `DESTDIR' is only ever defined by the user via the environment or the command line, both of which are automatically exported to sub-make processes. Furthermore, no non-make sub-scripts make use of `DESTDIR' as an environment variable. No other scripts use `perfexec_instdir'. unused QUIET_SUBDIR{0,1} TAR and RPMBUILD PTHREAD_LIBS Maintainer's dist rules and commands distclean target Test suite coverage testing PRINT_DIR and NO_SUBDIR `configure' target NO_CURL @@PERF_VERSION@@ substitution Without the sed command, all of the rule's commands can be reduced to a single line that copies a file and sets the permissions properly in the process. `make test' echo line template_instdir PERF-BUILD-OPTIONS double-colon rules The use of double-colon rules seems misguided or vestigial git. 
Essentially hard-coded $(SCRIPTS) expansion Signed-off-by: Michael Witten LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index cb43289..416684b 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -1,4 +1,3 @@ -PERF-BUILD-OPTIONS PERF-CFLAGS PERF-GUI-VARS PERF-VERSION-FILE diff --git a/tools/perf/Makefile b/tools/perf/Makefile index fde196f..9b84218 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -3,7 +3,7 @@ ifeq ("$(origin O)", "command line") endif # The default target of this Makefile is... -all:: +all: ifneq ($(OUTPUT),) # check that the output directory actually exists @@ -11,8 +11,7 @@ OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif -# Define V=1 to have a more verbose compile. -# Define V=2 to have an even more verbose compile. +# Define V to have a more verbose compile. # # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 # @@ -28,12 +27,7 @@ $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) -include $(OUTPUT)PERF-VERSION-FILE -uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') -uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') -uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not') -uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') -uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') -uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') +uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ @@ -52,8 +46,6 @@ ifeq ($(ARCH),x86_64) ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S endif -# CFLAGS and LDFLAGS are for the users to override from the command line. 
- # # Include saner warnings here, which can catch bugs: # @@ -131,22 +123,13 @@ CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar RM = rm -f MKDIR = mkdir -TAR = tar FIND = find INSTALL = install -RPMBUILD = rpmbuild -PTHREAD_LIBS = -lpthread # sparse is architecture-neutral, which means that we need to tell it # explicitly what architecture to check for. Fix this up for yours.. SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ -ifeq ($(V), 2) - QUIET_STDERR = ">/dev/null" -else - QUIET_STDERR = ">/dev/null 2>&1" -endif - -include feature-tests.mak ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y) @@ -171,12 +154,10 @@ BASIC_LDFLAGS = # Guard against environment variables BUILTIN_OBJS = -BUILT_INS = LIB_H = LIB_OBJS = PYRF_OBJS = SCRIPT_SH = -TEST_PROGRAMS = SCRIPT_SH += perf-archive.sh @@ -192,12 +173,6 @@ $(OUTPUT)python/perf.so: $(PYRF_OBJS) SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) -# Empty... -EXTRA_PROGRAMS = - -# ... and all the rest that could be moved out of bindir to perfexecdir -PROGRAMS += $(EXTRA_PROGRAMS) - # # Single 'perf' binary right now: # @@ -205,10 +180,6 @@ PROGRAMS += $(OUTPUT)perf LANG_BINDINGS = -# List built-in command $C whose implementation cmd_$C() is not in -# builtin-$C.o but is linked in as part of some other command. 
-# - # what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) @@ -565,33 +536,13 @@ else endif endif -ifdef RUNTIME_PREFIX - COMPAT_CFLAGS += -DRUNTIME_PREFIX -endif - -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir -QUIET_SUBDIR1 = - -ifneq ($(findstring $(MAKEFLAGS),w),w) -PRINT_DIR = --no-print-directory -else # "make -w" -NO_SUBDIR = : -endif - ifneq ($(findstring $(MAKEFLAGS),s),s) ifndef V QUIET_CC = @echo ' ' CC $@; QUIET_AR = @echo ' ' AR $@; QUIET_LINK = @echo ' ' LINK $@; QUIET_MKDIR = @echo ' ' MKDIR $@; - QUIET_BUILT_IN = @echo ' ' BUILTIN $@; QUIET_GEN = @echo ' ' GEN $@; - QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ - $(MAKE) $(PRINT_DIR) -C $$subdir - export V - export QUIET_GEN - export QUIET_BUILT_IN endif endif @@ -621,16 +572,14 @@ ALL_CFLAGS += $(BASIC_CFLAGS) ALL_CFLAGS += $(ARCH_CFLAGS) ALL_LDFLAGS += $(BASIC_LDFLAGS) -export TAR INSTALL DESTDIR SHELL_PATH +export INSTALL SHELL_PATH ### Build rules SHELL = $(SHELL_PATH) -all:: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS - -all:: +all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) please_set_SHELL_PATH_to_a_more_modern_shell: @$$(:) @@ -661,37 +610,17 @@ $(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPU '-DPERF_MAN_PATH="$(mandir_SQ)"' \ '-DPERF_INFO_PATH="$(infodir_SQ)"' $< -$(BUILT_INS): $(OUTPUT)perf$X - $(QUIET_BUILT_IN)$(RM) $@ && \ - ln perf$X $@ 2>/dev/null || \ - ln -s perf$X $@ 2>/dev/null || \ - cp perf$X $@ - $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) $(QUIET_GEN). 
util/generate-cmdlist.sh > $@+ && mv $@+ $@ -$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh - $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ - sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ - -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ - -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ - -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ - $@.sh > $(OUTPUT)$@+ && \ - chmod +x $(OUTPUT)$@+ && \ - mv $(OUTPUT)$@+ $(OUTPUT)$@ - -configure: configure.ac - $(QUIET_GEN)$(RM) $@ $<+ && \ - sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ - $< > $<+ && \ - autoconf -o $@ $<+ && \ - $(RM) $<+ +$(SCRIPTS) : % : %.sh + $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' # These can record PERF_VERSION $(OUTPUT)perf.o perf.spec \ - $(patsubst %.sh,%,$(SCRIPT_SH)) \ + $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS @@ -826,23 +755,8 @@ $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ fi -# We need to apply sq twice, once to protect from the shell -# that runs $(OUTPUT)PERF-BUILD-OPTIONS, and then again to protect it -# and the first level quoting from the shell that runs "echo". -$(OUTPUT)PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS - @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ - @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ - @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ - ### Testing rules -# -# None right now: -# -# TEST_PROGRAMS += test-something$X - -all:: $(TEST_PROGRAMS) - # GNU make supports exporting all variables by "export" without parameters. # However, the environment gets quite big, and some programs have problems # with that. 
@@ -855,29 +769,17 @@ check: $(OUTPUT)common-cmds.h sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ done; \ else \ - echo 2>&1 "Did you mean 'make test'?"; \ exit 1; \ fi -remove-dashes: - ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS) - ### Installation rules -ifneq ($(filter /%,$(firstword $(template_dir))),) -template_instdir = $(template_dir) -else -template_instdir = $(prefix)/$(template_dir) -endif -export template_instdir - ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) else perfexec_instdir = $(prefix)/$(perfexecdir) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) -export perfexec_instdir install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' @@ -894,14 +796,6 @@ install: all $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' -ifdef BUILT_INS - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' - $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' -ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) $(OUTPUT)perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) -endif -endif - install-doc: $(MAKE) -C Documentation install @@ -926,104 +820,17 @@ quick-install-man: quick-install-html: $(MAKE) -C Documentation quick-install-html - -### Maintainer's dist rules -# -# None right now -# -# -# perf.spec: perf.spec.in -# sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+ -# mv $@+ $@ -# -# PERF_TARNAME=perf-$(PERF_VERSION) -# dist: perf.spec perf-archive$(X) configure -# ./perf-archive --format=tar \ -# --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar -# @mkdir -p $(PERF_TARNAME) -# @cp perf.spec configure $(PERF_TARNAME) -# @echo $(PERF_VERSION) > $(PERF_TARNAME)/version -# $(TAR) rf $(PERF_TARNAME).tar \ -# $(PERF_TARNAME)/perf.spec \ -# $(PERF_TARNAME)/configure \ -# $(PERF_TARNAME)/version -# @$(RM) 
-r $(PERF_TARNAME) -# gzip -f -9 $(PERF_TARNAME).tar -# -# htmldocs = perf-htmldocs-$(PERF_VERSION) -# manpages = perf-manpages-$(PERF_VERSION) -# dist-doc: -# $(RM) -r .doc-tmp-dir -# mkdir .doc-tmp-dir -# $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc -# cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar . -# gzip -n -9 -f $(htmldocs).tar -# : -# $(RM) -r .doc-tmp-dir -# mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7 -# $(MAKE) -C Documentation DESTDIR=./ \ -# man1dir=../.doc-tmp-dir/man1 \ -# man5dir=../.doc-tmp-dir/man5 \ -# man7dir=../.doc-tmp-dir/man7 \ -# install -# cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar . -# gzip -n -9 -f $(manpages).tar -# $(RM) -r .doc-tmp-dir -# -# rpm: dist -# $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz - ### Cleaning rules -distclean: clean -# $(RM) configure - clean: $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive} - $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf - $(RM) $(TEST_PROGRAMS) + $(RM) $(ALL_PROGRAMS) perf $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* - $(RM) -r $(PERF_TARNAME) .doc-tmp-dir - $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz - $(RM) $(htmldocs).tar.gz $(manpages).tar.gz $(MAKE) -C Documentation/ clean - $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS + $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS @python util/setup.py clean --build-lib='$(OUTPUT)python' \ --build-temp='$(OUTPUT)python/temp' .PHONY: all install clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS -.PHONY: .FORCE-PERF-BUILD-OPTIONS - -### Make sure built-ins do not have dups and listed in perf.c -# -check-builtins:: - ./check-builtins.sh - -### Test suite coverage testing -# -# None right now -# -# .PHONY: coverage coverage-clean coverage-build coverage-report -# -# coverage: -# $(MAKE) 
coverage-build -# $(MAKE) coverage-report -# -# coverage-clean: -# rm -f *.gcda *.gcno -# -# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs -# COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov -# -# coverage-build: coverage-clean -# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all -# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \ -# -j1 test -# -# coverage-report: -# gcov -b *.c */*.c -# grep '^function.*called 0 ' *.c.gcov */*.c.gcov \ -# | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ -# | tee coverage-untested-functions diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c index 67eeff5..7adf4ad 100644 --- a/tools/perf/util/exec_cmd.c +++ b/tools/perf/util/exec_cmd.c @@ -11,31 +11,12 @@ static const char *argv0_path; const char *system_path(const char *path) { -#ifdef RUNTIME_PREFIX - static const char *prefix; -#else static const char *prefix = PREFIX; -#endif struct strbuf d = STRBUF_INIT; if (is_absolute_path(path)) return path; -#ifdef RUNTIME_PREFIX - assert(argv0_path); - assert(is_absolute_path(argv0_path)); - - if (!prefix && - !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && - !(prefix = strip_path_suffix(argv0_path, BINDIR)) && - !(prefix = strip_path_suffix(argv0_path, "perf"))) { - prefix = PREFIX; - fprintf(stderr, "RUNTIME_PREFIX requested, " - "but prefix computation failed. 
" - "Using static fallback '%s'.\n", prefix); - } -#endif - strbuf_addf(&d, "%s/%s", prefix, path); path = strbuf_detach(&d, NULL); return path; -- cgit v0.10.2 From fbee632d0ca9f4073a3fefb9a843eac8af036b0f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Feb 2011 13:23:57 -0300 Subject: perf probe: Fix error propagation leading to segfault There are two hunks in this patch that stops probe processing as soon as one error is found, breaking out of loops, the other fix an error propagation that should return a negative error number but instead was returning the result of "ret < 0", which is 1 and thus made several error checks fail because they test agains < 0. The problem could be triggered by asking for a variable that was optimized out, fact that should stop the whole probe processing but instead was segfaulting while installing broken probes: [root@emilia ~]# probe perf_mmap:55 user_lock_limit Failed to find the location of user_lock_limit at this address. Perhaps, it has been optimized out. Failed to find 'user_lock_limit' in this function. Add new events: probe:perf_mmap (on perf_mmap:55 with user_lock_limit) probe:perf_mmap_1 (on perf_mmap:55 with user_lock_limit) Segmentation fault (core dumped) [root@emilia ~]# perf probe -l probe:perf_mmap (on perf_mmap:55@git/linux/kernel/perf_event.c with user_lock_limit) probe:perf_mmap_1 (on perf_mmap:55@git/linux/kernel/perf_event.c with user_lock_limit) [root@emilia ~]# After the fix: [root@emilia ~]# probe perf_mmap:55 user_lock_limit Failed to find the location of user_lock_limit at this address. Perhaps, it has been optimized out. Failed to find 'user_lock_limit' in this function. Error: Failed to add events. 
(-2) [root@emilia ~]# Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0e3ea13..369ddc6 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1832,9 +1832,12 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, } /* Loop 2: add all events */ - for (i = 0; i < npevs && ret >= 0; i++) + for (i = 0; i < npevs && ret >= 0; i++) { ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, pkgs[i].ntevs, force_add); + if (ret < 0) + break; + } end: /* Loop 3: cleanup and free trace events */ for (i = 0; i < npevs; i++) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index fe461f6..eecbdca 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1262,7 +1262,7 @@ static int probe_point_line_walker(const char *fname, int lineno, ret = call_probe_finder(NULL, pf); /* Continue if no error, because the line will be in inline function */ - return ret < 0 ?: 0; + return ret < 0 ? ret : 0; } /* Find probe point from its line number */ @@ -1484,6 +1484,8 @@ static int find_probes(int fd, struct probe_finder *pf) pf->lno = pp->line; ret = find_probe_point_by_line(pf); } + if (ret != DWARF_CB_OK) + break; } off = noff; } -- cgit v0.10.2 From e603dc15072c7fec0ae263597e6dabc3bb4c5c5b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 21 Feb 2011 16:05:50 -0300 Subject: perf evsel: Fix inverted test for fixing up attr.inherit flag The kernel refuses mmapping an event with the inherit flag set for something that is systemwide (cpu == -1), and the evsel layer got this reversed at some point, fix it. 
The symptom was that the --pid and --tid parameters for 'perf record' and 'perf top' returned with -EINVAL, like: # /tmp/build-perf/perf record -v -fo/tmp/perf.data -p 1042 Warning: ... trying to fall back to cpu-clock-ticks Fatal: failed to mmap with 22 (Invalid argument) Reported-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 63cadaf..8083d51 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -179,8 +179,19 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { int group_fd = -1; - - evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit; + /* + * Don't allow mmap() of inherited per-task counters. This + * would create a performance issue due to all children writing + * to the same buffer. + * + * FIXME: + * Proper fix is not to pass 'inherit' to perf_evsel__open*, + * but a 'flags' parameter, with 'group' folded there as well, + * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if + * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is + * set. Lets go for the minimal fix first tho. + */ + evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit; for (thread = 0; thread < threads->nr; thread++) { -- cgit v0.10.2 From 8635bf6ea3402154eec64763e6ed14972013c1c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Feb 2011 06:56:18 -0300 Subject: perf probe: Remove redundant checks While fixing an error propagating problem in f809b25 I added two redundant checks. I did that because I didn't expect the checks to be on the while and for loop condition expression, where they are tested before we run the loop, where the 'ret' variable is set. So remove it from there and leave it just after it is actually set, eliminating unneeded tests.
Reported-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 369ddc6..5ddee66 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1832,7 +1832,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, } /* Loop 2: add all events */ - for (i = 0; i < npevs && ret >= 0; i++) { + for (i = 0; i < npevs; i++) { ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, pkgs[i].ntevs, force_add); if (ret < 0) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index eecbdca..17f9c4a 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1462,8 +1462,7 @@ static int find_probes(int fd, struct probe_finder *pf) off = 0; line_list__init(&pf->lcache); /* Loop on CUs (Compilation Unit) */ - while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) && - ret >= 0) { + while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { /* Get the DIE(Debugging Information Entry) of this CU */ diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die); if (!diep) @@ -1484,7 +1483,7 @@ static int find_probes(int fd, struct probe_finder *pf) pf->lno = pp->line; ret = find_probe_point_by_line(pf); } - if (ret != DWARF_CB_OK) + if (ret < 0) break; } off = noff; -- cgit v0.10.2 From c97cf42219b7b6037d2f96c27a5f114f2383f828 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Feb 2011 12:02:07 -0300 Subject: perf top: Live TUI Annotation Now one has just to press the right key, 'a' or Enter on the main 'perf top --tui' screen to live annotate the symbol under the cursor. 
The annotate window starts centered on the hottest line (the one with most samples so far) then TAB and shift+TAB can be used to go to the prev/next hot line. Pressing 'H' at any point will center again the screen on the hottest line. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c9fd66d..f88a263 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -92,7 +92,6 @@ static bool dump_symtab = false; static struct winsize winsize; static const char *sym_filter = NULL; -struct sym_entry *sym_filter_entry = NULL; struct sym_entry *sym_filter_entry_sched = NULL; static int sym_pcnt_filter = 5; @@ -168,18 +167,19 @@ static int parse_source(struct sym_entry *syme) pthread_mutex_lock(&notes->lock); if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) { + pthread_mutex_unlock(&notes->lock); pr_err("Not enough memory for annotating '%s' symbol!\n", sym->name); sleep(1); - goto out_unlock; + return err; } err = symbol__annotate(sym, syme->map, 0); if (err == 0) { out_assign: - sym_filter_entry = syme; + top.sym_filter_entry = syme; } -out_unlock: + pthread_mutex_unlock(&notes->lock); return err; } @@ -195,7 +195,7 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) struct annotation *notes; struct symbol *sym; - if (syme != sym_filter_entry) + if (syme != top.sym_filter_entry) return; sym = sym_entry__symbol(syme); @@ -275,8 +275,8 @@ static void print_sym_table(struct perf_session *session) session->hists.stats.total_lost); } - if (sym_filter_entry) { - show_details(sym_filter_entry); + if (top.sym_filter_entry) { + show_details(top.sym_filter_entry); return; } @@ -417,8 +417,8 @@ static void print_mapped_keys(void) { char *name = NULL; - if (sym_filter_entry) { - struct symbol *sym = sym_entry__symbol(sym_filter_entry); + if
(top.sym_filter_entry) { + struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); name = sym->name; } @@ -549,15 +549,15 @@ static void handle_keypress(struct perf_session *session, int c) perf_session__fprintf_dsos(session, stderr); exit(0); case 's': - prompt_symbol(&sym_filter_entry, "Enter details symbol"); + prompt_symbol(&top.sym_filter_entry, "Enter details symbol"); break; case 'S': - if (!sym_filter_entry) + if (!top.sym_filter_entry) break; else { - struct sym_entry *syme = sym_filter_entry; + struct sym_entry *syme = top.sym_filter_entry; - sym_filter_entry = NULL; + top.sym_filter_entry = NULL; __zero_source_counters(syme); } break; @@ -656,7 +656,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) syme->map = map; symbol__annotate_init(map, sym); - if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { + if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { /* schedule initial sym_filter_entry setup */ sym_filter_entry_sched = syme; sym_filter = NULL; @@ -750,13 +750,13 @@ static void perf_event__process_sample(const union perf_event *event, /* let's see, whether we need to install initial sym_filter_entry */ if (sym_filter_entry_sched) { - sym_filter_entry = sym_filter_entry_sched; + top.sym_filter_entry = sym_filter_entry_sched; sym_filter_entry_sched = NULL; - if (parse_source(sym_filter_entry) < 0) { - struct symbol *sym = sym_entry__symbol(sym_filter_entry); + if (parse_source(top.sym_filter_entry) < 0) { + struct symbol *sym = sym_entry__symbol(top.sym_filter_entry); pr_err("Can't annotate %s", sym->name); - if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { + if (top.sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { pr_err(": No vmlinux file was found in the path:\n"); machine__fprintf_vmlinux_path(machine, stderr); } else diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e848803..c2c2868 100644 --- a/tools/perf/util/annotate.h +++ 
b/tools/perf/util/annotate.h @@ -90,12 +90,14 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx, #ifdef NO_NEWT_SUPPORT static inline int symbol__tui_annotate(struct symbol *sym __used, - struct map *map __used, int evidx __used) + struct map *map __used, + int evidx __used, int refresh __used) { return 0; } #else -int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx); +int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, + int refresh); #endif #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 4f769f4..e8d28e2 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -44,6 +44,7 @@ struct perf_top { pid_t target_pid, target_tid; bool hide_kernel_symbols, hide_user_symbols, zero; const char *cpu_list; + struct sym_entry *sym_filter_entry; struct perf_evsel *sym_evsel; }; diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index cfb5a27..8c17a87 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -6,6 +6,7 @@ #include "../../sort.h" #include "../../symbol.h" #include "../../annotate.h" +#include static void ui__error_window(const char *fmt, ...) 
{ @@ -138,46 +139,108 @@ static void annotate_browser__set_top(struct annotate_browser *self, self->curr_hot = nd; } -static int annotate_browser__run(struct annotate_browser *self) +static void annotate_browser__calc_percent(struct annotate_browser *browser, + int evidx) { - struct rb_node *nd; + struct symbol *sym = browser->b.priv; + struct annotation *notes = symbol__annotation(sym); + struct objdump_line *pos; + + browser->entries = RB_ROOT; + + pthread_mutex_lock(&notes->lock); + + list_for_each_entry(pos, &notes->src->source, node) { + struct objdump_line_rb_node *rbpos = objdump_line__rb(pos); + rbpos->percent = objdump_line__calc_percent(pos, sym, evidx); + if (rbpos->percent < 0.01) { + RB_CLEAR_NODE(&rbpos->rb_node); + continue; + } + objdump__insert_line(&browser->entries, rbpos); + } + pthread_mutex_unlock(&notes->lock); + + browser->curr_hot = rb_last(&browser->entries); +} + +static int annotate_browser__run(struct annotate_browser *self, int evidx, + int refresh) +{ + struct rb_node *nd = NULL; struct symbol *sym = self->b.priv; + /* + * RIGHT To allow builtin-annotate to cycle thru multiple symbols by + * examining the exit key for this function. + */ + int exit_keys[] = { 'H', NEWT_KEY_TAB, NEWT_KEY_UNTAB, + NEWT_KEY_RIGHT, 0 }; int key; if (ui_browser__show(&self->b, sym->name, - "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) + "<-, -> or ESC: exit, TAB/shift+TAB: " + "cycle hottest lines, H: Hottest") < 0) return -1; - /* - * To allow builtin-annotate to cycle thru multiple symbols by - * examining the exit key for this function.
- */ - ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT); + + ui_browser__add_exit_keys(&self->b, exit_keys); + annotate_browser__calc_percent(self, evidx); + + if (self->curr_hot) + annotate_browser__set_top(self, self->curr_hot); nd = self->curr_hot; - if (nd) { - int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 }; - ui_browser__add_exit_keys(&self->b, tabs); - } + + if (refresh != 0) + newtFormSetTimer(self->b.form, refresh); while (1) { key = ui_browser__run(&self->b); + if (refresh != 0) { + annotate_browser__calc_percent(self, evidx); + /* + * Current line focus got out of the list of most active + * lines, NULL it so that if TAB|UNTAB is pressed, we + * move to curr_hot (current hottest line). + */ + if (nd != NULL && RB_EMPTY_NODE(nd)) + nd = NULL; + } + switch (key) { + case -1: + /* + * FIXME we need to check if it was + * es.reason == NEWT_EXIT_TIMER + */ + if (refresh != 0) + symbol__annotate_decay_histogram(sym, evidx); + continue; case NEWT_KEY_TAB: - nd = rb_prev(nd); - if (nd == NULL) - nd = rb_last(&self->entries); - annotate_browser__set_top(self, nd); + if (nd != NULL) { + nd = rb_prev(nd); + if (nd == NULL) + nd = rb_last(&self->entries); + } else + nd = self->curr_hot; break; case NEWT_KEY_UNTAB: - nd = rb_next(nd); - if (nd == NULL) - nd = rb_first(&self->entries); - annotate_browser__set_top(self, nd); + if (nd != NULL) + nd = rb_next(nd); + if (nd == NULL) + nd = rb_first(&self->entries); + else + nd = self->curr_hot; + break; + case 'H': + nd = self->curr_hot; break; default: goto out; } + + if (nd != NULL) + annotate_browser__set_top(self, nd); } out: ui_browser__hide(&self->b); @@ -186,13 +249,13 @@ out: int hist_entry__tui_annotate(struct hist_entry *he, int evidx) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx); + return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 0); } -int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) +int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx, 
+ int refresh) { struct objdump_line *pos, *n; - struct objdump_line_rb_node *rbpos; struct annotation *notes = symbol__annotation(sym); struct annotate_browser browser = { .b = { @@ -211,7 +274,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) if (map->dso->annotate_warned) return -1; - if (symbol__annotate(sym, map, sizeof(*rbpos)) < 0) { + if (symbol__annotate(sym, map, sizeof(struct objdump_line_rb_node)) < 0) { ui__error_window(ui_helpline__last_msg); return -1; } @@ -219,26 +282,17 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx) ui_helpline__push("Press <- or ESC to exit"); list_for_each_entry(pos, &notes->src->source, node) { + struct objdump_line_rb_node *rbpos; size_t line_len = strlen(pos->line); + if (browser.b.width < line_len) browser.b.width = line_len; rbpos = objdump_line__rb(pos); rbpos->idx = browser.b.nr_entries++; - rbpos->percent = objdump_line__calc_percent(pos, sym, evidx); - if (rbpos->percent < 0.01) - continue; - objdump__insert_line(&browser.entries, rbpos); } - /* - * Position the browser at the hottest line. - */ - browser.curr_hot = rb_last(&browser.entries); - if (browser.curr_hot) - annotate_browser__set_top(&browser, browser.curr_hot); - browser.b.width += 18; /* Percentage */ - ret = annotate_browser__run(&browser); + ret = annotate_browser__run(&browser, evidx, refresh); list_for_each_entry_safe(pos, n, &notes->src->source, node) { list_del(&pos->node); objdump_line__free(pos); diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index ca60624..377ff58 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c @@ -7,6 +7,7 @@ * Released under the GPL v2.
(and only v2, not any later version) */ #include "../browser.h" +#include "../../annotate.h" #include "../helpline.h" #include "../libslang.h" #include "../../evlist.h" @@ -18,6 +19,7 @@ struct perf_top_browser { struct ui_browser b; struct rb_root root; + struct sym_entry *selection; float sum_ksamples; int dso_width; int dso_short_width; @@ -60,6 +62,9 @@ static void perf_top_browser__write(struct ui_browser *browser, void *entry, int slsmg_write_nstring(width >= syme->map->dso->long_name_len ? syme->map->dso->long_name : syme->map->dso->short_name, width); + + if (current_entry) + top_browser->selection = syme; } static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) @@ -80,21 +85,52 @@ static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) browser->b.nr_entries = top->rb_entries; } +static void perf_top_browser__annotate(struct perf_top_browser *browser) +{ + struct sym_entry *syme = browser->selection; + struct symbol *sym = sym_entry__symbol(syme); + struct annotation *notes = symbol__annotation(sym); + struct perf_top *top = browser->b.priv; + + if (notes->src != NULL) + goto do_annotation; + + pthread_mutex_lock(&notes->lock); + + top->sym_filter_entry = NULL; + + if (symbol__alloc_hist(sym, top->evlist->nr_entries) < 0) { + pr_err("Not enough memory for annotating '%s' symbol!\n", + sym->name); + pthread_mutex_unlock(&notes->lock); + return; + } + + top->sym_filter_entry = syme; + + pthread_mutex_unlock(&notes->lock); +do_annotation: + symbol__tui_annotate(sym, syme->map, 0, top->delay_secs * 1000); +} + static int perf_top_browser__run(struct perf_top_browser *browser) { int key; char title[160]; struct perf_top *top = browser->b.priv; int delay_msecs = top->delay_secs * 1000; + int exit_keys[] = { 'a', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; perf_top_browser__update_rb_tree(browser); perf_top__header_snprintf(top, title, sizeof(title)); perf_top__reset_sample_counters(top); - if (ui_browser__show(&browser->b, title,
"ESC: exit") < 0) + if (ui_browser__show(&browser->b, title, + "ESC: exit, ENTER|->|a: Live Annotate") < 0) return -1; newtFormSetTimer(browser->b.form, delay_msecs); + ui_browser__add_exit_keys(&browser->b, exit_keys); while (1) { key = ui_browser__run(&browser->b); @@ -109,7 +145,12 @@ static int perf_top_browser__run(struct perf_top_browser *browser) SLsmg_gotorc(0, 0); slsmg_write_nstring(title, browser->b.width); break; - case NEWT_KEY_TAB: + case 'a': + case NEWT_KEY_RIGHT: + case NEWT_KEY_ENTER: + if (browser->selection) + perf_top_browser__annotate(browser); + break; default: goto out; } -- cgit v0.10.2 From 6435a5e39d3e01a1a73a925ed53ee18619b0a368 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Feb 2011 07:25:02 -0300 Subject: perf top browser: Adjust the browser indexes when refreshing This is not a problem when we're not at the bottom of the active symbols list, so was not noticed, but at the end of the screen it falls apart. Fix it by adjusting the ui_browser indexes according to the new number of entries in the rb_tree and by seeking from the start of the rb_tree to find the new symbol at the top of the screen. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index 377ff58..2f47224 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c @@ -70,6 +70,7 @@ static void perf_top_browser__write(struct ui_browser *browser, void *entry, int static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) { struct perf_top *top = browser->b.priv; + u64 top_idx = browser->b.top_idx; browser->root = RB_ROOT; browser->b.top = NULL; @@ -82,7 +83,29 @@ static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) if (browser->sym_width + browser->dso_width > browser->b.width - 29) browser->sym_width = browser->b.width - browser->dso_width - 29; } + + /* + * Adjust the ui_browser indexes since the entries in the browser->root + * rb_tree may have changed, then seek it from start, so that we get a + * possible new top of the screen. 
+ */ browser->b.nr_entries = top->rb_entries; + + if (top_idx >= browser->b.nr_entries) { + if (browser->b.height >= browser->b.nr_entries) + top_idx = browser->b.nr_entries - browser->b.height; + else + top_idx = 0; + } + + if (browser->b.index >= top_idx + browser->b.height) + browser->b.index = top_idx + browser->b.index - browser->b.top_idx; + + if (browser->b.index >= browser->b.nr_entries) + browser->b.index = browser->b.nr_entries - 1; + + browser->b.top_idx = top_idx; + browser->b.seek(&browser->b, top_idx, SEEK_SET); } static void perf_top_browser__annotate(struct perf_top_browser *browser) -- cgit v0.10.2 From 9826e8329bc160e4cc58b83019f3f056965e42d0 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Tue, 22 Feb 2011 21:53:12 +0100 Subject: perf lock: Document valid sort keys Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20110222205312.GA18474@joi.lan> Signed-off-by: Marcin Slusarz Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 921de25..4a26a2f 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -24,8 +24,8 @@ and statistics with this 'perf lock' command. 'perf lock report' reports statistical data. -OPTIONS -------- +COMMON OPTIONS +-------------- -i:: --input=:: @@ -39,6 +39,14 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +REPORT OPTIONS +-------------- + +-k:: +--key=:: + Sorting key. Possible values: acquired (default), contended, + wait_total, wait_max, wait_min. 
+ SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e00d938..2e93f99 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -893,7 +893,7 @@ static const char * const report_usage[] = { static const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", - "key for sorting"), + "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"), /* TODO: type */ OPT_END() }; -- cgit v0.10.2 From 3f7cce3c18188a067d463749168bdda5abc5b0f7 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 18 Feb 2011 14:40:01 +0200 Subject: perf_events: Fix rcu and locking issues with cgroup support This patch ensures that we do not end up calling perf_cgroup_from_task() when there is no cgroup event. This avoids potential RCU and locking issues. The change in perf_cgroup_set_timestamp() ensures we check against ctx->nr_cgroups. It also avoids calling perf_clock() twice in a row. It also ensures we do need to grab ctx->lock before calling the function. We drop update_cgrp_time() from task_clock_event_read() because it is not needed. This also avoids having to deal with perf_cgroup_from_task(). Thanks to Peter Zijlstra for his help on this. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4d5e76b8.815bdf0a.7ac3.774f@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a0a6987..dadeaea 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -201,6 +201,11 @@ __get_cpu_context(struct perf_event_context *ctx) #ifdef CONFIG_CGROUP_PERF +/* + * Must ensure cgroup is pinned (css_get) before calling + * this function. In other words, we cannot call this function + * if there is no cgroup event for the current CPU context.
+ */ static inline struct perf_cgroup * perf_cgroup_from_task(struct task_struct *task) { @@ -268,28 +273,41 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) static inline void update_cgrp_time_from_event(struct perf_event *event) { - struct perf_cgroup *cgrp = perf_cgroup_from_task(current); + struct perf_cgroup *cgrp; + /* - * do not update time when cgroup is not active + * ensure we access cgroup data only when needed and + * when we know the cgroup is pinned (css_get) */ - if (!event->cgrp || cgrp != event->cgrp) + if (!is_cgroup_event(event)) return; - __update_cgrp_time(event->cgrp); + cgrp = perf_cgroup_from_task(current); + /* + * Do not update time when cgroup is not active + */ + if (cgrp == event->cgrp) + __update_cgrp_time(event->cgrp); } static inline void -perf_cgroup_set_timestamp(struct task_struct *task, u64 now) +perf_cgroup_set_timestamp(struct task_struct *task, + struct perf_event_context *ctx) { struct perf_cgroup *cgrp; struct perf_cgroup_info *info; - if (!task) + /* + * ctx->lock held by caller + * ensure we do not access cgroup data + * unless we have the cgroup pinned (css_get) + */ + if (!task || !ctx->nr_cgroups) return; cgrp = perf_cgroup_from_task(task); info = this_cpu_ptr(cgrp->info); - info->timestamp = now; + info->timestamp = ctx->timestamp; } #define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */ @@ -494,7 +512,8 @@ static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event, } static inline void -perf_cgroup_set_timestamp(struct task_struct *task, u64 now) +perf_cgroup_set_timestamp(struct task_struct *task, + struct perf_event_context *ctx) { } @@ -1613,7 +1632,7 @@ static int __perf_event_enable(void *info) /* * set current task's cgroup time reference point */ - perf_cgroup_set_timestamp(current, perf_clock()); + perf_cgroup_set_timestamp(current, ctx); __perf_event_mark_enabled(event, ctx); @@ -2048,7 +2067,7 @@ ctx_sched_in(struct perf_event_context *ctx, now = 
perf_clock(); ctx->timestamp = now; - perf_cgroup_set_timestamp(task, now); + perf_cgroup_set_timestamp(task, ctx); /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. @@ -5795,7 +5814,6 @@ static void task_clock_event_read(struct perf_event *event) if (!in_nmi()) { update_context_time(event->ctx); - update_cgrp_time_from_event(event); time = event->ctx->time; } else { u64 now = perf_clock(); -- cgit v0.10.2 From 768a06e2ca49cdf72389208cfc056a36cf8bc5e3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Feb 2011 16:52:24 +0100 Subject: perf: Simplify task_clock_event_read() There is no point in us having different code paths for nmi and !nmi here, so remove the !nmi one. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index dadeaea..64a018e 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5810,16 +5810,9 @@ static void task_clock_event_del(struct perf_event *event, int flags) static void task_clock_event_read(struct perf_event *event) { - u64 time; - - if (!in_nmi()) { - update_context_time(event->ctx); - time = event->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - event->ctx->timestamp; - time = event->ctx->time + delta; - } + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + u64 time = event->ctx->time + delta; task_clock_event_update(event, time); } -- cgit v0.10.2 From 170ae6bc24e1d7f9bd921a484ec9ea2825497970 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Feb 2011 11:08:59 -0300 Subject: perf annotate: Show better message when no vmlinux is found In both --tui and --stdio, in 'annotate', 'top', 'report' when trying to annotate a kernel symbol having just access to a kallsyms file, that doesn't have the DWARF info needed for annotation. 
Suggested-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 70ec422..0d0830c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -295,12 +295,23 @@ fallback: } if (dso->origin == DSO__ORIG_KERNEL) { + char bf[BUILD_ID_SIZE * 2 + 16] = " with build id "; + char *build_id_msg = NULL; + if (dso->annotate_warned) goto out_free_filename; + + if (dso->has_build_id) { + build_id__sprintf(dso->build_id, + sizeof(dso->build_id), bf + 15); + build_id_msg = bf; + } err = -ENOENT; dso->annotate_warned = 1; - pr_err("Can't annotate %s: No vmlinux file was found in the " - "path\n", sym->name); + pr_err("Can't annotate %s: No vmlinux file%s was found in the " + "path.\nPlease use 'perf buildid-cache -av vmlinux' or " + "--vmlinux vmlinux.\n", + sym->name, build_id_msg ?: ""); goto out_free_filename; } diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 294b495..497b3c4 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -924,14 +924,6 @@ int hists__browse(struct hists *self, const char *helpline, if (choice == annotate) { struct hist_entry *he; do_annotate: - if (browser->selection->map->dso->origin == DSO__ORIG_KERNEL) { - browser->selection->map->dso->annotate_warned = 1; - ui_helpline__puts("No vmlinux file found, can't " - "annotate with just a " - "kallsyms file"); - continue; - } - he = hist_browser__selected_entry(browser); if (he == NULL) continue; -- cgit v0.10.2 From c16bfe9ac389b13a37ff617a09682ecc0685960f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 25 Feb 2011 09:30:29 -0300 Subject: perf top browser: Fix up exit keys The left key was exiting 'perf top --tui' when it really shouldn't, it was too easy to leave the 
live annotation window and then press one too many <- and get out of the tool altogether. Do just like the report TUI does, ignore the left key for exit and also ask the user when pressing ESC if that is really what is wanted. Reported-by: Mike Galbraith Suggested-by: Ingo Molnar Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index 2f47224..e9381ec 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c @@ -10,6 +10,7 @@ #include "../../annotate.h" #include "../helpline.h" #include "../libslang.h" +#include "../util.h" #include "../../evlist.h" #include "../../hist.h" #include "../../sort.h" @@ -174,6 +175,12 @@ static int perf_top_browser__run(struct perf_top_browser *browser) if (browser->selection) perf_top_browser__annotate(browser); break; + case NEWT_KEY_LEFT: + continue; + case NEWT_KEY_ESCAPE: + if (!ui__dialog_yesno("Do you really want to exit?")) + continue; + /* Fall thru */ default: goto out; } -- cgit v0.10.2 From b210b3bb1b002f27165325a5edb6ebce3c168e92 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 25 Feb 2011 11:33:31 -0300 Subject: perf ui browser: Introduce ui_browser__show_title Needed because we were only showing the title in ui_browser__show, not in ui_browser__run, and in the run loop we may be calling other browsers that would then change the title, when we go back to the previous browser, we need to redraw the title. We could have done this as the Newt help line, with pop, etc, but I don't think it's worth it; doing it explicitly, when needed (some browsers may not use the title area at all) seems enough/more flexible.
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c index 60d6c81..611219f 100644 --- a/tools/perf/util/ui/browser.c +++ b/tools/perf/util/ui/browser.c @@ -157,6 +157,20 @@ void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]) } } +void __ui_browser__show_title(struct ui_browser *browser, const char *title) +{ + SLsmg_gotorc(0, 0); + ui_browser__set_color(browser, NEWT_COLORSET_ROOT); + slsmg_write_nstring(title, browser->width); +} + +void ui_browser__show_title(struct ui_browser *browser, const char *title) +{ + pthread_mutex_lock(&ui__lock); + __ui_browser__show_title(browser, title); + pthread_mutex_unlock(&ui__lock); +} + int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...) { @@ -180,9 +194,7 @@ int ui_browser__show(struct ui_browser *self, const char *title, return -1; pthread_mutex_lock(&ui__lock); - SLsmg_gotorc(0, 0); - ui_browser__set_color(self, NEWT_COLORSET_ROOT); - slsmg_write_nstring(title, self->width); + __ui_browser__show_title(self, title); ui_browser__add_exit_keys(self, keys); newtFormAddComponent(self->form, self->sb); diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h index 0dc7e4d..fc63dda 100644 --- a/tools/perf/util/ui/browser.h +++ b/tools/perf/util/ui/browser.h @@ -24,7 +24,6 @@ struct ui_browser { u32 nr_entries; }; - void ui_browser__set_color(struct ui_browser *self, int color); void ui_browser__set_percent_color(struct ui_browser *self, double percent, bool current); @@ -35,6 +34,8 @@ void ui_browser__reset_index(struct ui_browser *self); void ui_browser__gotorc(struct ui_browser *self, int y, int x); void ui_browser__add_exit_key(struct ui_browser *self, int key); void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); +void 
__ui_browser__show_title(struct ui_browser *browser, const char *title); +void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...); void ui_browser__hide(struct ui_browser *self); -- cgit v0.10.2 From 3166fc8fb6a2f52273d545e970297524e02c3e39 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Mar 2011 10:21:44 -0300 Subject: perf top browser: Handle empty active symbols list Fixing a SEGV. An empty list could happen when not being able to resolve symbols, for instance when --vmlinux invalid-file is used. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index e9381ec..5a06538 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c @@ -76,6 +76,12 @@ static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser) browser->root = RB_ROOT; browser->b.top = NULL; browser->sum_ksamples = perf_top__decay_samples(top, &browser->root); + /* + * No active symbols + */ + if (top->rb_entries == 0) + return; + perf_top__find_widths(top, &browser->root, &browser->dso_width, &browser->dso_short_width, &browser->sym_width); -- cgit v0.10.2 From a1ceb741cf86ef433006379742db81c00b450bae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Mar 2011 10:24:43 -0300 Subject: perf tui: Make ui__warning modal By taking the ui__lock so that no other screen updates take place while waiting for the user. That was happening when handling an invalid --vmlinux parameter in 'perf top --tui', with the screen refresh routine repainting the screen and removing the warning window. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 7b5a892..fdf1fc8 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c @@ -9,6 +9,7 @@ #include "../debug.h" #include "browser.h" #include "helpline.h" +#include "ui.h" #include "util.h" static void newt_form__set_exit_keys(newtComponent self) @@ -118,10 +119,12 @@ void ui__warning(const char *format, ...) va_list args; va_start(args, format); - if (use_browser > 0) + if (use_browser > 0) { + pthread_mutex_lock(&ui__lock); newtWinMessagev((char *)warning_str, (char *)ok, (char *)format, args); - else + pthread_mutex_unlock(&ui__lock); + } else vfprintf(stderr, format, args); va_end(args); } -- cgit v0.10.2 From 374cfe56892701f062586d6a6de6cb71777a4184 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Mar 2011 10:27:27 -0300 Subject: perf top: Fix reporting of invalid --vmlinux Using ui__warning, that will, in --tui, show a window with the message, waiting for the user to press Ok. Also run exit_browser() to let newt do its final cleaning of the screen. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f88a263..0b07cc3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -740,8 +740,9 @@ static void perf_event__process_sample(const union perf_event *event, */ if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { - pr_err("The %s file can't be used\n", - symbol_conf.vmlinux_name); + ui__warning("The %s file can't be used\n", + symbol_conf.vmlinux_name); + exit_browser(0); exit(1); } -- cgit v0.10.2 From 5807806a92450fd57f8063868efae9d4af74db02 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 1 Mar 2011 10:43:03 -0300 Subject: perf top tui: Wait till the first sample to refresh the screen. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0b07cc3..417f757 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -72,6 +72,7 @@ static struct perf_top top = { .target_tid = -1, .active_symbols = LIST_HEAD_INIT(top.active_symbols), .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER, + .active_symbols_cond = PTHREAD_COND_INITIALIZER, .freq = 1000, /* 1 KHz */ }; @@ -577,7 +578,17 @@ static void handle_keypress(struct perf_session *session, int c) static void *display_thread_tui(void *arg __used) { - perf_top__tui_browser(&top); + int err = 0; + pthread_mutex_lock(&top.active_symbols_lock); + while (list_empty(&top.active_symbols)) { + err = pthread_cond_wait(&top.active_symbols_cond, + &top.active_symbols_lock); + if (err) + break; + } + pthread_mutex_unlock(&top.active_symbols_lock); + if (!err) + 
perf_top__tui_browser(&top); exit_browser(0); exit(0); return NULL; @@ -776,8 +787,14 @@ static void perf_event__process_sample(const union perf_event *event, syme->count[evsel->idx]++; record_precise_ip(syme, evsel->idx, ip); pthread_mutex_lock(&top.active_symbols_lock); - if (list_empty(&syme->node) || !syme->node.next) + if (list_empty(&syme->node) || !syme->node.next) { + static bool first = true; __list_insert_active_sym(syme); + if (first) { + pthread_cond_broadcast(&top.active_symbols_cond); + first = false; + } + } pthread_mutex_unlock(&top.active_symbols_lock); } } diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index e8d28e2..96d1cb7 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -35,6 +35,7 @@ struct perf_top { */ struct list_head active_symbols; pthread_mutex_t active_symbols_lock; + pthread_cond_t active_symbols_cond; u64 samples; u64 kernel_samples, us_samples; u64 exact_samples; -- cgit v0.10.2 From b06b3d49699a52e8f9ca056c4f96e81b1987d78e Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 2 Mar 2011 21:27:04 +0800 Subject: perf, x86: Add Intel SandyBridge CPU support This patch adds basic SandyBridge support, including hardware cache events and PEBS events support. It has been tested on SandyBridge CPUs with perf stat and also with PEBS based profiling - both work fine. The patch does not affect other models. 
v2 -> v3: - fix PEBS event 0xd0 with right umask combinations - move snb pebs constraint assignment to intel_pmu_init v1 -> v2: - add more raw and PEBS events constraints - use offcore events for LLC-* cache events - remove the call to Nehalem workaround enable_all function Signed-off-by: Lin Ming Acked-by: Peter Zijlstra Cc: Stephane Eranian Cc: Andi Kleen LKML-Reference: <1299072424.2175.24.camel@localhost> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d977a2..390fa6d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -166,8 +166,10 @@ struct cpu_hw_events { /* * Constraint on the Event code + UMask */ -#define PEBS_EVENT_CONSTRAINT(c, n) \ +#define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define PEBS_EVENT_CONSTRAINT(c, n) \ + INTEL_UEVENT_CONSTRAINT(c, n) #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 008835c..d00f386 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -76,6 +76,19 @@ static struct event_constraint intel_westmere_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_snb_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_gen_event_constraints[] = { 
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -89,6 +102,106 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +static __initconst const u64 snb_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + /* + * TBD: Need Off-core Response Performance Monitoring support + */ + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 
0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -1062,6 +1175,17 @@ static __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; + case 42: /* SandyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + intel_pmu_lbr_init_nhm(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_snb_pebs_events; + pr_cont("SandyBridge events, "); + break; + default: /* * default constraints for v2 and up diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b7dcd9f..8251998 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -388,6 +388,44 @@ static struct event_constraint intel_nehalem_pebs_events[] = { EVENT_CONSTRAINT_END }; +static struct event_constraint intel_snb_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ 
+ PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */ + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */ + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */ + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */ + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */ + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */ + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */ + PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ + PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ + PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ + PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ + PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ + PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ + PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ + PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ + PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ + PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */ + PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ + PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ + PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), 
/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ + PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */ + PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */ + PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ + EVENT_CONSTRAINT_END +}; + static struct event_constraint * intel_pebs_constraints(struct perf_event *event) { -- cgit v0.10.2 From 0a10247914a5cad3caf7ef8a255c54c4d3ed2062 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 26 Feb 2011 04:51:54 +0100 Subject: perf: Set filters before mmaping events We currently set the filters after we mmap the events, this is a race that let undesired events record themselves in the buffer before we had the time to set the filters. So set the filters before they can be recorded. That also librarizes the filters setting so that filtering can be done more easily from other tools than perf record later. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index db4cd1e..d40a81e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -180,12 +180,10 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n static void create_counter(struct perf_evsel *evsel, int cpu) { - char *filter = evsel->filter; struct perf_event_attr *attr = &evsel->attr; struct perf_header_attr *h_attr; struct perf_sample_id *sid; int thread_index; - int ret; for (thread_index = 0; thread_index < evsel_list->threads->nr; thread_index++) { h_attr = get_header_attr(attr, evsel->idx); @@ -204,16 +202,6 @@ static void create_counter(struct perf_evsel *evsel, int cpu) pr_warning("Not enough memory to add id\n"); exit(-1); } - - if (filter != NULL) { - ret = ioctl(FD(evsel, cpu, thread_index), - PERF_EVENT_IOC_SET_FILTER, filter); - if (ret) { - 
error("failed to set filter with %d (%s)\n", errno, - strerror(errno)); - exit(-1); - } - } } if (!sample_type) @@ -367,6 +355,12 @@ try_again: } } + if (perf_evlist__set_filters(evlist)) { + error("failed to set filter with %d (%s)\n", errno, + strerror(errno)); + exit(-1); + } + if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 95b21fe..030ae7f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -348,3 +348,31 @@ void perf_evlist__delete_maps(struct perf_evlist *evlist) evlist->cpus = NULL; evlist->threads = NULL; } + +int perf_evlist__set_filters(struct perf_evlist *evlist) +{ + const struct thread_map *threads = evlist->threads; + const struct cpu_map *cpus = evlist->cpus; + struct perf_evsel *evsel; + char *filter; + int thread; + int cpu; + int err; + int fd; + + list_for_each_entry(evsel, &evlist->entries, node) { + filter = evsel->filter; + if (!filter) + continue; + for (cpu = 0; cpu < cpus->nr; cpu++) { + for (thread = 0; thread < threads->nr; thread++) { + fd = FD(evsel, cpu, thread); + err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter); + if (err) + return err; + } + } + } + + return 0; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c988405..b75805a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -60,5 +60,6 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, pid_t target_tid, const char *cpu_list); void perf_evlist__delete_maps(struct perf_evlist *evlist); +int perf_evlist__set_filters(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ -- cgit v0.10.2 From ff9ae1babd8ce88c3f90db6278ea5f55bdcb4624 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 25 Feb 2011 21:57:04 +0100 Subject: perf: Fix missing strndup declaration MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit <ctype.h> is included first without _GNU_SOURCE, so it ends up including <string.h> without declaring strndup(). And further declarations, even with _GNU_SOURCE defined, are of course without effect. Therefore: util/strfilter.c: Dans la fonction «strfilter_node__new» : util/strfilter.c:134: attention : déclaration implicite de la fonction « «strndup» » util/strfilter.c:134: attention : incompatible implicit declaration of built-in function «strndup» make: *** [util/strfilter.o] Erreur 1 Just don't include ctype.h as it doesn't appear to be necessary anyway. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 4064b7d..834c8eb 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include "util.h" #include "string.h" #include "strfilter.h" -- cgit v0.10.2 From cfff2d909cbdaf8c467bd321aa0502a548ec8f7e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 25 Feb 2011 21:30:16 +0100 Subject: perf: Fix undefined PyVarObject_HEAD_INIT in python 2.5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PyVarObject_HEAD_INIT is undefined in python 2.5, resulting in a build crash: util/python.c:81: attention : déclaration implicite de la fonction « «PyVarObject_HEAD_INIT» » util/python.c:82: erreur: request for member «tp_name» in something not a structure or union util/python.c:117: erreur: request for member «tp_name» in something not a structure or union util/python.c:146: erreur: request for member «tp_name» in something not a structure or union util/python.c:177: erreur: request for member «tp_name» in something not a structure or union util/python.c:290: erreur: request for member «tp_name» in something not a structure or union util/python.c:359: erreur: request for
member «tp_name» in something not a structure or union util/python.c:532: erreur: request for member «tp_name» in something not a structure or union util/python.c:761: erreur: request for member «tp_name» in something not a structure or union error: command 'gcc' failed with exit status 1 make: *** [python/perf.so] Erreur 1 We can fix that by defining PyVarObject_HEAD_INIT as a wrapper on PyObject_HEAD_INIT, thanks to a trick found on biopython: https://github.com/biopython/biopython/commit/d4eaf57946c7b4c32eca8d18821edf32f83e300d Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 5317ef2..a9f2d7e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -8,6 +8,11 @@ #include "cpumap.h" #include "thread_map.h" +/* Define PyVarObject_HEAD_INIT for python 2.5 */ +#ifndef PyVarObject_HEAD_INIT +# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, +#endif + struct throttle_event { struct perf_event_header header; u64 time; -- cgit v0.10.2 From 940c5b2971de443df22eed0441bc74fb0116e9f5 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sun, 27 Feb 2011 21:13:31 +0800 Subject: perf: Fix the missing event initialization when pmu is found in idr Currently, the event is not initialized if pmu is found in idr. This never causes bug just because now no pmu is associated with the idr id. 
Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra LKML-Reference: <1298812411.2699.9.camel@localhost> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 64a018e..821ce82 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -6098,17 +6098,22 @@ struct pmu *perf_init_event(struct perf_event *event) { struct pmu *pmu = NULL; int idx; + int ret; idx = srcu_read_lock(&pmus_srcu); rcu_read_lock(); pmu = idr_find(&pmu_idr, event->attr.type); rcu_read_unlock(); - if (pmu) + if (pmu) { + ret = pmu->event_init(event); + if (ret) + pmu = ERR_PTR(ret); goto unlock; + } list_for_each_entry_rcu(pmu, &pmus, entry) { - int ret = pmu->event_init(event); + ret = pmu->event_init(event); if (!ret) goto unlock; -- cgit v0.10.2 From 3db272c0494900fcb905a201180a78cae3addd6e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 3 Mar 2011 14:25:37 +0800 Subject: perf cgroup: Fix leak of file reference count In perf_cgroup_connect(), fput_light() is missing in a failure path. 
Signed-off-by: Li Zefan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4D6F3461.6060406@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 821ce82..7c999e8 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -404,8 +404,10 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, return -EBADF; css = cgroup_css_from_dir(file, perf_subsys_id); - if (IS_ERR(css)) - return PTR_ERR(css); + if (IS_ERR(css)) { + ret = PTR_ERR(css); + goto out; + } cgrp = container_of(css, struct perf_cgroup, css); event->cgrp = cgrp; @@ -422,6 +424,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, /* must be done before we fput() the file */ perf_get_cgroup(event); } +out: fput_light(file, fput_needed); return ret; } -- cgit v0.10.2 From f75e18cb9627b1d3d752b83a0b5563da0042c50a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 3 Mar 2011 14:25:50 +0800 Subject: perf cgroup: Fix unmatched call to perf_detach_cgroup() In the failure path, we call perf_detach_cgroup(), but we didn't call perf_get_cgroup() prior to it.
Signed-off-by: Li Zefan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4D6F346E.9070606@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 7c999e8..b002095 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -412,6 +412,9 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, cgrp = container_of(css, struct perf_cgroup, css); event->cgrp = cgrp; + /* must be done before we fput() the file */ + perf_get_cgroup(event); + /* * all events in a group must monitor * the same cgroup because a task belongs @@ -420,9 +423,6 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, if (group_leader && group_leader->cgrp != cgrp) { perf_detach_cgroup(event); ret = -EINVAL; - } else { - /* must be done before we fput() the file */ - perf_get_cgroup(event); } out: fput_light(file, fput_needed); -- cgit v0.10.2 From 1b15d0558e82df9b3659804ceb44187b98eda354 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 3 Mar 2011 14:26:06 +0800 Subject: perf cgroup: Clean up perf_cgroup_create() - Use kzalloc() to replace kmalloc() + memset(). - Remove redundant initialization, since alloc_percpu() returns zero-filled percpu memory. 
Signed-off-by: Li Zefan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4D6F347E.2010806@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index b002095..193b190 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -7346,26 +7346,17 @@ static struct cgroup_subsys_state *perf_cgroup_create( struct cgroup_subsys *ss, struct cgroup *cont) { struct perf_cgroup *jc; - struct perf_cgroup_info *t; - int c; - jc = kmalloc(sizeof(*jc), GFP_KERNEL); + jc = kzalloc(sizeof(*jc), GFP_KERNEL); if (!jc) return ERR_PTR(-ENOMEM); - memset(jc, 0, sizeof(*jc)); - jc->info = alloc_percpu(struct perf_cgroup_info); if (!jc->info) { kfree(jc); return ERR_PTR(-ENOMEM); } - for_each_possible_cpu(c) { - t = per_cpu_ptr(jc->info, c); - t->time = 0; - t->timestamp = 0; - } return &jc->css; } -- cgit v0.10.2 From 2d0f25201ee210a0666ec9c41538ba05a07f8bc6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 3 Mar 2011 14:26:20 +0800 Subject: perf cgroup: Fix a typo in kernel config s/specificied/specified Signed-off-by: Li Zefan Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4D6F348C.2050804@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/init/Kconfig b/init/Kconfig index 20d6bd9..4c4edf2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -688,7 +688,7 @@ config CGROUP_PERF depends on PERF_EVENTS && CGROUPS help This option extends the per-cpu mode to restrict monitoring to - threads which belong to the cgroup specificied and run on the + threads which belong to the cgroup specified and run on the designated cpu. Say N if unsure. 
-- cgit v0.10.2 From 08309379b7083a9ceec0f9bb96a629058fb623c4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Mar 2011 11:31:20 +0100 Subject: perf: Fix cgroup vs jump_label problem Li Zefan reported that the jump label code sleeps and we're calling it under a spinlock, *fail* ;-) Reported-by: Li Zefan Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 193b190..ed253aa 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -820,16 +820,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) list_add_tail(&event->group_entry, list); } - if (is_cgroup_event(event)) { + if (is_cgroup_event(event)) ctx->nr_cgroups++; - /* - * one more event: - * - that has cgroup constraint on event->cpu - * - that may need work on context switch - */ - atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_inc(&perf_sched_events); - } list_add_rcu(&event->event_entry, &ctx->event_list); if (!ctx->nr_events) @@ -957,11 +949,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) event->attach_state &= ~PERF_ATTACH_CONTEXT; - if (is_cgroup_event(event)) { + if (is_cgroup_event(event)) ctx->nr_cgroups--; - atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_dec(&perf_sched_events); - } ctx->nr_events--; if (event->attr.inherit_stat) @@ -2903,6 +2892,10 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_task_events); if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) put_callchain_buffers(); + if (is_cgroup_event(event)) { + atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); + jump_label_dec(&perf_sched_events); + } } if (event->buffer) { @@ -6478,6 +6471,13 @@ SYSCALL_DEFINE5(perf_event_open, err = perf_cgroup_connect(pid, event, &attr, group_leader); if (err) goto err_alloc; + /* + * one more event: + * - that has cgroup constraint on event->cpu + * - that may need work on context switch + */ + 
atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); + jump_label_inc(&perf_sched_events); } /* -- cgit v0.10.2 From 17e3162972cbb9796035fff1e2fd30669b0eef65 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 2 Mar 2011 17:05:01 +0200 Subject: perf_events: Update PEBS event constraints This patch updates PEBS event constraints for Intel Atom, Nehalem, Westmere. This patch also reorganizes the PEBS format/constraint detection code. It is now based on processor model and not PEBS format. Two processors may use the same PEBS format without having the same list of PEBS events. In this second version, we simplified the initialization of the PEBS constraints by leveraging the existing switch() statement in perf_event_intel.c. We also renamed the constraint tables to be more consistent with regular constraints. In this 3rd version, we drop BR_INST_RETIRED.MISPRED from Intel Atom as it does not seem to work. Use MISPREDICTED_BRANCH_RETIRED instead. Also add FP_ASSIST.* to both Intel Nehalem and Westmere. I missed those in the earlier patches. Events were tested using libpfm4 perf_examples.
Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4d6e6b02.815bdf0a.637b.07a7@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ba8aad1..c3ce053 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1137,6 +1137,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_core(); x86_pmu.event_constraints = intel_core2_event_constraints; + x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; pr_cont("Core2 events, "); break; @@ -1149,6 +1150,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_nehalem_event_constraints; + x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; pr_cont("Nehalem events, "); break; @@ -1160,6 +1162,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_atom(); x86_pmu.event_constraints = intel_gen_event_constraints; + x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; pr_cont("Atom events, "); break; @@ -1172,6 +1175,7 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_westmere_event_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; pr_cont("Westmere events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 8251998..b95c66a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -361,30 +361,50 @@ static int intel_pmu_drain_bts_buffer(void) /* * PEBS */ - -static struct event_constraint intel_core_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ +static struct event_constraint intel_core2_pebs_event_constraints[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 
PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ - PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ - PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ - PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ EVENT_CONSTRAINT_END }; -static struct event_constraint intel_nehalem_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ - PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ - PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ - PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ - PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ +static struct event_constraint intel_atom_pebs_event_constraints[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_nehalem_pebs_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* 
UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_westmere_pebs_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ EVENT_CONSTRAINT_END }; @@ -733,20 +753,17 @@ static void intel_ds_init(void) printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; - x86_pmu.pebs_constraints = intel_core_pebs_events; break; case 1: printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.pebs_constraints = intel_nehalem_pebs_events; break; default: printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; - break; } } } -- cgit v0.10.2 From a7e3ed1e470116c9d12c2f778431a481a6be8ab6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2011 10:34:47 +0800 Subject: perf: Add support for supplementary event registers 
Change logs against Andi's original version: - Extends perf_event_attr::config to config{,1,2} (Peter Zijlstra) - Fixed a major event scheduling issue. There cannot be a ref++ on an event that has already done ref++ once and without calling put_constraint() in between. (Stephane Eranian) - Use thread_cpumask for percore allocation. (Lin Ming) - Use MSR names in the extra reg lists. (Lin Ming) - Remove redundant "c = NULL" in intel_percore_constraints - Fix comment of perf_event_attr::config1 Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event that can be used to monitor any offcore accesses from a core. This is a very useful event for various tunings, and it's also needed to implement the generic LLC-* events correctly. Unfortunately this event requires programming a mask in a separate register. And worse this separate register is per core, not per CPU thread. This patch: - Teaches perf_events that OFFCORE_RESPONSE needs extra parameters. The extra parameters are passed by user space in the perf_event_attr::config1 field. - Adds support to the Intel perf_event core to schedule per core resources. This adds fairly generic infrastructure that can be also used for other per core resources. The basic code is patterned after the similar AMD northbridge constraints code. Thanks to Stephane Eranian who pointed out some problems in the original version and suggested improvements.
Signed-off-by: Andi Kleen Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra LKML-Reference: <1299119690-13991-2-git-send-email-ming.m.lin@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4d0dfa0..d25e74c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -47,6 +47,9 @@ #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ea03c72..ec6a6db 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,6 +93,8 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +struct intel_percore; + #define MAX_LBR_ENTRIES 16 struct cpu_hw_events { @@ -128,6 +130,13 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; /* + * Intel percore register state. + * Coordinate shared resources between HT threads. + */ + int percore_used; /* Used by this CPU? */ + struct intel_percore *per_core; + + /* * AMD specific bits */ struct amd_nb *amd_nb; @@ -177,6 +186,28 @@ struct cpu_hw_events { #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->weight; (e)++) +/* + * Extra registers for specific events. + * Some events need large masks and require external MSRs. + * Define a mapping to these extra registers. 
+ */ +struct extra_reg { + unsigned int event; + unsigned int msr; + u64 config_mask; + u64 valid_mask; +}; + +#define EVENT_EXTRA_REG(e, ms, m, vm) { \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + } +#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) + union perf_capabilities { struct { u64 lbr_format : 6; @@ -221,6 +252,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + struct event_constraint *percore_constraints; void (*quirks)(void); int perfctr_second_write; @@ -249,6 +281,11 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ + + /* + * Extra registers for events + */ + struct extra_reg *extra_regs; }; static struct x86_pmu x86_pmu __read_mostly; @@ -341,6 +378,31 @@ static inline unsigned int x86_pmu_event_addr(int index) return x86_pmu.perfctr + x86_pmu_addr_offset(index); } +/* + * Find and validate any extra registers to set up. 
+ */ +static int x86_pmu_extra_regs(u64 config, struct perf_event *event) +{ + struct extra_reg *er; + + event->hw.extra_reg = 0; + event->hw.extra_config = 0; + + if (!x86_pmu.extra_regs) + return 0; + + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + event->hw.extra_reg = er->msr; + event->hw.extra_config = event->attr.config1; + break; + } + return 0; +} + static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); @@ -665,6 +727,8 @@ static void x86_pmu_disable(struct pmu *pmu) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { + if (hwc->extra_reg) + wrmsrl(hwc->extra_reg, hwc->extra_config); wrmsrl(hwc->config_base, hwc->config | enable_mask); } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c3ce053..13cb6cf 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,5 +1,27 @@ #ifdef CONFIG_CPU_SUP_INTEL +#define MAX_EXTRA_REGS 2 + +/* + * Per register state. + */ +struct er_account { + int ref; /* reference count */ + unsigned int extra_reg; /* extra MSR number */ + u64 extra_config; /* extra MSR config */ +}; + +/* + * Per core state + * This used to coordinate shared registers for HT threads. + */ +struct intel_percore { + raw_spinlock_t lock; /* protect structure */ + struct er_account regs[MAX_EXTRA_REGS]; + int refcnt; /* number of threads */ + unsigned core_id; +}; + /* * Intel PerfMon, used on Core and later. 
*/ @@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct extra_reg intel_nehalem_extra_regs[] = +{ + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_nehalem_percore_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_westmere_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -89,6 +123,20 @@ static struct event_constraint intel_snb_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct extra_reg intel_westmere_extra_regs[] = +{ + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_westmere_percore_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + INTEL_EVENT_CONSTRAINT(0xbb, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_gen_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -907,6 +955,67 @@ intel_bts_constraints(struct perf_event *event) } static struct event_constraint * +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; + struct event_constraint *c; + struct intel_percore *pc; + struct er_account *era; + int i; + int free_slot; + int found; + + if (!x86_pmu.percore_constraints || hwc->extra_alloc) + return NULL; + + for (c = x86_pmu.percore_constraints; c->cmask; c++) { + if (e != c->code) + continue; + + /* + * Allocate resource per core. 
+ */ + pc = cpuc->per_core; + if (!pc) + break; + c = &emptyconstraint; + raw_spin_lock(&pc->lock); + free_slot = -1; + found = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { + /* Allow sharing same config */ + if (hwc->extra_config == era->extra_config) { + era->ref++; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + /* else conflict */ + found = 1; + break; + } else if (era->ref == 0 && free_slot == -1) + free_slot = i; + } + if (!found && free_slot != -1) { + era = &pc->regs[free_slot]; + era->ref = 1; + era->extra_reg = hwc->extra_reg; + era->extra_config = hwc->extra_config; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + raw_spin_unlock(&pc->lock); + return c; + } + + return NULL; +} + +static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { struct event_constraint *c; @@ -919,9 +1028,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; + c = intel_percore_constraints(cpuc, event); + if (c) + return c; + return x86_get_event_constraints(cpuc, event); } +static void intel_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct extra_reg *er; + struct intel_percore *pc; + struct er_account *era; + struct hw_perf_event *hwc = &event->hw; + int i, allref; + + if (!cpuc->percore_used) + return; + + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (hwc->config & er->config_mask)) + continue; + + pc = cpuc->per_core; + raw_spin_lock(&pc->lock); + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && + era->extra_config == hwc->extra_config && + era->extra_reg == er->msr) { + era->ref--; + hwc->extra_alloc = 0; + break; + } + } + allref = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) + allref += pc->regs[i].ref; + if (allref == 0) + cpuc->percore_used = 0; + 
raw_spin_unlock(&pc->lock); + break; + } +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -993,11 +1144,43 @@ static __initconst const struct x86_pmu core_pmu = { */ .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, }; +static int intel_pmu_cpu_prepare(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), + GFP_KERNEL, cpu_to_node(cpu)); + if (!cpuc->per_core) + return NOTIFY_BAD; + + raw_spin_lock_init(&cpuc->per_core->lock); + cpuc->per_core->core_id = -1; + return NOTIFY_OK; +} + static void intel_pmu_cpu_starting(int cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int core_id = topology_core_id(cpu); + int i; + + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; + + if (pc && pc->core_id == core_id) { + kfree(cpuc->per_core); + cpuc->per_core = pc; + break; + } + } + + cpuc->per_core->core_id = core_id; + cpuc->per_core->refcnt++; + init_debug_store_on_cpu(cpu); /* * Deal with CPUs that don't clear their LBRs on power-up. 
@@ -1007,6 +1190,15 @@ static void intel_pmu_cpu_starting(int cpu) static void intel_pmu_cpu_dying(int cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct intel_percore *pc = cpuc->per_core; + + if (pc) { + if (pc->core_id == -1 || --pc->refcnt == 0) + kfree(pc); + cpuc->per_core = NULL; + } + fini_debug_store_on_cpu(cpu); } @@ -1031,7 +1223,9 @@ static __initconst const struct x86_pmu intel_pmu = { */ .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, }; @@ -1151,7 +1345,9 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_nehalem_event_constraints; x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; + x86_pmu.percore_constraints = intel_nehalem_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.extra_regs = intel_nehalem_extra_regs; pr_cont("Nehalem events, "); break; @@ -1174,8 +1370,10 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_westmere_event_constraints; + x86_pmu.percore_constraints = intel_westmere_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; + x86_pmu.extra_regs = intel_westmere_extra_regs; pr_cont("Westmere events, "); break; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8ceb5a6..614615b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -225,8 +225,14 @@ struct perf_event_attr { }; __u32 bp_type; - __u64 bp_addr; - __u64 bp_len; + union { + __u64 bp_addr; + __u64 config1; /* extension of config */ + }; + union { + __u64 bp_len; + __u64 config2; /* extension of config1 */ + }; }; /* @@ -541,6 +547,9 @@ struct hw_perf_event { unsigned long event_base; int idx; int 
last_cpu; + unsigned int extra_reg; + u64 extra_config; + int extra_alloc; }; struct { /* software */ struct hrtimer hrtimer; -- cgit v0.10.2 From e994d7d23a0bae34cd28834e85522ed4e782faf7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 3 Mar 2011 10:34:48 +0800 Subject: perf: Fix LLC-* events on Intel Nehalem/Westmere On Intel Nehalem and Westmere CPUs the generic perf LLC-* events count the L2 caches, not the real L3 LLC - this was inconsistent with behavior on other CPUs. Fixing this requires the use of the special OFFCORE_RESPONSE events which need a separate mask register. This has been implemented by the previous patch, now use this infrastructure to set correct events for the LLC-* on Nehalem and Westmere. Signed-off-by: Andi Kleen Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra LKML-Reference: <1299119690-13991-3-git-send-email-ming.m.lin@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ec6a6db..4d6ce5d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -310,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +static u64 __read_mostly hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; /* * Propagate event elapsed time into the generic event. 
@@ -524,8 +528,9 @@ static inline int x86_pmu_initialized(void) } static inline int -set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) { + struct perf_event_attr *attr = &event->attr; unsigned int cache_type, cache_op, cache_result; u64 config, val; @@ -552,8 +557,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return -EINVAL; hwc->config |= val; - - return 0; + attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; + return x86_pmu_extra_regs(val, event); } static int x86_setup_perfctr(struct perf_event *event) @@ -578,10 +583,10 @@ static int x86_setup_perfctr(struct perf_event *event) } if (attr->type == PERF_TYPE_RAW) - return 0; + return x86_pmu_extra_regs(event->attr.config, event); if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); + return set_ext_hw_attr(hwc, event); if (attr->config >= x86_pmu.max_events) return -EINVAL; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 13cb6cf..6e9b676 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -285,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. 
+ */ [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01bb, + /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, }, }, [ C(DTLB) ] = { @@ -341,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids }, }; +/* + * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 + */ + +#define DMND_DATA_RD (1 << 0) +#define DMND_RFO (1 << 1) +#define DMND_WB (1 << 3) +#define PF_DATA_RD (1 << 4) +#define PF_DATA_RFO (1 << 5) +#define RESP_UNCORE_HIT (1 << 8) +#define RESP_MISS (0xf600) /* non uncore hit */ + +static __initconst const u64 nehalem_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, + }, + } +}; + static __initconst const u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -376,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 
0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -1340,6 +1393,8 @@ static __init int intel_pmu_init(void) case 46: /* 45 nm nehalem-ex, "Beckton" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_nhm(); @@ -1366,6 +1421,8 @@ static __init int intel_pmu_init(void) case 44: /* 32 nm nehalem, "Gulftown" */ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_nhm(); -- cgit v0.10.2 From 6909262429b70a162e9e7053672cfd8024c9275d Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 3 Mar 2011 10:34:50 +0800 Subject: perf: Avoid the percore allocations if the CPU is not HT capable Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra LKML-Reference: <1299119690-13991-5-git-send-email-ming.m.lin@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1f46951..c1bbfa8 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,10 +17,20 @@ #endif #include #include +#include extern int smp_num_siblings; 
extern unsigned int num_processors; +static inline bool cpu_has_ht_siblings(void) +{ + bool has_siblings = false; +#ifdef CONFIG_SMP + has_siblings = cpu_has_ht && smp_num_siblings > 1; +#endif + return has_siblings; +} + DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4d6ce5d..2660418 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -30,6 +30,7 @@ #include #include #include +#include #if 0 #undef wrmsrl diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6e9b676..8fc2b2c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1205,6 +1205,9 @@ static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + if (!cpu_has_ht_siblings()) + return NOTIFY_OK; + cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), GFP_KERNEL, cpu_to_node(cpu)); if (!cpuc->per_core) @@ -1221,6 +1224,15 @@ static void intel_pmu_cpu_starting(int cpu) int core_id = topology_core_id(cpu); int i; + init_debug_store_on_cpu(cpu); + /* + * Deal with CPUs that don't clear their LBRs on power-up. + */ + intel_pmu_lbr_reset(); + + if (!cpu_has_ht_siblings()) + return; + for_each_cpu(i, topology_thread_cpumask(cpu)) { struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; @@ -1233,12 +1245,6 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->per_core->core_id = core_id; cpuc->per_core->refcnt++; - - init_debug_store_on_cpu(cpu); - /* - * Deal with CPUs that don't clear their LBRs on power-up. 
- */ - intel_pmu_lbr_reset(); } static void intel_pmu_cpu_dying(int cpu) -- cgit v0.10.2 From a03f35ceeb3d279da35c5a914ac01a4b1effb0a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 3 Mar 2011 16:43:03 -0300 Subject: perf report tui: Fix multi event switching TAB/UNTAB were not hotkeys, so didn't exit hists__browse back to hists__tui_browse_tree, allowing just the first event to be browsed. Reported-by: William Cohen Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: William Cohen LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index 497b3c4..c27ab35 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -292,7 +292,8 @@ static int hist_browser__run(struct hist_browser *self, const char *title) { int key; int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', - NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, }; + NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, + NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, }; self->b.entries = &self->hists->entries; self->b.nr_entries = self->hists->nr_entries; @@ -859,6 +860,7 @@ int hists__browse(struct hists *self, const char *helpline, "E Expand all callchains\n" "d Zoom into current DSO\n" "t Zoom into current Thread\n" + "TAB/UNTAB Switch events\n" "q/CTRL+C Exit browser"); continue; case NEWT_KEY_ENTER: @@ -997,6 +999,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx) if (nd == first) continue; nd = rb_prev(nd); + break; default: return key; } -- cgit v0.10.2 From d7603d5122d9700fb8f36fa08b04f4e900fef059 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Mar 2011 14:51:33 -0300 Subject: perf hists: Remove needless global col lenght calcs To support multiple events we need to do these calcs per 'struct hists' instance, and it turns out we already 
do that at: __hists__add_entry hists__inc_nr_entries hists__calc_col_len for all the unfiltered hist_entry instances we stash in the rb tree, so trow away the dead code. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fbf5754..2b15c36 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -424,33 +424,6 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process, return err; } -static void thread__comm_adjust(struct thread *self, struct hists *hists) -{ - char *comm = self->comm; - - if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep && - (!symbol_conf.comm_list || - strlist__has_entry(symbol_conf.comm_list, comm))) { - u16 slen = strlen(comm); - - if (hists__new_col_len(hists, HISTC_COMM, slen)) - hists__set_col_len(hists, HISTC_THREAD, slen + 6); - } -} - -static int thread__set_comm_adjust(struct thread *self, const char *comm, - struct hists *hists) -{ - int ret = thread__set_comm(self, comm); - - if (ret) - return ret; - - thread__comm_adjust(self, hists); - - return 0; -} - int perf_event__process_comm(union perf_event *event, struct perf_sample *sample __used, struct perf_session *session) @@ -459,8 +432,7 @@ int perf_event__process_comm(union perf_event *event, dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid); - if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm, - &session->hists)) { + if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } @@ -760,18 +732,6 @@ void thread__find_addr_location(struct thread *self, al->sym = NULL; } -static void dso__calc_col_width(struct dso *self, struct hists *hists) -{ - if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep && - 
(!symbol_conf.dso_list || - strlist__has_entry(symbol_conf.dso_list, self->name))) { - u16 slen = dso__name_len(self); - hists__new_col_len(hists, HISTC_DSO, slen); - } - - self->slen_calculated = 1; -} - int perf_event__preprocess_sample(const union perf_event *event, struct perf_session *session, struct addr_location *al, @@ -817,23 +777,8 @@ int perf_event__preprocess_sample(const union perf_event *event, strlist__has_entry(symbol_conf.dso_list, al->map->dso->long_name))))) goto out_filtered; - /* - * We have to do this here as we may have a dso with no symbol - * hit that has a name longer than the ones with symbols - * sampled. - */ - if (!sort_dso.elide && !al->map->dso->slen_calculated) - dso__calc_col_width(al->map->dso, &session->hists); al->sym = map__find_symbol(al->map, al->addr, filter); - } else { - const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - - if (hists__col_len(&session->hists, HISTC_DSO) < unresolved_col_width && - !symbol_conf.col_width_list_str && !symbol_conf.field_sep && - !symbol_conf.dso_list) - hists__set_col_len(&session->hists, HISTC_DSO, - unresolved_col_width); } if (symbol_conf.sym_list && al->sym && diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index da2899e8..f7ad6bd 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -50,6 +50,15 @@ static void hists__calc_col_len(struct hists *self, struct hist_entry *h) if (h->ms.sym) hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); + else { + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; + + if (hists__col_len(self, HISTC_DSO) < unresolved_col_width && + !symbol_conf.col_width_list_str && !symbol_conf.field_sep && + !symbol_conf.dso_list) + hists__set_col_len(self, HISTC_DSO, + unresolved_col_width); + } len = thread__comm_len(h->thread); if (hists__new_col_len(self, HISTC_COMM, len)) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ba6d489..00014e3 100644 --- a/tools/perf/util/symbol.c +++ 
b/tools/perf/util/symbol.c @@ -207,7 +207,6 @@ struct dso *dso__new(const char *name) dso__set_short_name(self, self->name); for (i = 0; i < MAP__NR_TYPES; ++i) self->symbols[i] = self->symbol_names[i] = RB_ROOT; - self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; self->loaded = 0; self->sorted_by_name = 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 670cd1c..4d7ed09 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -132,7 +132,6 @@ struct dso { struct rb_root symbol_names[MAP__NR_TYPES]; enum dso_kernel_type kernel; u8 adjust_symbols:1; - u8 slen_calculated:1; u8 has_build_id:1; u8 hit:1; u8 annotate_warned:1; -- cgit v0.10.2 From 60098917c06d154d06ce030c125266eab9e60768 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Mar 2011 21:19:21 -0300 Subject: perf hists browser: Handle browsing empty hists tree Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index c27ab35..c98e6f8 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -639,6 +639,9 @@ static void ui_browser__hists_seek(struct ui_browser *self, struct rb_node *nd; bool first = true; + if (self->nr_entries == 0) + return; + switch (whence) { case SEEK_SET: nd = hists__filter_entries(rb_first(self->entries)); @@ -820,8 +823,8 @@ int hists__browse(struct hists *self, const char *helpline, hists__browser_title(self, msg, sizeof(msg), ev_name, dso_filter, thread_filter); while (1) { - const struct thread *thread; - const struct dso *dso; + const struct thread *thread = NULL; + const struct dso *dso = NULL; char *options[16]; int nr_options = 0, choice = 0, i, annotate = -2, zoom_dso = -2, zoom_thread = -2, @@ -829,8 +832,10 @@ int hists__browse(struct hists *self, 
const char *helpline, key = hist_browser__run(browser, msg); - thread = hist_browser__selected_thread(browser); - dso = browser->selection->map ? browser->selection->map->dso : NULL; + if (browser->he_selection != NULL) { + thread = hist_browser__selected_thread(browser); + dso = browser->selection->map ? browser->selection->map->dso : NULL; + } switch (key) { case NEWT_KEY_TAB: @@ -841,7 +846,8 @@ int hists__browse(struct hists *self, const char *helpline, */ goto out_free_stack; case 'a': - if (browser->selection->map == NULL && + if (browser->selection == NULL || + browser->selection->map == NULL || browser->selection->map->dso->annotate_warned) continue; goto do_annotate; @@ -887,7 +893,8 @@ int hists__browse(struct hists *self, const char *helpline, goto out_free_stack; } - if (browser->selection->sym != NULL && + if (browser->selection != NULL && + browser->selection->sym != NULL && !browser->selection->map->dso->annotate_warned && asprintf(&options[nr_options], "Annotate %s", browser->selection->sym->name) > 0) @@ -906,7 +913,8 @@ int hists__browse(struct hists *self, const char *helpline, (dso->kernel ? "the Kernel" : dso->short_name)) > 0) zoom_dso = nr_options++; - if (browser->selection->map != NULL && + if (browser->selection != NULL && + browser->selection->map != NULL && asprintf(&options[nr_options], "Browse map details") > 0) browse_map = nr_options++; -- cgit v0.10.2 From 3d3b5e95997208067c963923db90ed1517565d14 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Mar 2011 22:29:39 -0300 Subject: perf evlist: Split perf_evlist__id_hash The previous situation was to receive an fd from where to read the event ID. Spin off a routine for when we have the ID handy, not having to read it from some fd. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 030ae7f..190c64c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -106,12 +106,24 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) evlist->nr_fds++; } -static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) +void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, + int cpu, int thread, u64 id) +{ + int hash; + struct perf_sample_id *sid = SID(evsel, cpu, thread); + + sid->id = id; + sid->evsel = evsel; + hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); + hlist_add_head(&sid->node, &evlist->heads[hash]); +} + +static int perf_evlist__id_hash_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd) { - struct perf_sample_id *sid; u64 read_data[4] = { 0, }; - int hash, id_idx = 1; /* The first entry is the counter value */ + int id_idx = 1; /* The first entry is the counter value */ if (!(evsel->attr.read_format & PERF_FORMAT_ID) || read(fd, &read_data, sizeof(read_data)) == -1) @@ -122,11 +134,7 @@ static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *e if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) ++id_idx; - sid = SID(evsel, cpu, thread); - sid->id = read_data[id_idx]; - sid->evsel = evsel; - hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); - hlist_add_head(&sid->node, &evlist->heads[hash]); + perf_evlist__id_hash(evlist, evsel, cpu, thread, read_data[id_idx]); return 0; } @@ -300,7 +308,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) goto out_unmap; if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0) + 
perf_evlist__id_hash_fd(evlist, evsel, cpu, thread, fd) < 0) goto out_unmap; } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b75805a..078d512 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -38,6 +38,9 @@ void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); +void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, + int cpu, int thread, u64 id); + int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); -- cgit v0.10.2 From e248de331a452f8771eda6ed4bb30d92c82df28b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 5 Mar 2011 21:40:06 -0300 Subject: perf tools: Improve support for sessions with multiple events By creating an perf_evlist out of the attributes in the perf.data file header, so that we can use evlists and evsels when reading recorded sessions in addition to when we record sessions. More work is needed to allow tools to allow the user to select which events are wanted when browsing sessions, be it just one or a subset of them, aggregated or showed at the same time but with different indications on the UI to allow seeing workloads thru different views at the same time. But the overall goal/trend is to more uniformly use evsels and evlists. 
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4271829..695de4b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -19,6 +19,8 @@ #include "perf.h" #include "util/debug.h" +#include "util/evlist.h" +#include "util/evsel.h" #include "util/annotate.h" #include "util/event.h" #include "util/parse-options.h" @@ -38,9 +40,13 @@ static bool print_line; static const char *sym_hist_filter; -static int hists__add_entry(struct hists *self, struct addr_location *al) +static int perf_evlist__add_sample(struct perf_evlist *evlist, + struct perf_sample *sample, + struct addr_location *al) { + struct perf_evsel *evsel; struct hist_entry *he; + int ret; if (sym_hist_filter != NULL && (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) { @@ -53,23 +59,35 @@ static int hists__add_entry(struct hists *self, struct addr_location *al) return 0; } - he = __hists__add_entry(self, al, NULL, 1); + evsel = perf_evlist__id2evsel(evlist, sample->id); + if (evsel == NULL) { + /* + * FIXME: Propagate this back, but at least we're in a builtin, + * where exit() is allowed. ;-) + */ + ui__warning("Invalid %s file, contains samples with id not in " + "its header!\n", input_name); + exit_browser(0); + exit(1); + } + + he = __hists__add_entry(&evsel->hists, al, NULL, 1); if (he == NULL) return -ENOMEM; + ret = 0; if (he->ms.sym != NULL) { - /* - * All aggregated on the first sym_hist. 
- */ struct annotation *notes = symbol__annotation(he->ms.sym); if (notes->src == NULL && - symbol__alloc_hist(he->ms.sym, 1) < 0) + symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0) return -ENOMEM; - return hist_entry__inc_addr_samples(he, 0, al->addr); + ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); } - return 0; + evsel->hists.stats.total_period += sample->period; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + return ret; } static int process_sample_event(union perf_event *event, @@ -85,7 +103,7 @@ static int process_sample_event(union perf_event *event, return -1; } - if (!al.filtered && hists__add_entry(&session->hists, &al)) { + if (!al.filtered && perf_evlist__add_sample(session->evlist, sample, &al)) { pr_warning("problem incrementing symbol count, " "skipping event\n"); return -1; @@ -100,7 +118,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he, int evidx) print_line, full_paths, 0, 0); } -static void hists__find_annotations(struct hists *self) +static void hists__find_annotations(struct hists *self, int evidx) { struct rb_node *nd = rb_first(&self->entries), *next; int key = KEY_RIGHT; @@ -123,8 +141,7 @@ find_next: } if (use_browser > 0) { - /* For now all is aggregated on the first */ - key = hist_entry__tui_annotate(he, 0); + key = hist_entry__tui_annotate(he, evidx); switch (key) { case KEY_RIGHT: next = rb_next(nd); @@ -139,8 +156,7 @@ find_next: if (next != NULL) nd = next; } else { - /* For now all is aggregated on the first */ - hist_entry__tty_annotate(he, 0); + hist_entry__tty_annotate(he, evidx); nd = rb_next(nd); /* * Since we have a hist_entry per IP for the same @@ -166,6 +182,8 @@ static int __cmd_annotate(void) { int ret; struct perf_session *session; + struct perf_evsel *pos; + u64 total_nr_samples; session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); if (session == NULL) @@ -186,12 +204,36 @@ static int __cmd_annotate(void) if (verbose > 2) 
perf_session__fprintf_dsos(session, stdout); - hists__collapse_resort(&session->hists); - hists__output_resort(&session->hists); - hists__find_annotations(&session->hists); -out_delete: - perf_session__delete(session); + total_nr_samples = 0; + list_for_each_entry(pos, &session->evlist->entries, node) { + struct hists *hists = &pos->hists; + u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + + if (nr_samples > 0) { + total_nr_samples += nr_samples; + hists__collapse_resort(hists); + hists__output_resort(hists); + hists__find_annotations(hists, pos->idx); + } + } + if (total_nr_samples == 0) { + ui__warning("The %s file has no samples!\n", input_name); + goto out_delete; + } +out_delete: + /* + * Speed up the exit process, for large files this can + * take quite a while. + * + * XXX Enable this when using valgrind or if we ever + * librarize this command. + * + * Also experiment with obstacks to see how much speed + * up we'll get here. + * + * perf_session__delete(session); + */ return ret; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dddcc7e..1c399ea 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -21,6 +21,8 @@ #include "perf.h" #include "util/debug.h" +#include "util/evlist.h" +#include "util/evsel.h" #include "util/header.h" #include "util/session.h" @@ -46,39 +48,6 @@ static const char *pretty_printing_style = default_pretty_printing_style; static char callchain_default_opt[] = "fractal,0.5"; static symbol_filter_t annotate_init; -static struct hists *perf_session__hists_findnew(struct perf_session *self, - u64 event_stream, u32 type, - u64 config) -{ - struct rb_node **p = &self->hists_tree.rb_node; - struct rb_node *parent = NULL; - struct hists *iter, *new; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hists, rb_node); - if (iter->config == config) - return iter; - - - if (config > iter->config) - p = &(*p)->rb_right; - else - p = &(*p)->rb_left; - } - - 
new = malloc(sizeof(struct hists)); - if (new == NULL) - return NULL; - memset(new, 0, sizeof(struct hists)); - new->event_stream = event_stream; - new->config = config; - new->type = type; - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, &self->hists_tree); - return new; -} - static int perf_session__add_hist_entry(struct perf_session *session, struct addr_location *al, struct perf_sample *sample) @@ -86,8 +55,7 @@ static int perf_session__add_hist_entry(struct perf_session *session, struct symbol *parent = NULL; int err = 0; struct hist_entry *he; - struct hists *hists; - struct perf_event_attr *attr; + struct perf_evsel *evsel; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { err = perf_session__resolve_callchain(session, al->thread, @@ -96,15 +64,19 @@ static int perf_session__add_hist_entry(struct perf_session *session, return err; } - attr = perf_header__find_attr(sample->id, &session->header); - if (attr) - hists = perf_session__hists_findnew(session, sample->id, attr->type, attr->config); - else - hists = perf_session__hists_findnew(session, sample->id, 0, 0); - if (hists == NULL) - return -ENOMEM; + evsel = perf_evlist__id2evsel(session->evlist, sample->id); + if (evsel == NULL) { + /* + * FIXME: Propagate this back, but at least we're in a builtin, + * where exit() is allowed. ;-) + */ + ui__warning("Invalid %s file, contains samples with id not in " + "its header!\n", input_name); + exit_browser(0); + exit(1); + } - he = __hists__add_entry(hists, al, parent, sample->period); + he = __hists__add_entry(&evsel->hists, al, parent, sample->period); if (he == NULL) return -ENOMEM; @@ -120,52 +92,30 @@ static int perf_session__add_hist_entry(struct perf_session *session, * code will not use it. */ if (al->sym != NULL && use_browser > 0) { - /* - * All aggregated on the first sym_hist. 
- */ struct annotation *notes = symbol__annotation(he->ms.sym); + + assert(evsel != NULL); + + err = -ENOMEM; if (notes->src == NULL && - symbol__alloc_hist(he->ms.sym, 1) < 0) - err = -ENOMEM; - else - err = hist_entry__inc_addr_samples(he, 0, al->addr); + symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0) + goto out; + + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); } + evsel->hists.stats.total_period += sample->period; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); +out: return err; } -static int add_event_total(struct perf_session *session, - struct perf_sample *sample, - struct perf_event_attr *attr) -{ - struct hists *hists; - - if (attr) - hists = perf_session__hists_findnew(session, sample->id, - attr->type, attr->config); - else - hists = perf_session__hists_findnew(session, sample->id, 0, 0); - - if (!hists) - return -ENOMEM; - - hists->stats.total_period += sample->period; - /* - * FIXME: add_event_total should be moved from here to - * perf_session__process_event so that the proper hist is passed to - * the event_op methods. 
- */ - hists__inc_nr_events(hists, PERF_RECORD_SAMPLE); - session->hists.stats.total_period += sample->period; - return 0; -} static int process_sample_event(union perf_event *event, struct perf_sample *sample, struct perf_session *session) { struct addr_location al; - struct perf_event_attr *attr; if (perf_event__preprocess_sample(event, session, &al, sample, annotate_init) < 0) { @@ -182,27 +132,17 @@ static int process_sample_event(union perf_event *event, return -1; } - attr = perf_header__find_attr(sample->id, &session->header); - - if (add_event_total(session, sample, attr)) { - pr_debug("problem adding event period\n"); - return -1; - } - return 0; } static int process_read_event(union perf_event *event, struct perf_sample *sample __used, - struct perf_session *session __used) + struct perf_session *session) { - struct perf_event_attr *attr; - - attr = perf_header__find_attr(event->read.id, &session->header); - + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, + event->read.id); if (show_threads) { - const char *name = attr ? __event_name(attr->type, attr->config) - : "unknown"; + const char *name = evsel ? event_name(evsel) : "unknown"; perf_read_values_add_value(&show_threads_values, event->read.pid, event->read.tid, event->read.id, @@ -211,7 +151,7 @@ static int process_read_event(union perf_event *event, } dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - attr ? __event_name(attr->type, attr->config) : "FAIL", + evsel ? 
event_name(evsel) : "FAIL", event->read.value); return 0; @@ -282,21 +222,20 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self, return ret + fprintf(fp, "\n#\n"); } -static int hists__tty_browse_tree(struct rb_root *tree, const char *help) +static int hists__tty_browse_tree(struct perf_evlist *evlist, const char *help) { - struct rb_node *next = rb_first(tree); + struct perf_evsel *pos; - while (next) { - struct hists *hists = rb_entry(next, struct hists, rb_node); + list_for_each_entry(pos, &evlist->entries, node) { + struct hists *hists = &pos->hists; const char *evname = NULL; if (rb_first(&hists->entries) != rb_last(&hists->entries)) - evname = __event_name(hists->type, hists->config); + evname = event_name(pos); hists__fprintf_nr_sample_events(hists, evname, stdout); hists__fprintf(hists, NULL, false, stdout); fprintf(stdout, "\n\n"); - next = rb_next(&hists->rb_node); } if (sort_order == default_sort_order && @@ -317,8 +256,9 @@ static int hists__tty_browse_tree(struct rb_root *tree, const char *help) static int __cmd_report(void) { int ret = -EINVAL; + u64 nr_samples; struct perf_session *session; - struct rb_node *next; + struct perf_evsel *pos; const char *help = "For a higher level overview, try: perf report --sort comm,dso"; signal(SIGINT, sig_handler); @@ -349,26 +289,24 @@ static int __cmd_report(void) if (verbose > 2) perf_session__fprintf_dsos(session, stdout); - next = rb_first(&session->hists_tree); - - if (next == NULL) { - ui__warning("The %s file has no samples!\n", input_name); - goto out_delete; - } - - while (next) { - struct hists *hists; + nr_samples = 0; + list_for_each_entry(pos, &session->evlist->entries, node) { + struct hists *hists = &pos->hists; - hists = rb_entry(next, struct hists, rb_node); hists__collapse_resort(hists); hists__output_resort(hists); - next = rb_next(&hists->rb_node); + nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE]; + } + + if (nr_samples == 0) { + ui__warning("The %s file has no 
samples!\n", input_name); + goto out_delete; } if (use_browser > 0) - hists__tui_browse_tree(&session->hists_tree, help, 0); + hists__tui_browse_tree(session->evlist, help); else - hists__tty_browse_tree(&session->hists_tree, help); + hists__tty_browse_tree(session->evlist, help); out_delete: /* diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f6fc8f6..281b60e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -7,6 +7,7 @@ #include "types.h" #include "xyarray.h" #include "cgroup.h" +#include "hist.h" struct perf_counts_values { union { @@ -51,6 +52,7 @@ struct perf_evsel { struct xyarray *id; struct perf_counts *counts; int idx; + struct hists hists; char *name; void *priv; struct cgroup_sel *cgrp; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 72c124d..108b0db 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -969,37 +969,6 @@ bool perf_header__sample_id_all(const struct perf_header *header) return value; } -struct perf_event_attr * -perf_header__find_attr(u64 id, struct perf_header *header) -{ - int i; - - /* - * We set id to -1 if the data file doesn't contain sample - * ids. This can happen when the data file contains one type - * of event and in that case, the header can still store the - * event attribute information. Check for this and avoid - * walking through the entire list of ids which may be large. 
- */ - if (id == -1ULL) { - if (header->attrs > 0) - return &header->attr[0]->attr; - return NULL; - } - - for (i = 0; i < header->attrs; i++) { - struct perf_header_attr *attr = header->attr[i]; - int j; - - for (j = 0; j < attr->ids; j++) { - if (attr->id[j] == id) - return &attr->attr; - } - } - - return NULL; -} - int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, perf_event__handler_t process, struct perf_session *session) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f042ceb..2fab133 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -85,8 +85,6 @@ int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); bool perf_header__sample_id_all(const struct perf_header *header); -struct perf_event_attr * -perf_header__find_attr(u64 id, struct perf_header *header); void perf_header__set_feat(struct perf_header *self, int feat); void perf_header__clear_feat(struct perf_header *self, int feat); bool perf_header__has_feat(const struct perf_header *self, int feat); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f7ad6bd..627a02e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -984,8 +984,12 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp) size_t ret = 0; for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { - const char *name = perf_event__name(i); + const char *name; + if (self->stats.nr_events[i] == 0) + continue; + + name = perf_event__name(i); if (!strcmp(name, "UNKNOWN")) continue; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 37c7908..0d38b43 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -42,13 +42,10 @@ enum hist_column { }; struct hists { - struct rb_node rb_node; struct rb_root entries; u64 nr_entries; struct events_stats stats; - u64 config; u64 event_stream; - u32 type; u16 col_len[HISTC_NR_COLS]; /* Best would be to reuse the 
session callchain cursor */ struct callchain_cursor callchain_cursor; @@ -87,6 +84,8 @@ u16 hists__col_len(struct hists *self, enum hist_column col); void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); +struct perf_evlist; + #ifdef NO_NEWT_SUPPORT static inline int hists__browse(struct hists *self __used, const char *helpline __used, @@ -95,9 +94,8 @@ static inline int hists__browse(struct hists *self __used, return 0; } -static inline int hists__tui_browse_tree(struct rb_root *self __used, - const char *help __used, - int evidx __used) +static inline int hists__tui_browse_tree(struct perf_evlist *evlist __used, + const char *help __used) { return 0; } @@ -118,7 +116,7 @@ int hist_entry__tui_annotate(struct hist_entry *self, int evidx); #define KEY_LEFT NEWT_KEY_LEFT #define KEY_RIGHT NEWT_KEY_RIGHT -int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx); +int hists__tui_browse_tree(struct perf_evlist *evlist, const char *help); #endif unsigned int hists__sort_list_width(struct hists *self); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a3a871f..0d41419 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -7,10 +7,52 @@ #include #include +#include "evlist.h" +#include "evsel.h" #include "session.h" #include "sort.h" #include "util.h" +static int perf_session__read_evlist(struct perf_session *session) +{ + int i, j; + + session->evlist = perf_evlist__new(NULL, NULL); + if (session->evlist == NULL) + return -ENOMEM; + + for (i = 0; i < session->header.attrs; ++i) { + struct perf_header_attr *hattr = session->header.attr[i]; + struct perf_evsel *evsel = perf_evsel__new(&hattr->attr, i); + + if (evsel == NULL) + goto out_delete_evlist; + /* + * Do it before so that if perf_evsel__alloc_id fails, this + * entry gets purged too at perf_evlist__delete(). 
+ */ + perf_evlist__add(session->evlist, evsel); + /* + * We don't have the cpu and thread maps on the header, so + * for allocating the perf_sample_id table we fake 1 cpu and + * hattr->ids threads. + */ + if (perf_evsel__alloc_id(evsel, 1, hattr->ids)) + goto out_delete_evlist; + + for (j = 0; j < hattr->ids; ++j) + perf_evlist__id_hash(session->evlist, evsel, 0, j, + hattr->id[j]); + } + + return 0; + +out_delete_evlist: + perf_evlist__delete(session->evlist); + session->evlist = NULL; + return -ENOMEM; +} + static int perf_session__open(struct perf_session *self, bool force) { struct stat input_stat; @@ -56,6 +98,11 @@ static int perf_session__open(struct perf_session *self, bool force) goto out_close; } + if (perf_session__read_evlist(self) < 0) { + pr_err("Not enough memory to read the event selector list\n"); + goto out_close; + } + self->size = input_stat.st_size; return 0; @@ -141,7 +188,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, memcpy(self->filename, filename, len); self->threads = RB_ROOT; INIT_LIST_HEAD(&self->dead_threads); - self->hists_tree = RB_ROOT; self->last_match = NULL; /* * On 64bit we can mmap the data file in one go. 
No need for tiny mmap @@ -1137,3 +1183,18 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp, size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits); return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits); } + +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) +{ + struct perf_evsel *pos; + size_t ret = fprintf(fp, "Aggregated stats:\n"); + + ret += hists__fprintf_nr_events(&session->hists, fp); + + list_for_each_entry(pos, &session->evlist->entries, node) { + ret += fprintf(fp, "%s stats:\n", event_name(pos)); + ret += hists__fprintf_nr_events(&pos->hists, fp); + } + + return ret; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 977b3a1..05dd7bc 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -34,12 +34,12 @@ struct perf_session { struct thread *last_match; struct machine host_machine; struct rb_root machines; - struct rb_root hists_tree; + struct perf_evlist *evlist; /* - * FIXME: should point to the first entry in hists_tree and - * be a hists instance. Right now its only 'report' - * that is using ->hists_tree while all the rest use - * ->hists. + * FIXME: Need to split this up further, we need global + * stats + per event stats. 'perf diff' also needs + * to properly support multiple events in a single + * perf.data file. 
*/ struct hists hists; u64 sample_type; @@ -151,11 +151,7 @@ size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp, bool with_hits); -static inline -size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp) -{ - return hists__fprintf_nr_events(&self->hists, fp); -} +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); static inline int perf_session__parse_sample(struct perf_session *session, const union perf_event *event, diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index c98e6f8..f3af4fe 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -7,6 +7,8 @@ #include #include +#include "../../evsel.h" +#include "../../evlist.h" #include "../../hist.h" #include "../../pstack.h" #include "../../sort.h" @@ -987,31 +989,33 @@ out: return key; } -int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx) +int hists__tui_browse_tree(struct perf_evlist *evlist, const char *help) { - struct rb_node *first = rb_first(self), *nd = first, *next; - int key = 0; + struct perf_evsel *pos; - while (nd) { - struct hists *hists = rb_entry(nd, struct hists, rb_node); - const char *ev_name = __event_name(hists->type, hists->config); + pos = list_entry(evlist->entries.next, struct perf_evsel, node); + while (pos) { + struct hists *hists = &pos->hists; + const char *ev_name = event_name(pos); + int key = hists__browse(hists, help, ev_name, pos->idx); - key = hists__browse(hists, help, ev_name, evidx); switch (key) { case NEWT_KEY_TAB: - next = rb_next(nd); - if (next) - nd = next; + if (pos->node.next == &evlist->entries) + pos = list_entry(evlist->entries.next, struct perf_evsel, node); + else + pos = list_entry(pos->node.next, struct perf_evsel, node); break; case NEWT_KEY_UNTAB: - if (nd == first) - continue; - nd = rb_prev(nd); + if (pos->node.prev 
== &evlist->entries) + pos = list_entry(evlist->entries.prev, struct perf_evsel, node); + else + pos = list_entry(pos->node.prev, struct perf_evsel, node); break; default: return key; } } - return key; + return 0; } -- cgit v0.10.2 From 7f0030b211579939461468f25b80c73e293c46e0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 6 Mar 2011 13:07:30 -0300 Subject: perf report tui: Improve multi event session support When multiple events were used in 'perf record', allow the user to choose which one is wanted before showing the per event histograms. Annotations will be performed on the chosen event. Allow going back and forth from event to event quickly using just the arrow keys and enter. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: William Cohen LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1c399ea..e9b5d51 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -222,7 +222,8 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self, return ret + fprintf(fp, "\n#\n"); } -static int hists__tty_browse_tree(struct perf_evlist *evlist, const char *help) +static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, + const char *help) { struct perf_evsel *pos; @@ -304,9 +305,9 @@ static int __cmd_report(void) } if (use_browser > 0) - hists__tui_browse_tree(session->evlist, help); + perf_evlist__tui_browse_hists(session->evlist, help); else - hists__tty_browse_tree(session->evlist, help); + perf_evlist__tty_browse_hists(session->evlist, help); out_delete: /* diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0d38b43..cb6858a 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -87,15 +87,9 @@ bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); struct perf_evlist; #ifdef NO_NEWT_SUPPORT 
-static inline int hists__browse(struct hists *self __used, - const char *helpline __used, - const char *ev_name __used, int evidx __used) -{ - return 0; -} - -static inline int hists__tui_browse_tree(struct perf_evlist *evlist __used, - const char *help __used) +static inline +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used, + const char *help __used) { return 0; } @@ -109,14 +103,12 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used, #define KEY_RIGHT -2 #else #include -int hists__browse(struct hists *self, const char *helpline, - const char *ev_name, int evidx); int hist_entry__tui_annotate(struct hist_entry *self, int evidx); #define KEY_LEFT NEWT_KEY_LEFT #define KEY_RIGHT NEWT_KEY_RIGHT -int hists__tui_browse_tree(struct perf_evlist *evlist, const char *help); +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help); #endif unsigned int hists__sort_list_width(struct hists *self); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index f3af4fe..798efdc 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -803,9 +803,11 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, return printed; } -int hists__browse(struct hists *self, const char *helpline, - const char *ev_name, int evidx) +static int perf_evsel__hists_browse(struct perf_evsel *evsel, + const char *helpline, const char *ev_name, + bool left_exits) { + struct hists *self = &evsel->hists; struct hist_browser *browser = hist_browser__new(self); struct pstack *fstack; const struct thread *thread_filter = NULL; @@ -878,8 +880,14 @@ int hists__browse(struct hists *self, const char *helpline, case NEWT_KEY_LEFT: { const void *top; - if (pstack__empty(fstack)) + if (pstack__empty(fstack)) { + /* + * Go back to the perf_evsel_menu__run or other user + */ + if (left_exits) + goto out_free_stack; continue; + } top = pstack__pop(fstack); if 
(top == &dso_filter) goto zoom_out_dso; @@ -888,7 +896,8 @@ int hists__browse(struct hists *self, const char *helpline, continue; } case NEWT_KEY_ESCAPE: - if (!ui__dialog_yesno("Do you really want to exit?")) + if (!left_exits && + !ui__dialog_yesno("Do you really want to exit?")) continue; /* Fall thru */ default: @@ -940,7 +949,7 @@ do_annotate: if (he == NULL) continue; - hist_entry__tui_annotate(he, evidx); + hist_entry__tui_annotate(he, evsel->idx); } else if (choice == browse_map) map__browse(browser->selection->map); else if (choice == zoom_dso) { @@ -989,15 +998,71 @@ out: return key; } -int hists__tui_browse_tree(struct perf_evlist *evlist, const char *help) +struct perf_evsel_menu { + struct ui_browser b; + struct perf_evsel *selection; +}; + +static void perf_evsel_menu__write(struct ui_browser *browser, + void *entry, int row) +{ + struct perf_evsel_menu *menu = container_of(browser, + struct perf_evsel_menu, b); + struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node); + bool current_entry = ui_browser__is_current_entry(browser, row); + unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + const char *ev_name = event_name(evsel); + char bf[256], unit; + + ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : + HE_COLORSET_NORMAL); + + nr_events = convert_unit(nr_events, &unit); + snprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, + unit, unit == ' ' ? 
"" : " ", ev_name); + slsmg_write_nstring(bf, browser->width); + + if (current_entry) + menu->selection = evsel; +} + +static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help) { + int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, }; + struct perf_evlist *evlist = menu->b.priv; struct perf_evsel *pos; + const char *ev_name, *title = "Available samples"; + int key; - pos = list_entry(evlist->entries.next, struct perf_evsel, node); - while (pos) { - struct hists *hists = &pos->hists; - const char *ev_name = event_name(pos); - int key = hists__browse(hists, help, ev_name, pos->idx); + if (ui_browser__show(&menu->b, title, + "ESC: exit, ENTER|->: Browse histograms") < 0) + return -1; + + ui_browser__add_exit_keys(&menu->b, exit_keys); + + while (1) { + key = ui_browser__run(&menu->b); + + switch (key) { + case NEWT_KEY_RIGHT: + case NEWT_KEY_ENTER: + if (!menu->selection) + continue; + pos = menu->selection; +browse_hists: + ev_name = event_name(pos); + key = perf_evsel__hists_browse(pos, help, ev_name, true); + ui_browser__show_title(&menu->b, title); + break; + case NEWT_KEY_LEFT: + continue; + case NEWT_KEY_ESCAPE: + if (!ui__dialog_yesno("Do you really want to exit?")) + continue; + /* Fall thru */ + default: + goto out; + } switch (key) { case NEWT_KEY_TAB: @@ -1005,17 +1070,69 @@ int hists__tui_browse_tree(struct perf_evlist *evlist, const char *help) pos = list_entry(evlist->entries.next, struct perf_evsel, node); else pos = list_entry(pos->node.next, struct perf_evsel, node); - break; + goto browse_hists; case NEWT_KEY_UNTAB: if (pos->node.prev == &evlist->entries) pos = list_entry(evlist->entries.prev, struct perf_evsel, node); else pos = list_entry(pos->node.prev, struct perf_evsel, node); - break; + goto browse_hists; + case 'q': + case CTRL('c'): + goto out; default: - return key; + break; } } - return 0; +out: + ui_browser__hide(&menu->b); + return key; +} + +static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, 
+ const char *help) +{ + struct perf_evsel *pos; + struct perf_evsel_menu menu = { + .b = { + .entries = &evlist->entries, + .refresh = ui_browser__list_head_refresh, + .seek = ui_browser__list_head_seek, + .write = perf_evsel_menu__write, + .nr_entries = evlist->nr_entries, + .priv = evlist, + }, + }; + + ui_helpline__push("Press ESC to exit"); + + list_for_each_entry(pos, &evlist->entries, node) { + const char *ev_name = event_name(pos); + size_t line_len = strlen(ev_name) + 7; + + if (menu.b.width < line_len) + menu.b.width = line_len; + /* + * Cache the evsel name, tracepoints have a _high_ cost per + * event_name() call. + */ + if (pos->name == NULL) + pos->name = strdup(ev_name); + } + + return perf_evsel_menu__run(&menu, help); +} + +int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help) +{ + + if (evlist->nr_entries == 1) { + struct perf_evsel *first = list_entry(evlist->entries.next, + struct perf_evsel, node); + const char *ev_name = event_name(first); + return perf_evsel__hists_browse(first, help, ev_name, false); + } + + return __perf_evlist__tui_browse_hists(evlist, help); } -- cgit v0.10.2 From ea7145477a461e09d8d194cac4b996dc4f449107 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Mar 2011 19:10:39 +0100 Subject: x86: Separate out entry text section Put x86 entry code into a separate link section: .entry.text. Separating the entry text section seems to have performance benefits - caused by more efficient instruction cache usage. Running hackbench with perf stat --repeat showed that the change compresses the icache footprint. The icache load miss rate went down by about 15%: before patch: 19417627 L1-icache-load-misses ( +- 0.147% ) after patch: 16490788 L1-icache-load-misses ( +- 0.180% ) The motivation of the patch was to fix a particular kprobes bug that relates to the entry text section, the performance advantage was discovered accidentally. 
Whole perf output follows: - results for current tip tree: Performance counter stats for './hackbench/hackbench 10' (500 runs): 19417627 L1-icache-load-misses ( +- 0.147% ) 2676914223 instructions # 0.497 IPC ( +- 0.079% ) 5389516026 cycles ( +- 0.144% ) 0.206267711 seconds time elapsed ( +- 0.138% ) - results for current tip tree with the patch applied: Performance counter stats for './hackbench/hackbench 10' (500 runs): 16490788 L1-icache-load-misses ( +- 0.180% ) 2717734941 instructions # 0.502 IPC ( +- 0.079% ) 5414756975 cycles ( +- 0.148% ) 0.206747566 seconds time elapsed ( +- 0.137% ) Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Nick Piggin Cc: Eric Dumazet Cc: masami.hiramatsu.pt@hitachi.com Cc: ananth@in.ibm.com Cc: davem@davemloft.net Cc: 2nddept-manager@sdl.hitachi.co.jp LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 518bb99..f729b2e 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -25,6 +25,8 @@ #define sysretl_audit ia32_ret_from_sys_call #endif + .section .entry.text, "ax" + #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) .macro IA32_ARG_FIXUP noebp=0 diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c8b4efa..f5accf8 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -65,6 +65,8 @@ #define sysexit_audit syscall_exit_work #endif + .section .entry.text, "ax" + /* * We use macros for low-level operations which need to be overridden * for paravirtualization. 
The following will never clobber any registers: @@ -788,7 +790,7 @@ ENDPROC(ptregs_clone) */ .section .init.rodata,"a" ENTRY(interrupt) -.text +.section .entry.text, "ax" .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) @@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR .endif .previous .long 1b - .text + .section .entry.text, "ax" vector=vector+1 .endif .endr diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index aed1ffb..0a0ed79 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -61,6 +61,8 @@ #define __AUDIT_ARCH_LE 0x40000000 .code64 + .section .entry.text, "ax" + #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -744,7 +746,7 @@ END(stub_rt_sigreturn) */ .section .init.rodata,"a" ENTRY(interrupt) - .text + .section .entry.text .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) @@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR .endif .previous .quad 1b - .text + .section .entry.text vector=vector+1 .endif .endr diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bf47007..6d4341d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -105,6 +105,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT + ENTRY_TEXT IRQENTRY_TEXT *(.fixup) *(.gnu.warning) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index b3bfabc..c1a1216 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[]; extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; +extern char __entry_text_start[], __entry_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fe77e33..906c3ce 100644 --- 
a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -424,6 +424,12 @@ *(.kprobes.text) \ VMLINUX_SYMBOL(__kprobes_text_end) = .; +#define ENTRY_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__entry_text_start) = .; \ + *(.entry.text) \ + VMLINUX_SYMBOL(__entry_text_end) = .; + #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define IRQENTRY_TEXT \ ALIGN_FUNCTION(); \ -- cgit v0.10.2 From 2a8247a2600c3e087a568fc68a6ec4eedac27ef1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 21 Feb 2011 15:25:13 +0100 Subject: kprobes: Disabling optimized kprobes for entry text section You can crash the kernel (with root/admin privileges) using the kprobe tracer by running: echo "p system_call_after_swapgs" > ./kprobe_events echo 1 > ./events/kprobes/enable The reason is that at the system_call_after_swapgs label, the kernel stack is not set up. If optimized kprobes are enabled, the user space stack is being used in this case (see optimized kprobe template) and this might result in a crash. There are several places like this throughout the entry code (entry_$BIT). As there doesn't seem to be any reasonable/maintainable way to disable only those places where the stack is not ready, I excluded the whole entry code from kprobe optimization. Signed-off-by: Jiri Olsa Acked-by: Masami Hiramatsu Cc: acme@redhat.com Cc: fweisbec@gmail.com Cc: ananth@in.ibm.com Cc: davem@davemloft.net Cc: a.p.zijlstra@chello.nl Cc: eric.dumazet@gmail.com Cc: 2nddept-manager@sdl.hitachi.co.jp LKML-Reference: <1298298313-5980-3-git-send-email-jolsa@redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index d91c477..c969fd9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr) if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) return 0; + /* + * Do not optimize in the entry code due to the unstable + * stack handling.
+ */ + if ((paddr >= (unsigned long )__entry_text_start) && + (paddr < (unsigned long )__entry_text_end)) + return 0; + /* Check there is enough space for a relative jump. */ if (size - offset < RELATIVEJUMP_SIZE) return 0; -- cgit v0.10.2 From 750912fa366312e9c5bc83eab352898a26750401 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 8 Dec 2010 13:46:47 -0800 Subject: tracing: Add an 'overwrite' trace_option. Add an "overwrite" trace_option for ftrace to control whether the buffer should be overwritten on overflow or not. The default remains to overwrite old events when the buffer is full. This patch adds the option to instead discard newest events when the buffer is full. This is useful to get a snapshot of traces just after enabling traces. Dropping the current event is also a simpler code path. Signed-off-by: David Sharp LKML-Reference: <1291844807-15481-1-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 67f1cc4..1ebc24c 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -454,6 +454,11 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] latencies, as described in "Latency trace format". + overwrite - This controls what happens when the trace buffer is + full. If "1" (default), the oldest events are + discarded and overwritten. If "0", then the newest + events are discarded. 
+ ftrace_enabled -------------- diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 8d3a248..ab38ac8 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -100,6 +100,8 @@ void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); +void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val); + struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length); int ring_buffer_unlock_commit(struct ring_buffer *buffer, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bd1c35a..269db80 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1429,6 +1429,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) } EXPORT_SYMBOL_GPL(ring_buffer_resize); +void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) +{ + mutex_lock(&buffer->mutex); + if (val) + buffer->flags |= RB_FL_OVERWRITE; + else + buffer->flags &= ~RB_FL_OVERWRITE; + mutex_unlock(&buffer->mutex); +} +EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); + static inline void * __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8dc8da6..85e3ee1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -41,8 +41,6 @@ #include "trace.h" #include "trace_output.h" -#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) - /* * On boot up, the ring buffer is set to the minimum size, so that * we do not waste memory on systems that are not using tracing. 
@@ -340,7 +338,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | - TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD; + TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE; static int trace_stop_count; static DEFINE_SPINLOCK(tracing_start_lock); @@ -425,6 +423,7 @@ static const char *trace_options[] = { "sleep-time", "graph-time", "record-cmd", + "overwrite", NULL }; @@ -2529,6 +2528,9 @@ static void set_tracer_flags(unsigned int mask, int enabled) if (mask == TRACE_ITER_RECORD_CMD) trace_event_enable_cmd_record(enabled); + + if (mask == TRACE_ITER_OVERWRITE) + ring_buffer_change_overwrite(global_trace.buffer, enabled); } static ssize_t @@ -4555,9 +4557,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) __init static int tracer_alloc_buffers(void) { int ring_buf_size; + enum ring_buffer_flags rb_flags; int i; int ret = -ENOMEM; + if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) goto out; @@ -4570,12 +4574,13 @@ __init static int tracer_alloc_buffers(void) else ring_buf_size = 1; + rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? 
RB_FL_OVERWRITE : 0; + cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(tracing_cpumask, cpu_all_mask); /* TODO: make the number of buffers hot pluggable with CPUS */ - global_trace.buffer = ring_buffer_alloc(ring_buf_size, - TRACE_BUFFER_FLAGS); + global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags); if (!global_trace.buffer) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); WARN_ON(1); @@ -4585,7 +4590,7 @@ __init static int tracer_alloc_buffers(void) #ifdef CONFIG_TRACER_MAX_TRACE - max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); + max_tr.buffer = ring_buffer_alloc(1, rb_flags); if (!max_tr.buffer) { printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); WARN_ON(1); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 856e73c..951d0b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -606,6 +606,7 @@ enum trace_iterator_flags { TRACE_ITER_SLEEP_TIME = 0x40000, TRACE_ITER_GRAPH_TIME = 0x80000, TRACE_ITER_RECORD_CMD = 0x100000, + TRACE_ITER_OVERWRITE = 0x200000, }; /* -- cgit v0.10.2 From de29be5e712dc8b7eef2bef9417af3bb6a88e47a Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:16 -0800 Subject: ring-buffer: Remove unused #include Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-3-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 269db80..3237d96 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5,7 +5,6 @@ */ #include #include -#include #include #include #include -- cgit v0.10.2 From b9a46bba88001504235459c8410f17e6a7e38008 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Mar 2011 21:13:40 +0100 Subject: perf top: Fix events overflow in top command The snprintf function returns number of printed characters even if it cross the size parameter. So passing enough events via '-e' parameter will cause segmentation fault. 
It's reproduced by the following command: perf top -e `perf list | grep Tracepoint | awk -F'[' '\ {gsub(/[[:space:]]+/,"",$1);array[FNR]=$1}END{outputs=array[1];\ for (i=2;i<=FNR;i++){ outputs=outputs "," array[i];};print outputs}'` Attached patch is adding SNPRINTF macro that provides the overflow check and returns actual number of printed characters. Reported-by: Han Pingtian Cc: Han Pingtian Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1299528821-17521-2-git-send-email-jolsa@redhat.com> Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 70a9c13..4f869da 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -61,6 +61,12 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) rb_insert_color(&se->rb_node, tree); } +#define SNPRINTF(buf, size, fmt, args...) \ +({ \ + size_t r = snprintf(buf, size, fmt, ## args); \ + r > size ? size : r; \ +}) + size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) { struct perf_evsel *counter; @@ -70,7 +76,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) size_t ret = 0; if (!perf_guest) { - ret = snprintf(bf, size, + ret = SNPRINTF(bf, size, " PerfTop:%8.0f irqs/sec kernel:%4.1f%%" " exact: %4.1f%% [", samples_per_sec, 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) / @@ -81,7 +87,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs; float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs; - ret = snprintf(bf, size, + ret = SNPRINTF(bf, size, " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%" " guest kernel:%4.1f%% guest us:%4.1f%%" " exact: %4.1f%% [", samples_per_sec, @@ -101,38 +107,38 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) if (top->evlist->nr_entries == 1 ||
!top->display_weighted) { struct perf_evsel *first; first = list_entry(top->evlist->entries.next, struct perf_evsel, node); - ret += snprintf(bf + ret, size - ret, "%" PRIu64 "%s ", + ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->attr.sample_period, top->freq ? "Hz" : ""); } if (!top->display_weighted) { - ret += snprintf(bf + ret, size - ret, "%s", + ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel)); } else list_for_each_entry(counter, &top->evlist->entries, node) { - ret += snprintf(bf + ret, size - ret, "%s%s", + ret += SNPRINTF(bf + ret, size - ret, "%s%s", counter->idx ? "/" : "", event_name(counter)); } - ret += snprintf(bf + ret, size - ret, "], "); + ret += SNPRINTF(bf + ret, size - ret, "], "); if (top->target_pid != -1) - ret += snprintf(bf + ret, size - ret, " (target_pid: %d", + ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", top->target_pid); else if (top->target_tid != -1) - ret += snprintf(bf + ret, size - ret, " (target_tid: %d", + ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", top->target_tid); else - ret += snprintf(bf + ret, size - ret, " (all"); + ret += SNPRINTF(bf + ret, size - ret, " (all"); if (top->cpu_list) - ret += snprintf(bf + ret, size - ret, ", CPU%s: %s)", + ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); else { if (top->target_tid != -1) - ret += snprintf(bf + ret, size - ret, ")"); + ret += SNPRINTF(bf + ret, size - ret, ")"); else - ret += snprintf(bf + ret, size - ret, ", %d CPU%s)", + ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", top->evlist->cpus->nr, top->evlist->cpus->nr > 1 ? "s" : ""); } -- cgit v0.10.2 From 6547250381eb315acff3d52b4872ad775359407c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Mar 2011 21:13:41 +0100 Subject: perf top: Don't let events to eat up whole header line Passing multiple events might force out information about pid/tid/cpu. 
Attached patch leaves 30 characters for this info at the expense of the events' names. Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Han Pingtian LKML-Reference: <1299528821-17521-3-git-send-email-jolsa@redhat.com> Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 4f869da..75cfe4d 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -115,9 +115,23 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) if (!top->display_weighted) { ret += SNPRINTF(bf + ret, size - ret, "%s", event_name(top->sym_evsel)); - } else list_for_each_entry(counter, &top->evlist->entries, node) { - ret += SNPRINTF(bf + ret, size - ret, "%s%s", - counter->idx ? "/" : "", event_name(counter)); + } else { + /* + * Don't let events eat all the space. Leaving 30 bytes + * for the rest should be enough. + */ + size_t last_pos = size - 30; + + list_for_each_entry(counter, &top->evlist->entries, node) { + ret += SNPRINTF(bf + ret, size - ret, "%s%s", + counter->idx ? "/" : "", + event_name(counter)); + if (ret > last_pos) { + sprintf(bf + last_pos - 3, ".."); + ret = last_pos - 1; + break; + } + } } ret += SNPRINTF(bf + ret, size - ret, "], "); -- cgit v0.10.2 From a91e5431d54f5359fccb5ec2512f252eb217707e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 10 Mar 2011 11:15:54 -0300 Subject: perf session: Use evlist/evsel for managing perf.data attributes So that we can reuse things like the id to attr lookup routine (perf_evlist__id2evsel) that uses a hash table instead of the linear lookup done in the older perf_header_attr routines, etc. Also to make evsels/evlist more pervasive an API, simplifying using the emerging perf lib.
cc: Arun Sharma Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d40a81e..6febcc1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -31,7 +31,6 @@ #include #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -#define SID(e, x, y) xyarray__entry(e->id, x, y) enum write_mode_t { WRITE_FORCE, @@ -40,7 +39,6 @@ enum write_mode_t { static u64 user_interval = ULLONG_MAX; static u64 default_interval = 0; -static u64 sample_type; static unsigned int page_size; static unsigned int mmap_pages = 128; @@ -160,54 +158,6 @@ static void sig_atexit(void) kill(getpid(), signr); } -static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) -{ - struct perf_header_attr *h_attr; - - if (nr < session->header.attrs) { - h_attr = session->header.attr[nr]; - } else { - h_attr = perf_header_attr__new(a); - if (h_attr != NULL) - if (perf_header__add_attr(&session->header, h_attr) < 0) { - perf_header_attr__delete(h_attr); - h_attr = NULL; - } - } - - return h_attr; -} - -static void create_counter(struct perf_evsel *evsel, int cpu) -{ - struct perf_event_attr *attr = &evsel->attr; - struct perf_header_attr *h_attr; - struct perf_sample_id *sid; - int thread_index; - - for (thread_index = 0; thread_index < evsel_list->threads->nr; thread_index++) { - h_attr = get_header_attr(attr, evsel->idx); - if (h_attr == NULL) - die("nomem\n"); - - if (!file_new) { - if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { - fprintf(stderr, "incompatible append\n"); - exit(-1); - } - } - - sid = SID(evsel, cpu, thread_index); - if (perf_header_attr__add_id(h_attr, sid->id) < 0) { - pr_warning("Not enough memory to add id\n"); - exit(-1); - } - } - - if (!sample_type) - sample_type = attr->sample_type; -} - static void config_attr(struct 
perf_evsel *evsel, struct perf_evlist *evlist) { struct perf_event_attr *attr = &evsel->attr; @@ -278,10 +228,28 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist) } } +static bool perf_evlist__equal(struct perf_evlist *evlist, + struct perf_evlist *other) +{ + struct perf_evsel *pos, *pair; + + if (evlist->nr_entries != other->nr_entries) + return false; + + pair = list_entry(other->entries.next, struct perf_evsel, node); + + list_for_each_entry(pos, &evlist->entries, node) { + if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0) /* compare the whole attr, entry by entry */ + return false; + pair = list_entry(pair->node.next, struct perf_evsel, node); + } + + return true; +} + static void open_counters(struct perf_evlist *evlist) { struct perf_evsel *pos; - int cpu; list_for_each_entry(pos, &evlist->entries, node) { struct perf_event_attr *attr = &pos->attr; @@ -364,10 +332,16 @@ try_again: if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) die("failed to mmap with %d (%s)\n", errno, strerror(errno)); - for (cpu = 0; cpu < evsel_list->cpus->nr; ++cpu) { - list_for_each_entry(pos, &evlist->entries, node) - create_counter(pos, cpu); - } + if (file_new) + session->evlist = evlist; + else { + if (!perf_evlist__equal(session->evlist, evlist)) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + + perf_session__update_sample_type(session); } static int process_buildids(void) @@ -390,7 +364,7 @@ static void atexit_header(void) if (!no_buildid) process_buildids(); - perf_header__write(&session->header, evsel_list, output, true); + perf_session__write_header(session, evsel_list, output, true); perf_session__delete(session); perf_evlist__delete(evsel_list); symbol__exit(); @@ -524,7 +498,7 @@ static int __cmd_record(int argc, const char **argv) perf_header__set_feat(&session->header, HEADER_BUILD_ID); if (!file_new) { - err = perf_header__read(session, output); + err = perf_session__read_header(session, output); if (err < 0) goto out_delete_session; } @@ -588,8
+562,6 @@ static int __cmd_record(int argc, const char **argv) open_counters(evsel_list); - perf_session__set_sample_type(session, sample_type); - /* * perf_session__delete(session) will be called at atexit_header() */ @@ -600,20 +572,17 @@ static int __cmd_record(int argc, const char **argv) if (err < 0) return err; } else if (file_new) { - err = perf_header__write(&session->header, evsel_list, - output, false); + err = perf_session__write_header(session, evsel_list, + output, false); if (err < 0) return err; } post_processing_offset = lseek(output, 0, SEEK_CUR); - perf_session__set_sample_id_all(session, sample_id_all_avail); - if (pipe_output) { - err = perf_event__synthesize_attrs(&session->header, - process_synthesized_event, - session); + err = perf_session__synthesize_attrs(session, + process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); return err; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e9b5d51..b1b8200 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -70,8 +70,8 @@ static int perf_session__add_hist_entry(struct perf_session *session, * FIXME: Propagate this back, but at least we're in a builtin, * where exit() is allowed. 
;-) */ - ui__warning("Invalid %s file, contains samples with id not in " - "its header!\n", input_name); + ui__warning("Invalid %s file, contains samples with id %" PRIu64 " not in " + "its header!\n", input_name, sample->id); exit_browser(0); exit(1); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 417f757..80c9e06 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -883,7 +883,6 @@ try_again: static int __cmd_top(void) { pthread_t thread; - struct perf_evsel *first; int ret __used; /* * FIXME: perf_session__new should allow passing a O_MMAP, so that all this @@ -900,8 +899,8 @@ static int __cmd_top(void) perf_event__synthesize_threads(perf_event__process, session); start_counters(top.evlist); - first = list_entry(top.evlist->entries.next, struct perf_evsel, node); - perf_session__set_sample_type(session, first->attr.sample_type); + session->evlist = top.evlist; + perf_session__update_sample_type(session); /* Wait for a minimal set of events before starting the snapshot */ poll(top.evlist->pollfd, top.evlist->nr_fds, 100); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 190c64c..d852cef 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -19,7 +19,7 @@ #include #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -#define SID(e, x, y) xyarray__entry(e->id, x, y) +#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) @@ -106,8 +106,9 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) evlist->nr_fds++; } -void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) +static void perf_evlist__id_hash(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, u64 id) { int hash; struct perf_sample_id *sid = SID(evsel, cpu, thread); @@ -118,9 +119,16 @@ void perf_evlist__id_hash(struct 
perf_evlist *evlist, struct perf_evsel *evsel, hlist_add_head(&sid->node, &evlist->heads[hash]); } -static int perf_evlist__id_hash_fd(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, int fd) +void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, + int cpu, int thread, u64 id) +{ + perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + evsel->id[evsel->ids++] = id; +} + +static int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ @@ -134,7 +142,7 @@ static int perf_evlist__id_hash_fd(struct perf_evlist *evlist, if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) ++id_idx; - perf_evlist__id_hash(evlist, evsel, cpu, thread, read_data[id_idx]); + perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]); return 0; } @@ -292,7 +300,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) list_for_each_entry(evsel, &evlist->entries, node) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && - evsel->id == NULL && + evsel->sample_id == NULL && perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) return -ENOMEM; @@ -308,7 +316,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) goto out_unmap; if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_hash_fd(evlist, evsel, cpu, thread, fd) < 0) + perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) goto out_unmap; } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 078d512..8b1cb7a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -38,8 +38,8 @@ void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); -void perf_evlist__id_hash(struct perf_evlist *evlist, 
struct perf_evsel *evsel, - int cpu, int thread, u64 id); +void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, + int cpu, int thread, u64 id); int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8083d51..662596a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -41,8 +41,18 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { - evsel->id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); - return evsel->id != NULL ? 0 : -ENOMEM; + evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); + if (evsel->sample_id == NULL) + return -ENOMEM; + + evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); + if (evsel->id == NULL) { + xyarray__delete(evsel->sample_id); + evsel->sample_id = NULL; + return -ENOMEM; + } + + return 0; } int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) @@ -60,7 +70,9 @@ void perf_evsel__free_fd(struct perf_evsel *evsel) void perf_evsel__free_id(struct perf_evsel *evsel) { - xyarray__delete(evsel->id); + xyarray__delete(evsel->sample_id); + evsel->sample_id = NULL; + free(evsel->id); evsel->id = NULL; } @@ -79,7 +91,8 @@ void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); xyarray__delete(evsel->fd); - xyarray__delete(evsel->id); + xyarray__delete(evsel->sample_id); + free(evsel->id); } void perf_evsel__delete(struct perf_evsel *evsel) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 281b60e..6710ab5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -49,12 +49,17 @@ struct perf_evsel { struct perf_event_attr attr; char *filter; struct xyarray *fd; - struct xyarray *id; + struct xyarray *sample_id; + u64 *id; struct perf_counts *counts; int 
idx; + int ids; struct hists hists; char *name; - void *priv; + union { + void *priv; + off_t id_offset; + }; struct cgroup_sel *cgrp; }; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 108b0db..40b10e4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -9,6 +9,7 @@ #include #include "evlist.h" +#include "evsel.h" #include "util.h" #include "header.h" #include "../perf.h" @@ -19,89 +20,6 @@ static bool no_buildid_cache = false; -/* - * Create new perf.data header attribute: - */ -struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr) -{ - struct perf_header_attr *self = malloc(sizeof(*self)); - - if (self != NULL) { - self->attr = *attr; - self->ids = 0; - self->size = 1; - self->id = malloc(sizeof(u64)); - if (self->id == NULL) { - free(self); - self = NULL; - } - } - - return self; -} - -void perf_header_attr__delete(struct perf_header_attr *self) -{ - free(self->id); - free(self); -} - -int perf_header_attr__add_id(struct perf_header_attr *self, u64 id) -{ - int pos = self->ids; - - self->ids++; - if (self->ids > self->size) { - int nsize = self->size * 2; - u64 *nid = realloc(self->id, nsize * sizeof(u64)); - - if (nid == NULL) - return -1; - - self->size = nsize; - self->id = nid; - } - self->id[pos] = id; - return 0; -} - -int perf_header__init(struct perf_header *self) -{ - self->size = 1; - self->attr = malloc(sizeof(void *)); - return self->attr == NULL ? 
-ENOMEM : 0; -} - -void perf_header__exit(struct perf_header *self) -{ - int i; - for (i = 0; i < self->attrs; ++i) - perf_header_attr__delete(self->attr[i]); - free(self->attr); -} - -int perf_header__add_attr(struct perf_header *self, - struct perf_header_attr *attr) -{ - if (self->frozen) - return -1; - - if (self->attrs == self->size) { - int nsize = self->size * 2; - struct perf_header_attr **nattr; - - nattr = realloc(self->attr, nsize * sizeof(void *)); - if (nattr == NULL) - return -1; - - self->size = nsize; - self->attr = nattr; - } - - self->attr[self->attrs++] = attr; - return 0; -} - static int event_count; static struct perf_trace_event_type *events; @@ -515,33 +433,41 @@ int perf_header__write_pipe(int fd) return 0; } -int perf_header__write(struct perf_header *self, struct perf_evlist *evlist, - int fd, bool at_exit) +int perf_session__write_header(struct perf_session *session, + struct perf_evlist *evlist, + int fd, bool at_exit) { struct perf_file_header f_header; struct perf_file_attr f_attr; - struct perf_header_attr *attr; - int i, err; + struct perf_header *self = &session->header; + struct perf_evsel *attr, *pair = NULL; + int err; lseek(fd, sizeof(f_header), SEEK_SET); - for (i = 0; i < self->attrs; i++) { - attr = self->attr[i]; + if (session->evlist != evlist) + pair = list_entry(session->evlist->entries.next, struct perf_evsel, node); + list_for_each_entry(attr, &evlist->entries, node) { attr->id_offset = lseek(fd, 0, SEEK_CUR); err = do_write(fd, attr->id, attr->ids * sizeof(u64)); if (err < 0) { +out_err_write: pr_debug("failed to write perf header\n"); return err; } + if (session->evlist != evlist) { + err = do_write(fd, pair->id, pair->ids * sizeof(u64)); + if (err < 0) + goto out_err_write; + attr->ids += pair->ids; + pair = list_entry(pair->node.next, struct perf_evsel, node); + } } - self->attr_offset = lseek(fd, 0, SEEK_CUR); - for (i = 0; i < self->attrs; i++) { - attr = self->attr[i]; - + list_for_each_entry(attr, 
&evlist->entries, node) { f_attr = (struct perf_file_attr){ .attr = attr->attr, .ids = { @@ -580,7 +506,7 @@ int perf_header__write(struct perf_header *self, struct perf_evlist *evlist, .attr_size = sizeof(f_attr), .attrs = { .offset = self->attr_offset, - .size = self->attrs * sizeof(f_attr), + .size = evlist->nr_entries * sizeof(f_attr), }, .data = { .offset = self->data_offset, @@ -861,7 +787,7 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) return 0; } -int perf_header__read(struct perf_session *session, int fd) +int perf_session__read_header(struct perf_session *session, int fd) { struct perf_header *self = &session->header; struct perf_file_header f_header; @@ -869,6 +795,10 @@ int perf_header__read(struct perf_session *session, int fd) u64 f_id; int nr_attrs, nr_ids, i, j; + session->evlist = perf_evlist__new(NULL, NULL); + if (session->evlist == NULL) + return -ENOMEM; + if (session->fd_pipe) return perf_header__read_pipe(session, fd); @@ -881,33 +811,39 @@ int perf_header__read(struct perf_session *session, int fd) lseek(fd, f_header.attrs.offset, SEEK_SET); for (i = 0; i < nr_attrs; i++) { - struct perf_header_attr *attr; + struct perf_evsel *evsel; off_t tmp; if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr))) goto out_errno; tmp = lseek(fd, 0, SEEK_CUR); + evsel = perf_evsel__new(&f_attr.attr, i); - attr = perf_header_attr__new(&f_attr.attr); - if (attr == NULL) - return -ENOMEM; + if (evsel == NULL) + goto out_delete_evlist; + /* + * Do it before so that if perf_evsel__alloc_id fails, this + * entry gets purged too at perf_evlist__delete(). + */ + perf_evlist__add(session->evlist, evsel); nr_ids = f_attr.ids.size / sizeof(u64); + /* + * We don't have the cpu and thread maps on the header, so + * for allocating the perf_sample_id table we fake 1 cpu and + * hattr->ids threads. 
+ */ + if (perf_evsel__alloc_id(evsel, 1, nr_ids)) + goto out_delete_evlist; + lseek(fd, f_attr.ids.offset, SEEK_SET); for (j = 0; j < nr_ids; j++) { if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id))) goto out_errno; - if (perf_header_attr__add_id(attr, f_id) < 0) { - perf_header_attr__delete(attr); - return -ENOMEM; - } - } - if (perf_header__add_attr(self, attr) < 0) { - perf_header_attr__delete(attr); - return -ENOMEM; + perf_evlist__id_add(session->evlist, evsel, 0, j, f_id); } lseek(fd, tmp, SEEK_SET); @@ -932,37 +868,38 @@ int perf_header__read(struct perf_session *session, int fd) return 0; out_errno: return -errno; + +out_delete_evlist: + perf_evlist__delete(session->evlist); + session->evlist = NULL; + return -ENOMEM; } -u64 perf_header__sample_type(struct perf_header *header) +u64 perf_evlist__sample_type(struct perf_evlist *evlist) { + struct perf_evsel *pos; u64 type = 0; - int i; - - for (i = 0; i < header->attrs; i++) { - struct perf_header_attr *attr = header->attr[i]; + list_for_each_entry(pos, &evlist->entries, node) { if (!type) - type = attr->attr.sample_type; - else if (type != attr->attr.sample_type) + type = pos->attr.sample_type; + else if (type != pos->attr.sample_type) die("non matching sample_type"); } return type; } -bool perf_header__sample_id_all(const struct perf_header *header) +bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) { bool value = false, first = true; - int i; - - for (i = 0; i < header->attrs; i++) { - struct perf_header_attr *attr = header->attr[i]; + struct perf_evsel *pos; + list_for_each_entry(pos, &evlist->entries, node) { if (first) { - value = attr->attr.sample_id_all; + value = pos->attr.sample_id_all; first = false; - } else if (value != attr->attr.sample_id_all) + } else if (value != pos->attr.sample_id_all) die("non matching sample_id_all"); } @@ -1000,16 +937,13 @@ int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, return err; } -int 
perf_event__synthesize_attrs(struct perf_header *self, - perf_event__handler_t process, - struct perf_session *session) +int perf_session__synthesize_attrs(struct perf_session *session, + perf_event__handler_t process) { - struct perf_header_attr *attr; - int i, err = 0; - - for (i = 0; i < self->attrs; i++) { - attr = self->attr[i]; + struct perf_evsel *attr; + int err = 0; + list_for_each_entry(attr, &session->evlist->entries, node) { err = perf_event__synthesize_attr(&attr->attr, attr->ids, attr->id, process, session); if (err) { @@ -1024,27 +958,36 @@ int perf_event__synthesize_attrs(struct perf_header *self, int perf_event__process_attr(union perf_event *event, struct perf_session *session) { - struct perf_header_attr *attr; unsigned int i, ids, n_ids; + struct perf_evsel *evsel; - attr = perf_header_attr__new(&event->attr.attr); - if (attr == NULL) + if (session->evlist == NULL) { + session->evlist = perf_evlist__new(NULL, NULL); + if (session->evlist == NULL) + return -ENOMEM; + } + + evsel = perf_evsel__new(&event->attr.attr, + session->evlist->nr_entries); + if (evsel == NULL) return -ENOMEM; + perf_evlist__add(session->evlist, evsel); + ids = event->header.size; ids -= (void *)&event->attr.id - (void *)event; n_ids = ids / sizeof(u64); + /* + * We don't have the cpu and thread maps on the header, so + * for allocating the perf_sample_id table we fake 1 cpu and + * hattr->ids threads. 
+ */ + if (perf_evsel__alloc_id(evsel, 1, n_ids)) + return -ENOMEM; for (i = 0; i < n_ids; i++) { - if (perf_header_attr__add_id(attr, event->attr.id[i]) < 0) { - perf_header_attr__delete(attr); - return -ENOMEM; - } - } - - if (perf_header__add_attr(&session->header, attr) < 0) { - perf_header_attr__delete(attr); - return -ENOMEM; + perf_evlist__id_add(session->evlist, evsel, 0, i, + event->attr.id[i]); } perf_session__update_sample_type(session); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2fab133..4cc2675 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -9,13 +9,6 @@ #include -struct perf_header_attr { - struct perf_event_attr attr; - int ids, size; - u64 *id; - off_t id_offset; -}; - enum { HEADER_TRACE_INFO = 1, HEADER_BUILD_ID, @@ -51,9 +44,7 @@ int perf_file_header__read(struct perf_file_header *self, struct perf_header { int frozen; - int attrs, size; bool needs_swap; - struct perf_header_attr **attr; s64 attr_offset; u64 data_offset; u64 data_size; @@ -62,29 +53,19 @@ struct perf_header { DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; -int perf_header__init(struct perf_header *self); -void perf_header__exit(struct perf_header *self); - struct perf_evlist; -int perf_header__read(struct perf_session *session, int fd); -int perf_header__write(struct perf_header *self, struct perf_evlist *evlist, - int fd, bool at_exit); +int perf_session__read_header(struct perf_session *session, int fd); +int perf_session__write_header(struct perf_session *session, + struct perf_evlist *evlist, + int fd, bool at_exit); int perf_header__write_pipe(int fd); -int perf_header__add_attr(struct perf_header *self, - struct perf_header_attr *attr); - int perf_header__push_event(u64 id, const char *name); char *perf_header__find_event(u64 id); -struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); -void perf_header_attr__delete(struct perf_header_attr *self); - -int perf_header_attr__add_id(struct 
perf_header_attr *self, u64 id); - -u64 perf_header__sample_type(struct perf_header *header); -bool perf_header__sample_id_all(const struct perf_header *header); +u64 perf_evlist__sample_type(struct perf_evlist *evlist); +bool perf_evlist__sample_id_all(const struct perf_evlist *evlist); void perf_header__set_feat(struct perf_header *self, int feat); void perf_header__clear_feat(struct perf_header *self, int feat); bool perf_header__has_feat(const struct perf_header *self, int feat); @@ -101,9 +82,8 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, perf_event__handler_t process, struct perf_session *session); -int perf_event__synthesize_attrs(struct perf_header *self, - perf_event__handler_t process, - struct perf_session *session); +int perf_session__synthesize_attrs(struct perf_session *session, + perf_event__handler_t process); int perf_event__process_attr(union perf_event *event, struct perf_session *session); int perf_event__synthesize_event_type(u64 event_id, char *name, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0d41419..f26639f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -13,46 +13,6 @@ #include "sort.h" #include "util.h" -static int perf_session__read_evlist(struct perf_session *session) -{ - int i, j; - - session->evlist = perf_evlist__new(NULL, NULL); - if (session->evlist == NULL) - return -ENOMEM; - - for (i = 0; i < session->header.attrs; ++i) { - struct perf_header_attr *hattr = session->header.attr[i]; - struct perf_evsel *evsel = perf_evsel__new(&hattr->attr, i); - - if (evsel == NULL) - goto out_delete_evlist; - /* - * Do it before so that if perf_evsel__alloc_id fails, this - * entry gets purged too at perf_evlist__delete(). 
- */ - perf_evlist__add(session->evlist, evsel); - /* - * We don't have the cpu and thread maps on the header, so - * for allocating the perf_sample_id table we fake 1 cpu and - * hattr->ids threads. - */ - if (perf_evsel__alloc_id(evsel, 1, hattr->ids)) - goto out_delete_evlist; - - for (j = 0; j < hattr->ids; ++j) - perf_evlist__id_hash(session->evlist, evsel, 0, j, - hattr->id[j]); - } - - return 0; - -out_delete_evlist: - perf_evlist__delete(session->evlist); - session->evlist = NULL; - return -ENOMEM; -} - static int perf_session__open(struct perf_session *self, bool force) { struct stat input_stat; @@ -61,7 +21,7 @@ static int perf_session__open(struct perf_session *self, bool force) self->fd_pipe = true; self->fd = STDIN_FILENO; - if (perf_header__read(self, self->fd) < 0) + if (perf_session__read_header(self, self->fd) < 0) pr_err("incompatible file format"); return 0; @@ -93,16 +53,11 @@ static int perf_session__open(struct perf_session *self, bool force) goto out_close; } - if (perf_header__read(self, self->fd) < 0) { + if (perf_session__read_header(self, self->fd) < 0) { pr_err("incompatible file format"); goto out_close; } - if (perf_session__read_evlist(self) < 0) { - pr_err("Not enough memory to read the event selector list\n"); - goto out_close; - } - self->size = input_stat.st_size; return 0; @@ -139,21 +94,10 @@ out: session->id_hdr_size = size; } -void perf_session__set_sample_id_all(struct perf_session *session, bool value) -{ - session->sample_id_all = value; - perf_session__id_header_size(session); -} - -void perf_session__set_sample_type(struct perf_session *session, u64 type) -{ - session->sample_type = type; -} - void perf_session__update_sample_type(struct perf_session *self) { - self->sample_type = perf_header__sample_type(&self->header); - self->sample_id_all = perf_header__sample_id_all(&self->header); + self->sample_type = perf_evlist__sample_type(self->evlist); + self->sample_id_all = perf_evlist__sample_id_all(self->evlist); 
perf_session__id_header_size(self); } @@ -182,9 +126,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, if (self == NULL) goto out; - if (perf_header__init(&self->header) < 0) - goto out_free; - memcpy(self->filename, filename, len); self->threads = RB_ROOT; INIT_LIST_HEAD(&self->dead_threads); @@ -208,6 +149,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, if (mode == O_RDONLY) { if (perf_session__open(self, force) < 0) goto out_delete; + perf_session__update_sample_type(self); } else if (mode == O_WRONLY) { /* * In O_RDONLY mode this will be performed when reading the @@ -217,8 +159,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, goto out_delete; } - perf_session__update_sample_type(self); - if (ops && ops->ordering_requires_timestamps && ops->ordered_samples && !self->sample_id_all) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); @@ -227,9 +167,6 @@ struct perf_session *perf_session__new(const char *filename, int mode, out: return self; -out_free: - free(self); - return NULL; out_delete: perf_session__delete(self); return NULL; @@ -260,7 +197,6 @@ static void perf_session__delete_threads(struct perf_session *self) void perf_session__delete(struct perf_session *self) { - perf_header__exit(&self->header); perf_session__destroy_kernel_maps(self); perf_session__delete_dead_threads(self); perf_session__delete_threads(self); diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 05dd7bc..b5b148b 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -112,8 +112,6 @@ void mem_bswap_64(void *src, int byte_size); int perf_session__create_kernel_maps(struct perf_session *self); void perf_session__update_sample_type(struct perf_session *self); -void perf_session__set_sample_id_all(struct perf_session *session, bool value); -void perf_session__set_sample_type(struct perf_session *session, u64 type); void 
perf_session__remove_thread(struct perf_session *self, struct thread *th); static inline -- cgit v0.10.2 From 1c0b04d10bbe35279c50e3b36cf5b8ec2a0050d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Mar 2011 08:13:19 -0300 Subject: perf header: Stop using 'self' Stop using this python/OOP convention; it doesn't really help. Will do more from time to time till we get it cleaned up in all of tools/perf. Suggested-by: Thomas Gleixner LKML-Reference: Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tom Zanussi Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 40b10e4..5a72d42 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -66,19 +66,19 @@ struct perf_file_attr { struct perf_file_section ids; }; -void perf_header__set_feat(struct perf_header *self, int feat) +void perf_header__set_feat(struct perf_header *header, int feat) { - set_bit(feat, self->adds_features); + set_bit(feat, header->adds_features); } -void perf_header__clear_feat(struct perf_header *self, int feat) +void perf_header__clear_feat(struct perf_header *header, int feat) { - clear_bit(feat, self->adds_features); + clear_bit(feat, header->adds_features); } -bool perf_header__has_feat(const struct perf_header *self, int feat) +bool perf_header__has_feat(const struct perf_header *header, int feat) { - return test_bit(feat, self->adds_features); + return test_bit(feat, header->adds_features); } static int do_write(int fd, const void *buf, size_t size) @@ -147,22 +147,22 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, return 0; } -static int machine__write_buildid_table(struct machine *self, int fd) +static int machine__write_buildid_table(struct machine *machine, int fd) { int err; u16 kmisc = PERF_RECORD_MISC_KERNEL, umisc = PERF_RECORD_MISC_USER; - if (!machine__is_host(self)) { + if
(!machine__is_host(machine)) { kmisc = PERF_RECORD_MISC_GUEST_KERNEL; umisc = PERF_RECORD_MISC_GUEST_USER; } - err = __dsos__write_buildid_table(&self->kernel_dsos, self->pid, + err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid, kmisc, fd); if (err == 0) - err = __dsos__write_buildid_table(&self->user_dsos, - self->pid, umisc, fd); + err = __dsos__write_buildid_table(&machine->user_dsos, + machine->pid, umisc, fd); return err; } @@ -280,12 +280,12 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *self, const char *debugdir) +static int dso__cache_build_id(struct dso *dso, const char *debugdir) { - bool is_kallsyms = self->kernel && self->long_name[0] != '/'; + bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; - return build_id_cache__add_b(self->build_id, sizeof(self->build_id), - self->long_name, debugdir, is_kallsyms); + return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), + dso->long_name, debugdir, is_kallsyms); } static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) @@ -300,14 +300,14 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) return err; } -static int machine__cache_build_ids(struct machine *self, const char *debugdir) +static int machine__cache_build_ids(struct machine *machine, const char *debugdir) { - int ret = __dsos__cache_build_ids(&self->kernel_dsos, debugdir); - ret |= __dsos__cache_build_ids(&self->user_dsos, debugdir); + int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir); + ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir); return ret; } -static int perf_session__cache_build_ids(struct perf_session *self) +static int perf_session__cache_build_ids(struct perf_session *session) { struct rb_node *nd; int ret; @@ -318,28 +318,28 @@ static int perf_session__cache_build_ids(struct perf_session *self) if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) return -1; - ret = 
machine__cache_build_ids(&self->host_machine, debugdir); + ret = machine__cache_build_ids(&session->host_machine, debugdir); - for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) { + for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__cache_build_ids(pos, debugdir); } return ret ? -1 : 0; } -static bool machine__read_build_ids(struct machine *self, bool with_hits) +static bool machine__read_build_ids(struct machine *machine, bool with_hits) { - bool ret = __dsos__read_build_ids(&self->kernel_dsos, with_hits); - ret |= __dsos__read_build_ids(&self->user_dsos, with_hits); + bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits); + ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits); return ret; } -static bool perf_session__read_build_ids(struct perf_session *self, bool with_hits) +static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) { struct rb_node *nd; - bool ret = machine__read_build_ids(&self->host_machine, with_hits); + bool ret = machine__read_build_ids(&session->host_machine, with_hits); - for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) { + for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__read_build_ids(pos, with_hits); } @@ -347,7 +347,7 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi return ret; } -static int perf_header__adds_write(struct perf_header *self, +static int perf_header__adds_write(struct perf_header *header, struct perf_evlist *evlist, int fd) { int nr_sections; @@ -357,13 +357,13 @@ static int perf_header__adds_write(struct perf_header *self, u64 sec_start; int idx = 0, err; - session = container_of(self, struct perf_session, header); + session = container_of(header, struct perf_session, header); - if (perf_header__has_feat(self, HEADER_BUILD_ID && + if 
(perf_header__has_feat(header, HEADER_BUILD_ID && !perf_session__read_build_ids(session, true))) - perf_header__clear_feat(self, HEADER_BUILD_ID); + perf_header__clear_feat(header, HEADER_BUILD_ID); - nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); + nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); if (!nr_sections) return 0; @@ -373,10 +373,10 @@ static int perf_header__adds_write(struct perf_header *self, sec_size = sizeof(*feat_sec) * nr_sections; - sec_start = self->data_offset + self->data_size; + sec_start = header->data_offset + header->data_size; lseek(fd, sec_start + sec_size, SEEK_SET); - if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { + if (perf_header__has_feat(header, HEADER_TRACE_INFO)) { struct perf_file_section *trace_sec; trace_sec = &feat_sec[idx++]; @@ -387,14 +387,14 @@ static int perf_header__adds_write(struct perf_header *self, trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; } - if (perf_header__has_feat(self, HEADER_BUILD_ID)) { + if (perf_header__has_feat(header, HEADER_BUILD_ID)) { struct perf_file_section *buildid_sec; buildid_sec = &feat_sec[idx++]; /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - err = dsos__write_buildid_table(self, fd); + err = dsos__write_buildid_table(header, fd); if (err < 0) { pr_debug("failed to write buildid table\n"); goto out_free; @@ -439,7 +439,7 @@ int perf_session__write_header(struct perf_session *session, { struct perf_file_header f_header; struct perf_file_attr f_attr; - struct perf_header *self = &session->header; + struct perf_header *header = &session->header; struct perf_evsel *attr, *pair = NULL; int err; @@ -465,7 +465,7 @@ out_err_write: } } - self->attr_offset = lseek(fd, 0, SEEK_CUR); + header->attr_offset = lseek(fd, 0, SEEK_CUR); list_for_each_entry(attr, &evlist->entries, node) { f_attr = (struct perf_file_attr){ @@ -482,20 +482,20 @@ out_err_write: } } - self->event_offset = lseek(fd, 0, SEEK_CUR); - 
self->event_size = event_count * sizeof(struct perf_trace_event_type); + header->event_offset = lseek(fd, 0, SEEK_CUR); + header->event_size = event_count * sizeof(struct perf_trace_event_type); if (events) { - err = do_write(fd, events, self->event_size); + err = do_write(fd, events, header->event_size); if (err < 0) { pr_debug("failed to write perf header events\n"); return err; } } - self->data_offset = lseek(fd, 0, SEEK_CUR); + header->data_offset = lseek(fd, 0, SEEK_CUR); if (at_exit) { - err = perf_header__adds_write(self, evlist, fd); + err = perf_header__adds_write(header, evlist, fd); if (err < 0) return err; } @@ -505,20 +505,20 @@ out_err_write: .size = sizeof(f_header), .attr_size = sizeof(f_attr), .attrs = { - .offset = self->attr_offset, + .offset = header->attr_offset, .size = evlist->nr_entries * sizeof(f_attr), }, .data = { - .offset = self->data_offset, - .size = self->data_size, + .offset = header->data_offset, + .size = header->data_size, }, .event_types = { - .offset = self->event_offset, - .size = self->event_size, + .offset = header->event_offset, + .size = header->event_size, }, }; - memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); + memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features)); lseek(fd, 0, SEEK_SET); err = do_write(fd, &f_header, sizeof(f_header)); @@ -526,26 +526,26 @@ out_err_write: pr_debug("failed to write perf header\n"); return err; } - lseek(fd, self->data_offset + self->data_size, SEEK_SET); + lseek(fd, header->data_offset + header->data_size, SEEK_SET); - self->frozen = 1; + header->frozen = 1; return 0; } -static int perf_header__getbuffer64(struct perf_header *self, +static int perf_header__getbuffer64(struct perf_header *header, int fd, void *buf, size_t size) { if (readn(fd, buf, size) <= 0) return -1; - if (self->needs_swap) + if (header->needs_swap) mem_bswap_64(buf, size); return 0; } -int perf_header__process_sections(struct perf_header *self, int 
fd, - int (*process)(struct perf_file_section *self, +int perf_header__process_sections(struct perf_header *header, int fd, + int (*process)(struct perf_file_section *section, struct perf_header *ph, int feat, int fd)) { @@ -555,7 +555,7 @@ int perf_header__process_sections(struct perf_header *self, int fd, int idx = 0; int err = -1, feat = 1; - nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); + nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); if (!nr_sections) return 0; @@ -565,17 +565,17 @@ int perf_header__process_sections(struct perf_header *self, int fd, sec_size = sizeof(*feat_sec) * nr_sections; - lseek(fd, self->data_offset + self->data_size, SEEK_SET); + lseek(fd, header->data_offset + header->data_size, SEEK_SET); - if (perf_header__getbuffer64(self, fd, feat_sec, sec_size)) + if (perf_header__getbuffer64(header, fd, feat_sec, sec_size)) goto out_free; err = 0; while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { - if (perf_header__has_feat(self, feat)) { + if (perf_header__has_feat(header, feat)) { struct perf_file_section *sec = &feat_sec[idx++]; - err = process(sec, self, feat, fd); + err = process(sec, header, feat, fd); if (err < 0) break; } @@ -586,35 +586,35 @@ out_free: return err; } -int perf_file_header__read(struct perf_file_header *self, +int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd) { lseek(fd, 0, SEEK_SET); - if (readn(fd, self, sizeof(*self)) <= 0 || - memcmp(&self->magic, __perf_magic, sizeof(self->magic))) + if (readn(fd, header, sizeof(*header)) <= 0 || + memcmp(&header->magic, __perf_magic, sizeof(header->magic))) return -1; - if (self->attr_size != sizeof(struct perf_file_attr)) { - u64 attr_size = bswap_64(self->attr_size); + if (header->attr_size != sizeof(struct perf_file_attr)) { + u64 attr_size = bswap_64(header->attr_size); if (attr_size != sizeof(struct perf_file_attr)) return -1; - mem_bswap_64(self, offsetof(struct perf_file_header, + 
mem_bswap_64(header, offsetof(struct perf_file_header, adds_features)); ph->needs_swap = true; } - if (self->size != sizeof(*self)) { + if (header->size != sizeof(*header)) { /* Support the previous format */ - if (self->size == offsetof(typeof(*self), adds_features)) - bitmap_zero(self->adds_features, HEADER_FEAT_BITS); + if (header->size == offsetof(typeof(*header), adds_features)) + bitmap_zero(header->adds_features, HEADER_FEAT_BITS); else return -1; } - memcpy(&ph->adds_features, &self->adds_features, + memcpy(&ph->adds_features, &header->adds_features, sizeof(ph->adds_features)); /* * FIXME: hack that assumes that if we need swap the perf.data file @@ -628,10 +628,10 @@ int perf_file_header__read(struct perf_file_header *self, perf_header__set_feat(ph, HEADER_BUILD_ID); } - ph->event_offset = self->event_types.offset; - ph->event_size = self->event_types.size; - ph->data_offset = self->data.offset; - ph->data_size = self->data.size; + ph->event_offset = header->event_types.offset; + ph->event_size = header->event_types.size; + ph->data_offset = header->data.offset; + ph->data_size = header->data.size; return 0; } @@ -690,11 +690,10 @@ out: return err; } -static int perf_header__read_build_ids(struct perf_header *self, - int input, u64 offset, u64 size) +static int perf_header__read_build_ids(struct perf_header *header, + int input, u64 offset, u64 size) { - struct perf_session *session = container_of(self, - struct perf_session, header); + struct perf_session *session = container_of(header, struct perf_session, header); struct build_id_event bev; char filename[PATH_MAX]; u64 limit = offset + size; @@ -706,7 +705,7 @@ static int perf_header__read_build_ids(struct perf_header *self, if (read(input, &bev, sizeof(bev)) != sizeof(bev)) goto out; - if (self->needs_swap) + if (header->needs_swap) perf_event_header__bswap(&bev.header); len = bev.header.size - sizeof(bev); @@ -722,13 +721,13 @@ out: return err; } -static int perf_file_section__process(struct 
perf_file_section *self, +static int perf_file_section__process(struct perf_file_section *section, struct perf_header *ph, int feat, int fd) { - if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) { + if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) { pr_debug("Failed to lseek to %" PRIu64 " offset for feature " - "%d, continuing...\n", self->offset, feat); + "%d, continuing...\n", section->offset, feat); return 0; } @@ -738,7 +737,7 @@ static int perf_file_section__process(struct perf_file_section *self, break; case HEADER_BUILD_ID: - if (perf_header__read_build_ids(ph, fd, self->offset, self->size)) + if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) pr_debug("Failed to read buildids, continuing...\n"); break; default: @@ -748,21 +747,21 @@ static int perf_file_section__process(struct perf_file_section *self, return 0; } -static int perf_file_header__read_pipe(struct perf_pipe_file_header *self, +static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, struct perf_header *ph, int fd, bool repipe) { - if (readn(fd, self, sizeof(*self)) <= 0 || - memcmp(&self->magic, __perf_magic, sizeof(self->magic))) + if (readn(fd, header, sizeof(*header)) <= 0 || + memcmp(&header->magic, __perf_magic, sizeof(header->magic))) return -1; - if (repipe && do_write(STDOUT_FILENO, self, sizeof(*self)) < 0) + if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) return -1; - if (self->size != sizeof(*self)) { - u64 size = bswap_64(self->size); + if (header->size != sizeof(*header)) { + u64 size = bswap_64(header->size); - if (size != sizeof(*self)) + if (size != sizeof(*header)) return -1; ph->needs_swap = true; @@ -773,10 +772,10 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self, static int perf_header__read_pipe(struct perf_session *session, int fd) { - struct perf_header *self = &session->header; + struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if 
(perf_file_header__read_pipe(&f_header, self, fd, + if (perf_file_header__read_pipe(&f_header, header, fd, session->repipe) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; @@ -789,7 +788,7 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) int perf_session__read_header(struct perf_session *session, int fd) { - struct perf_header *self = &session->header; + struct perf_header *header = &session->header; struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; @@ -802,7 +801,7 @@ int perf_session__read_header(struct perf_session *session, int fd) if (session->fd_pipe) return perf_header__read_pipe(session, fd); - if (perf_file_header__read(&f_header, self, fd) < 0) { + if (perf_file_header__read(&f_header, header, fd) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; } @@ -814,7 +813,7 @@ int perf_session__read_header(struct perf_session *session, int fd) struct perf_evsel *evsel; off_t tmp; - if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr))) + if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr))) goto out_errno; tmp = lseek(fd, 0, SEEK_CUR); @@ -840,7 +839,7 @@ int perf_session__read_header(struct perf_session *session, int fd) lseek(fd, f_attr.ids.offset, SEEK_SET); for (j = 0; j < nr_ids; j++) { - if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id))) + if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id))) goto out_errno; perf_evlist__id_add(session->evlist, evsel, 0, j, f_id); @@ -854,17 +853,17 @@ int perf_session__read_header(struct perf_session *session, int fd) events = malloc(f_header.event_types.size); if (events == NULL) return -ENOMEM; - if (perf_header__getbuffer64(self, fd, events, + if (perf_header__getbuffer64(header, fd, events, f_header.event_types.size)) goto out_errno; event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - perf_header__process_sections(self, fd, perf_file_section__process); + 
perf_header__process_sections(header, fd, perf_file_section__process); - lseek(fd, self->data_offset, SEEK_SET); + lseek(fd, header->data_offset, SEEK_SET); - self->frozen = 1; + header->frozen = 1; return 0; out_errno: return -errno; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 4cc2675..456661d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -39,7 +39,7 @@ struct perf_pipe_file_header { struct perf_header; -int perf_file_header__read(struct perf_file_header *self, +int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); struct perf_header { @@ -66,12 +66,12 @@ char *perf_header__find_event(u64 id); u64 perf_evlist__sample_type(struct perf_evlist *evlist); bool perf_evlist__sample_id_all(const struct perf_evlist *evlist); -void perf_header__set_feat(struct perf_header *self, int feat); -void perf_header__clear_feat(struct perf_header *self, int feat); -bool perf_header__has_feat(const struct perf_header *self, int feat); +void perf_header__set_feat(struct perf_header *header, int feat); +void perf_header__clear_feat(struct perf_header *header, int feat); +bool perf_header__has_feat(const struct perf_header *header, int feat); -int perf_header__process_sections(struct perf_header *self, int fd, - int (*process)(struct perf_file_section *self, +int perf_header__process_sections(struct perf_header *header, int fd, + int (*process)(struct perf_file_section *section, struct perf_header *ph, int feat, int fd)); -- cgit v0.10.2 From e6e1e2593592a8f6f6380496655d8c6f67431266 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Mar 2011 10:41:56 -0500 Subject: tracing: Remove lock_depth from event entry The lock_depth field in the event headers was added as a temporary data point for help in removing the BKL. Now that the BKL has pretty much been removed, we can remove this field.
This in turn changes the header from 12 bytes to 8 bytes, removing the 4 byte buffer that gcc would insert if the first field in the data load was 8 bytes in size. Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1a99e79..22b32af 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -37,7 +37,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int lock_depth; }; #define FTRACE_MAX_EVENT \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85e3ee1..fd6e1b9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1101,7 +1101,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->lock_depth = (tsk) ? tsk->lock_depth : 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1748,10 +1747,9 @@ static void print_lat_help_header(struct seq_file *m) seq_puts(m, "# | / _----=> need-resched \n"); seq_puts(m, "# || / _---=> hardirq/softirq \n"); seq_puts(m, "# ||| / _--=> preempt-depth \n"); - seq_puts(m, "# |||| /_--=> lock-depth \n"); - seq_puts(m, "# |||||/ delay \n"); - seq_puts(m, "# cmd pid |||||| time | caller \n"); - seq_puts(m, "# \\ / |||||| \\ | / \n"); + seq_puts(m, "# |||| / delay \n"); + seq_puts(m, "# cmd pid ||||| time | caller \n"); + seq_puts(m, "# \\ / ||||| \\ | / \n"); } static void print_func_help_header(struct seq_file *m) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5f499e04..e1d579b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -116,7 +116,6 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); - __common_field(int, lock_depth); return ret; } diff --git a/kernel/trace/trace_output.c 
b/kernel/trace/trace_output.c index 02272ba..151f32e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -529,7 +529,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) * @entry: The trace entry field from the ring buffer * * Prints the generic fields of irqs off, in hard or softirq, preempt - * count and lock depth. + * count. */ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { @@ -554,13 +554,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) else ret = trace_seq_putc(s, '.'); - if (!ret) - return 0; - - if (entry->lock_depth < 0) - return trace_seq_putc(s, '.'); - - return trace_seq_printf(s, "%d", entry->lock_depth); + return ret; } static int -- cgit v0.10.2 From 140e4f2d1cd816aed196705c036763313c0e4bd3 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:19 -0800 Subject: tracing: Fix event alignment: ftrace:context_switch and ftrace:wakeup Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-6-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 6cf2237..1516cb3 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -109,12 +109,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry, */ #define FTRACE_CTX_FIELDS \ __field( unsigned int, prev_pid ) \ + __field( unsigned int, next_pid ) \ + __field( unsigned int, next_cpu ) \ __field( unsigned char, prev_prio ) \ __field( unsigned char, prev_state ) \ - __field( unsigned int, next_pid ) \ __field( unsigned char, next_prio ) \ - __field( unsigned char, next_state ) \ - __field( unsigned int, next_cpu ) + __field( unsigned char, next_state ) FTRACE_ENTRY(context_switch, ctx_switch_entry, -- cgit v0.10.2 From b5e3008e489f5a00c6d5db914a4c4338c9ef5e8b Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:20 -0800 Subject: tracing: Fix event 
alignment: module:module_request Acked-by: Li Zefan Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-7-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/include/trace/events/module.h b/include/trace/events/module.h index c6bae36..21a546d 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -108,14 +108,14 @@ TRACE_EVENT(module_request, TP_ARGS(name, wait, ip), TP_STRUCT__entry( - __field( bool, wait ) __field( unsigned long, ip ) + __field( bool, wait ) __string( name, name ) ), TP_fast_assign( - __entry->wait = wait; __entry->ip = ip; + __entry->wait = wait; __assign_str(name, name); ), @@ -129,4 +129,3 @@ TRACE_EVENT(module_request, /* This part must be outside protection */ #include - -- cgit v0.10.2 From d5bf2ff07230a4a1b73ecb22363f77c02e1d85ab Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:21 -0800 Subject: tracing: Fix event alignment: kvm:kvm_hv_hypercall Acked-by: Avi Kivity Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-8-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1357d7c..db93276 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall, TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), TP_STRUCT__entry( - __field( __u16, code ) - __field( bool, fast ) __field( __u16, rep_cnt ) __field( __u16, rep_idx ) __field( __u64, ingpa ) __field( __u64, outgpa ) + __field( __u16, code ) + __field( bool, fast ) ), TP_fast_assign( - __entry->code = code; - __entry->fast = fast; __entry->rep_cnt = rep_cnt; __entry->rep_idx = rep_idx; __entry->ingpa = ingpa; __entry->outgpa = outgpa; + __entry->code = code; + __entry->fast = fast; ), TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", -- cgit v0.10.2 From ad440ad66f1617194738bf674dfe2d38978ac54d Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 
16:13:22 -0800 Subject: tracing: Fix event alignment: mce:mce_record Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-9-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 7eee778..4cbbcef 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -17,36 +17,36 @@ TRACE_EVENT(mce_record, TP_STRUCT__entry( __field( u64, mcgcap ) __field( u64, mcgstatus ) - __field( u8, bank ) __field( u64, status ) __field( u64, addr ) __field( u64, misc ) __field( u64, ip ) - __field( u8, cs ) __field( u64, tsc ) __field( u64, walltime ) __field( u32, cpu ) __field( u32, cpuid ) __field( u32, apicid ) __field( u32, socketid ) + __field( u8, cs ) + __field( u8, bank ) __field( u8, cpuvendor ) ), TP_fast_assign( __entry->mcgcap = m->mcgcap; __entry->mcgstatus = m->mcgstatus; - __entry->bank = m->bank; __entry->status = m->status; __entry->addr = m->addr; __entry->misc = m->misc; __entry->ip = m->ip; - __entry->cs = m->cs; __entry->tsc = m->tsc; __entry->walltime = m->time; __entry->cpu = m->extcpu; __entry->cpuid = m->cpuid; __entry->apicid = m->apicid; __entry->socketid = m->socketid; + __entry->cs = m->cs; + __entry->bank = m->bank; __entry->cpuvendor = m->cpuvendor; ), -- cgit v0.10.2 From ca9da2dd63b0b32de1b693953dff66cadeb6400b Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:23 -0800 Subject: tracing: Fix event alignment: skb:kfree_skb Acked-by: Neil Horman Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-10-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index f10293c..0c68ae22 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -19,14 +19,14 @@ TRACE_EVENT(kfree_skb, TP_STRUCT__entry( __field( void *, skbaddr ) - __field( unsigned short, protocol ) __field( void *, location ) + __field( unsigned short, protocol ) ), 
TP_fast_assign( __entry->skbaddr = skb; - __entry->protocol = ntohs(skb->protocol); __entry->location = location; + __entry->protocol = ntohs(skb->protocol); ), TP_printk("skbaddr=%p protocol=%u location=%p", -- cgit v0.10.2 From 10da37a645b5e915d8572cc2b1f5eb11ada3ea4f Mon Sep 17 00:00:00 2001 From: David Sharp Date: Fri, 3 Dec 2010 16:13:26 -0800 Subject: tracing: Adjust conditional expression latency formatting. Formatting change only to improve code readability. No code changes except to introduce intermediate variables. Signed-off-by: David Sharp LKML-Reference: <1291421609-14665-13-git-send-email-dhsharp@google.com> [ Keep variable declarations and assignment separate ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 151f32e..456be90 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -533,20 +533,30 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) */ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { - int hardirq, softirq; + char hardsoft_irq; + char need_resched; + char irqs_off; + int hardirq; + int softirq; int ret; hardirq = entry->flags & TRACE_FLAG_HARDIRQ; softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + irqs_off = + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : + '.'; + need_resched = + (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'; + hardsoft_irq = + (hardirq && softirq) ? 'H' : + hardirq ? 'h' : + softirq ? 's' : + '.'; + if (!trace_seq_printf(s, "%c%c%c", - (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : - (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? - 'X' : '.', - (entry->flags & TRACE_FLAG_NEED_RESCHED) ? - 'N' : '.', - (hardirq && softirq) ? 'H' : - hardirq ? 'h' : softirq ? 
's' : '.')) + irqs_off, need_resched, hardsoft_irq)) return 0; if (entry->preempt_count) -- cgit v0.10.2 From 1274a9c2e91652e28efa45c3e5886ec82f08bfbe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Feb 2011 16:43:33 -0500 Subject: ftrace: Add .ref.text as one of the safe areas to trace The section .ref.text will not go away unexpectedly and is safe to trace. Add it to the safe list of sections to allow tracing. Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 038b3d1..f9f6f52 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -206,7 +206,8 @@ static uint32_t (*w2)(uint16_t); static int is_mcounted_section_name(char const *const txtname) { - return 0 == strcmp(".text", txtname) || + return 0 == strcmp(".text", txtname) || + 0 == strcmp(".ref.text", txtname) || 0 == strcmp(".sched.text", txtname) || 0 == strcmp(".spinlock.text", txtname) || 0 == strcmp(".irqentry.text", txtname) || diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 1d7963f..4be0dee 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -130,6 +130,7 @@ if ($inputfile =~ m,kernel/trace/ftrace\.o$,) { # Acceptable sections to record. my %text_sections = ( ".text" => 1, + ".ref.text" => 1, ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, -- cgit v0.10.2 From 722b3c74695377d11d18a52f3da08114d37f3f37 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Feb 2011 20:36:02 -0500 Subject: ftrace/graph: Trace function entry before updating index Currently the index to the ret_stack is updated and the real return address is saved in the ret_stack. Then we call the trace function. The trace function could decide that it doesn't want to trace this function (ex. set_graph_function does not match) and it will return 0 which means not to trace this call. 
The normal function graph tracer has this code: if (!(trace->depth || ftrace_graph_addr(trace->func)) || ftrace_graph_ignore_irqs()) return 0; What this states is, if the trace depth (which is curr_ret_stack) is zero (top of nested functions) then test if we want to trace this function. If this function is not to be traced, then return 0 and the rest of the function graph tracer logic will not trace this function. The problem arises when an interrupt comes in after we updated the curr_ret_stack. The next function that gets called will have a trace->depth of 1. Which fools this trace code into thinking that we are in a nested function, and that we should trace. This causes interrupts to be traced when they should not be. The solution is to trace the function first and then update the ret_stack. Reported-by: zhiping zhong Reported-by: wu zhangjin Signed-off-by: Steven Rostedt diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 382eb29..a93742a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; } - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer) == -EBUSY) { - *parent = old; - return; - } - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; *parent = old; + return; + } + + if (ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer) == -EBUSY) { + *parent = old; + return; } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v0.10.2 From 31274d72f01604f4b02d933b4f3cac84d2c201fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Feb 2011 15:52:19 +0100 Subject: tracing: Explain about unstable clock on resume with ring buffer warning The "Delta way too big" warning might appear on a system with an unstable sched clock right after the system is resumed and
tracing was enabled at time of suspend. Since it's not really a bug, and the unstable sched clock is working fast and reliable otherwise, Steven suggested to keep using the sched clock in any case and just to make note in the warning itself. v2 changes: - added #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK Signed-off-by: Jiri Olsa LKML-Reference: <20110218145219.GD2604@jolsa.brq.redhat.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3237d96..db7b439 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2172,11 +2172,19 @@ rb_reserve_next_event(struct ring_buffer *buffer, if (likely(ts >= cpu_buffer->write_stamp)) { delta = diff; if (unlikely(test_time_stamp(delta))) { + int local_clock_stable = 1; +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + local_clock_stable = sched_clock_stable; +#endif WARN_ONCE(delta > (1ULL << 59), - KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", + KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s", (unsigned long long)delta, (unsigned long long)ts, - (unsigned long long)cpu_buffer->write_stamp); + (unsigned long long)cpu_buffer->write_stamp, + local_clock_stable ? "" : + "If you just came from a suspend/resume,\n" + "please switch to the trace global clock:\n" + " echo global > /sys/kernel/debug/tracing/trace_clock\n"); add_timestamp = 1; } } -- cgit v0.10.2 From 56355b83e2a24ce7e1870c8479205e2cdd332225 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Mon, 8 Nov 2010 14:05:12 +0800 Subject: tracing: Export trace_set_clr_event() Trace events belonging to a module only exist when the module is loaded. Well, we can use trace_set_clr_event function to enable some trace event at the module init routine, so that we will not miss something while loading the module. So, export the trace_set_clr_event function so that modules can use it.
Signed-off-by: Yuanhan Liu LKML-Reference: <1289196312-25323-1-git-send-email-yuanhan.liu@linux.intel.com> Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e1d579b..e88f74f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -325,6 +325,7 @@ int trace_set_clr_event(const char *system, const char *event, int set) { return __ftrace_set_clr_event(NULL, system, event, set); } +EXPORT_SYMBOL_GPL(trace_set_clr_event); /* 128 should be much more than enough */ #define EVENT_BUF_SIZE 127 -- cgit v0.10.2 From 9a24470b2826e4665b1484836c7ae6aba1ddea32 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Mar 2011 14:53:38 -0500 Subject: tracing: Align 4 byte ints together in struct tracer Move elements in struct tracer for better alignment. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 951d0b7..5e9dfc6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -272,8 +272,8 @@ struct tracer { /* If you handled the flag setting, return 0 */ int (*set_flag)(u32 old_flags, u32 bit, int set); struct tracer *next; - int print_max; struct tracer_flags *flags; + int print_max; int use_max_tr; }; -- cgit v0.10.2 From 4a0b1665db09cf2da9ad7d0f12da386373c10bfa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Mar 2011 20:09:26 -0500 Subject: tracing: Fix irqoff selftest expanding max buffer If the kernel command line declares a tracer "ftrace=sometracer" and that tracer is either not defined or is enabled after irqsoff, then the irqs off selftest will fail with the following error: Testing tracer irqsoff: ------------[ cut here ]------------ WARNING: at /home/rostedt/work/autotest/nobackup/linux-test.git/kernel/trace/tra ce.c:713 update_max_tr_single+0xfa/0x11b() Hardware name: Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.38-rc8-test #1 Call Trace: [] ? 
warn_slowpath_common+0x65/0x7a [] ? update_max_tr_single+0xfa/0x11b [] ? warn_slowpath_null+0xf/0x13 [] ? update_max_tr_single+0xfa/0x11b [] ? stop_critical_timing+0x154/0x204 [] ? trace_selftest_startup_irqsoff+0x5b/0xc1 [] ? trace_selftest_startup_irqsoff+0x5b/0xc1 [] ? trace_selftest_startup_irqsoff+0x5b/0xc1 [] ? time_hardirqs_on+0x25/0x28 [] ? trace_hardirqs_on_caller+0x18/0x12f [] ? trace_hardirqs_on+0xb/0xd [] ? trace_selftest_startup_irqsoff+0x5b/0xc1 [] ? register_tracer+0xf8/0x1a3 [] ? init_irqsoff_tracer+0xd/0x11 [] ? do_one_initcall+0x71/0x121 [] ? init_irqsoff_tracer+0x0/0x11 [] ? kernel_init+0x13a/0x1b6 [] ? kernel_init+0x0/0x1b6 [] ? kernel_thread_helper+0x6/0x10 ---[ end trace e93713a9d40cd06c ]--- .. no entries found ..FAILED! What happens is the "ftrace=..." will expand the ring buffer to its default size (from its minimum size) but it will not expand the max ring buffer (the ring buffer to store maximum latencies). When the irqsoff test runs, it will call the ring buffer swap routine that checks if the max ring buffer is the same size as the normal ring buffer, and will fail if it is not. This causes the test to fail. The solution is to expand the max ring buffer before running the self test if the max ring buffer is used by that tracer and the normal ring buffer is expanded. The max ring buffer should be shrunk again after the test is done to save space. 
Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd6e1b9..9541c27 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -779,6 +779,11 @@ __acquires(kernel_lock) tracing_reset_online_cpus(tr); current_trace = type; + + /* If we expanded the buffers, make sure the max is expanded too */ + if (ring_buffer_expanded && type->use_max_tr) + ring_buffer_resize(max_tr.buffer, trace_buf_size); + /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); ret = type->selftest(type, tr); @@ -791,6 +796,10 @@ __acquires(kernel_lock) /* Only reset on passing, to avoid touching corrupted buffers */ tracing_reset_online_cpus(tr); + /* Shrink the max buffer again */ + if (ring_buffer_expanded && type->use_max_tr) + ring_buffer_resize(max_tr.buffer, 1); + printk(KERN_CONT "PASSED\n"); } #endif -- cgit v0.10.2 From 5e814dd597c42daeb8d2a276e64a6ec986ad0e2a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 15 Mar 2011 20:51:09 +0100 Subject: perf probe: Clean up probe_point_lazy_walker() return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer compilers (gcc 4.6) complain about: return ret < 0 ?: 0; For the following reason: util/probe-finder.c: In function ‘probe_point_lazy_walker’: util/probe-finder.c:1331:18: error: the omitted middle operand in ?: will always be ‘true’, suggest explicit middle operand [-Werror=parentheses] And indeed the return value is a somewhat obscure (but correct) value of 'true', so return 'ret' instead - this is cleaner and unconfuses GCC as well.
Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 17f9c4a..194f9e2 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1328,7 +1328,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno, * Continue if no error, because the lazy pattern will match * to other lines */ - return ret < 0 ?: 0; + return ret < 0 ? ret : 0; } /* Find probe points from lazy pattern */ -- cgit v0.10.2