From 2d1b6949d2c855f195de0f5146625015ecca3944 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sat, 1 Aug 2009 13:15:36 +0200
Subject: perf_counter tools: Fix link errors with older toolchains

On older distros (F8 for example) the perf build could fail with
missing symbols such as:

  LINK perf
  /usr/lib/gcc/x86_64-redhat-linux/4.3.2/../../../../lib64/libbfd.a(bfd.o): In function `bfd_demangle':
  (.text+0x2b3): undefined reference to `cplus_demangle'
  /usr/lib/gcc/x86_64-redhat-linux/4.3.2/../../../../lib64/libbfd.a(bfd.o): In function `bfd_demangle':

Link in -liberty too.

Cc: Peter Zijlstra
Cc: Mike Galbraith
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker
LKML-Reference:
Signed-off-by: Ingo Molnar

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a5e9b87..4b20fa4 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -345,7 +345,7 @@ BUILTIN_OBJS += builtin-stat.o
 BUILTIN_OBJS += builtin-top.o
 
 PERFLIBS = $(LIB_FILE)
-EXTLIBS = -lbfd
+EXTLIBS = -lbfd -liberty
 
 #
 # Platform specific tweaks
--
cgit v0.10.2

From 470a1396c25c27b4aff08b14d5c9cd9b3da15e09 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 29 Jul 2009 10:50:09 +0200
Subject: tracing, perf_counter: Add help text to CONFIG_EVENT_PROFILE

Explain what tracepoint profiling sources are about.

Signed-off-by: Peter Zijlstra
Acked-by: Jeff Garzik
LKML-Reference: <1248856508.6987.3041.camel@twins>
Signed-off-by: Ingo Molnar

diff --git a/init/Kconfig b/init/Kconfig
index cb2c092..823ee0a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -961,9 +961,17 @@ config PERF_COUNTERS
 	  Say Y if unsure.
 
 config EVENT_PROFILE
-	bool "Tracepoint profile sources"
+	bool "Tracepoint profiling sources"
 	depends on PERF_COUNTERS && EVENT_TRACING
 	default y
+	help
+	  Allow the use of tracepoints as software performance counters.
+
+	  When this is enabled, you can create perf counters based on
+	  tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
+	  found in debugfs://tracing/events/*/*/id. (The -e/--events
+	  option to the perf tool can parse and interpret symbolic
+	  tracepoints, in the subsystem:tracepoint_name format.)
 
 endmenu
 
--
cgit v0.10.2

From e53c0994709166b111fbe9162d1a16ece7dfc45b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 24 Jul 2009 14:42:10 +0200
Subject: perf_counter: Collapse inherit on read()

Currently the counter value returned by read() is the value of the
parent counter, to which child counters are only fed back on child
exit. Thus read() can return rather erratic (and meaningless) numbers
depending on the state of the child processes.

Change this by always iterating the full child hierarchy on read()
and summing all counters.
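For illustration (not part of the patch), a minimal user-space sketch
of the behaviour this gives: with attr.inherit set, a single read()
now returns the sum over the parent counter and all of its children,
instead of the parent's value alone. The sketch assumes the raw
syscall number __NR_perf_counter_open from the arch headers (there is
no glibc wrapper) and keeps error handling minimal:

  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_counter.h>

  int main(void)
  {
          struct perf_counter_attr attr;
          unsigned long long count;
          int fd;

          memset(&attr, 0, sizeof(attr));
          attr.type    = PERF_TYPE_HARDWARE;
          attr.size    = sizeof(attr);
          attr.config  = PERF_COUNT_HW_CPU_CYCLES;
          attr.inherit = 1;                /* children inherit the counter */

          fd = syscall(__NR_perf_counter_open, &attr, 0, -1, -1, 0);
          if (fd < 0)
                  return 1;

          /* ... fork off workers here and let them run ... */

          read(fd, &count, sizeof(count)); /* now: parent + all children */
          printf("cycles: %llu\n", count);
          return 0;
  }

(The values[] handling in the hunk below shows where the summed total
lands when additional read_format fields are requested.)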
Suggested-by: Corey Ashford
Signed-off-by: Peter Zijlstra
LKML-Reference:
Signed-off-by: Ingo Molnar

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 9509310..48471d7 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1688,6 +1688,18 @@ static int perf_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static u64 perf_counter_read_tree(struct perf_counter *counter)
+{
+	struct perf_counter *child;
+	u64 total = 0;
+
+	total += perf_counter_read(counter);
+	list_for_each_entry(child, &counter->child_list, child_list)
+		total += perf_counter_read(child);
+
+	return total;
+}
+
 /*
  * Read the performance counter - simple non blocking version for now
  */
@@ -1707,7 +1719,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
 
 	WARN_ON_ONCE(counter->ctx->parent_ctx);
 	mutex_lock(&counter->child_mutex);
-	values[0] = perf_counter_read(counter);
+	values[0] = perf_counter_read_tree(counter);
 	n = 1;
 	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
 		values[n++] = counter->total_time_enabled +
--
cgit v0.10.2

From 9f498cc5be7e013d8d6e4c616980ed0ffc8680d2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 23 Jul 2009 14:46:33 +0200
Subject: perf_counter: Full task tracing

In order to be able to distinguish between no samples due to
inactivity and no samples because the task ended, Arjan asked for
PERF_EVENT_EXIT events. This is useful for the boot delay
instrumentation (bootchart) app.

This patch changes PERF_EVENT_FORK to be emitted on every clone, and
adds PERF_EVENT_EXIT to be emitted on task exit, after the task's
counters have been closed.

This task tracing is controlled through: attr.comm || attr.mmap
and through the new attr.task field.

Suggested-by: Arjan van de Ven
Cc: Paul Mackerras
Cc: Anton Blanchard
Signed-off-by: Peter Zijlstra
[ cleaned up perf_counter.h a bit ]
Signed-off-by: Ingo Molnar

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index bd15d7a..e604e6e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -181,8 +181,9 @@ struct perf_counter_attr {
 				freq           : 1, /* use freq, not period */
 				inherit_stat   : 1, /* per task counts      */
 				enable_on_exec : 1, /* next exec enables    */
+				task           : 1, /* trace fork/exit      */
 
-				__reserved_1   : 51;
+				__reserved_1   : 50;
 
 	__u32			wakeup_events;	/* wakeup every n events */
 	__u32			__reserved_2;
@@ -311,6 +312,15 @@ enum perf_event_type {
 	/*
 	 * struct {
 	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 * };
+	 */
+	PERF_EVENT_EXIT			= 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
 	 *	u64				time;
 	 *	u64				id;
 	 *	u64				stream_id;
@@ -323,6 +333,7 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
 	 * };
 	 */
 	PERF_EVENT_FORK			= 7,
diff --git a/kernel/fork.c b/kernel/fork.c
index 29b532e..466531e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
+	perf_counter_fork(p);
 	return p;
 
 bad_fork_free_pid:
@@ -1410,9 +1411,6 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
-		if (!(clone_flags & CLONE_THREAD))
-			perf_counter_fork(p);
-
 		audit_finish_fork(p);
 		tracehook_report_clone(regs, clone_flags, nr, p);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 48471d7..199ed47 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1;
 static atomic_t nr_counters __read_mostly;
 static atomic_t nr_mmap_counters __read_mostly;
 static atomic_t nr_comm_counters __read_mostly;
+static atomic_t nr_task_counters __read_mostly;
 
 /*
  * perf counter paranoia level:
@@ -1654,6 +1655,8 @@ static void free_counter(struct perf_counter *counter)
 			atomic_dec(&nr_mmap_counters);
 		if (counter->attr.comm)
 			atomic_dec(&nr_comm_counters);
+		if (counter->attr.task)
+			atomic_dec(&nr_task_counters);
 	}
 
 	if (counter->destroy)
@@ -2831,10 +2834,12 @@ perf_counter_read_event(struct perf_counter *counter,
 }
 
 /*
- * fork tracking
+ * task tracking -- fork/exit
+ *
+ * enabled by: attr.comm | attr.mmap | attr.task
  */
 
-struct perf_fork_event {
+struct perf_task_event {
 	struct task_struct	*task;
 
 	struct {
@@ -2842,37 +2847,42 @@ struct perf_fork_event {
 
 		u32				pid;
 		u32				ppid;
+		u32				tid;
+		u32				ptid;
 	} event;
 };
 
-static void perf_counter_fork_output(struct perf_counter *counter,
-				     struct perf_fork_event *fork_event)
+static void perf_counter_task_output(struct perf_counter *counter,
+				     struct perf_task_event *task_event)
 {
 	struct perf_output_handle handle;
-	int size = fork_event->event.header.size;
-	struct task_struct *task = fork_event->task;
+	int size = task_event->event.header.size;
+	struct task_struct *task = task_event->task;
 	int ret = perf_output_begin(&handle, counter, size, 0, 0);
 
 	if (ret)
 		return;
 
-	fork_event->event.pid = perf_counter_pid(counter, task);
-	fork_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+	task_event->event.pid = perf_counter_pid(counter, task);
+	task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
+
+	task_event->event.tid = perf_counter_tid(counter, task);
+	task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
 
-	perf_output_put(&handle, fork_event->event);
+	perf_output_put(&handle, task_event->event);
 	perf_output_end(&handle);
 }
 
-static int perf_counter_fork_match(struct perf_counter *counter)
+static int perf_counter_task_match(struct perf_counter *counter)
 {
-	if (counter->attr.comm || counter->attr.mmap)
+	if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
 		return 1;
 
 	return 0;
 }
 
-static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
-				  struct perf_fork_event *fork_event)
+static void perf_counter_task_ctx(struct perf_counter_context *ctx,
+				  struct perf_task_event *task_event)
 {
 	struct perf_counter *counter;
 
@@ -2881,19 +2891,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_counter_fork_match(counter))
-			perf_counter_fork_output(counter, fork_event);
+		if (perf_counter_task_match(counter))
+			perf_counter_task_output(counter, task_event);
 	}
 	rcu_read_unlock();
 }
 
-static void perf_counter_fork_event(struct perf_fork_event *fork_event)
+static void perf_counter_task_event(struct perf_task_event *task_event)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_counter_context *ctx;
 
 	cpuctx = &get_cpu_var(perf_cpu_context);
-	perf_counter_fork_ctx(&cpuctx->ctx, fork_event);
+	perf_counter_task_ctx(&cpuctx->ctx, task_event);
 	put_cpu_var(perf_cpu_context);
 
 	rcu_read_lock();
@@ -2903,32 +2913,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event)
 	 */
 	ctx = rcu_dereference(current->perf_counter_ctxp);
 	if (ctx)
-		perf_counter_fork_ctx(ctx, fork_event);
+		perf_counter_task_ctx(ctx, task_event);
 	rcu_read_unlock();
 }
 
-void perf_counter_fork(struct task_struct *task)
+static void perf_counter_task(struct task_struct *task, int new)
 {
-	struct perf_fork_event fork_event;
+	struct perf_task_event task_event;
 
 	if (!atomic_read(&nr_comm_counters) &&
-	    !atomic_read(&nr_mmap_counters))
+	    !atomic_read(&nr_mmap_counters) &&
+	    !atomic_read(&nr_task_counters))
 		return;
 
-	fork_event = (struct perf_fork_event){
+	task_event = (struct perf_task_event){
 		.task	= task,
 		.event  = {
 			.header = {
-				.type = PERF_EVENT_FORK,
+				.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
 				.misc = 0,
-				.size = sizeof(fork_event.event),
+				.size = sizeof(task_event.event),
 			},
 			/* .pid  */
 			/* .ppid */
+			/* .tid  */
+			/* .ptid */
 		},
 	};
 
-	perf_counter_fork_event(&fork_event);
+	perf_counter_task_event(&task_event);
+}
+
+void perf_counter_fork(struct task_struct *task)
+{
+	perf_counter_task(task, 1);
 }
 
 /*
@@ -3887,6 +3905,8 @@ done:
 			atomic_inc(&nr_mmap_counters);
 		if (counter->attr.comm)
 			atomic_inc(&nr_comm_counters);
+		if (counter->attr.task)
+			atomic_inc(&nr_task_counters);
 	}
 
 	return counter;
@@ -4248,8 +4268,10 @@ void perf_counter_exit_task(struct task_struct *child)
 	struct perf_counter_context *child_ctx;
 	unsigned long flags;
 
-	if (likely(!child->perf_counter_ctxp))
+	if (likely(!child->perf_counter_ctxp)) {
+		perf_counter_task(child, 0);
 		return;
+	}
 
 	local_irq_save(flags);
 	/*
@@ -4267,15 +4289,22 @@ void perf_counter_exit_task(struct task_struct *child)
 	 * incremented the context's refcount before we do put_ctx below.
 	 */
 	spin_lock(&child_ctx->lock);
-	child->perf_counter_ctxp = NULL;
 	/*
 	 * If this context is a clone; unclone it so it can't get
 	 * swapped to another process while we're removing all
 	 * the counters from it.
	 */
 	unclone_ctx(child_ctx);
-	spin_unlock(&child_ctx->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&child_ctx->lock, flags);
+
+	/*
+	 * Report the task dead after unscheduling the counters so that we
+	 * won't get any samples after PERF_EVENT_EXIT. We can however still
+	 * get a few PERF_EVENT_READ events.
+	 */
+	perf_counter_task(child, 0);
+
+	child->perf_counter_ctxp = NULL;
 
 	/*
	 * We can recurse on the same lock type through:
--
cgit v0.10.2

From 27d028de64bd7e1f8e72bdeae6b0586939574fcb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 23 Jul 2009 16:52:41 +0200
Subject: perf report: Update for the new FORK/EXIT events

Since FORK is now also issued for threads, detect those by comparing
the parent and child PID. Teach it about EXIT events and ignore them.

Signed-off-by: Peter Zijlstra
Cc: Paul Mackerras
Signed-off-by: Ingo Molnar

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b20a4b6..95fd06c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -99,6 +99,7 @@ struct comm_event {
 struct fork_event {
 	struct perf_event_header header;
 	u32 pid, ppid;
+	u32 tid, ptid;
 };
 
 struct lost_event {
@@ -1608,15 +1609,27 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 }
 
 static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
+process_task_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	struct thread *thread = threads__findnew(event->fork.pid);
 	struct thread *parent = threads__findnew(event->fork.ppid);
 
-	dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n",
+	dprintf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
-		event->fork.pid, event->fork.ppid);
+		event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT",
+		event->fork.pid, event->fork.tid,
+		event->fork.ppid, event->fork.ptid);
+
+	/*
+	 * A thread clone will have the same PID for both
+	 * parent and child.
+	 */
+	if (thread == parent)
+		return 0;
+
+	if (event->header.type == PERF_EVENT_EXIT)
+		return 0;
 
 	if (!thread || !parent || thread__fork(thread, parent)) {
 		dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
@@ -1706,7 +1719,8 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 		return process_comm_event(event, offset, head);
 
 	case PERF_EVENT_FORK:
-		return process_fork_event(event, offset, head);
+	case PERF_EVENT_EXIT:
+		return process_task_event(event, offset, head);
 
 	case PERF_EVENT_LOST:
 		return process_lost_event(event, offset, head);
--
cgit v0.10.2

From 9b30a26bf3d2c56dcb1c3afaca28b73fcd6ed405 Mon Sep 17 00:00:00 2001
From: Stoyan Gaydarov
Date: Thu, 30 Jul 2009 05:25:29 -0500
Subject: perf tools: Fix faulty check

This patch fixes a spelling error that resulted from copy and
pasting. The location of the error was found using a semantic patch,
although the semantic patch was not written to look for this kind of
error. After looking things over, this change seemed to be the
logical fix. Please review it and include the patch if it is in fact
correct.

Signed-off-by: Stoyan Gaydarov
Signed-off-by: Peter Zijlstra
Cc: Mike Galbraith
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker
LKML-Reference: <1248949529-20891-1-git-send-email-sgayda2@uiuc.edu>
Signed-off-by: Ingo Molnar

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 2810605..b4fe057 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -565,7 +565,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 		goto out_elf_end;
 
 	secstrs = elf_getdata(sec_strndx, NULL);
-	if (symstrs == NULL)
+	if (secstrs == NULL)
 		goto out_elf_end;
 
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
--
cgit v0.10.2

From 59b9005692d4c8b5d73cfc41aa7229f47be163a9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Sun, 26 Jul 2009 19:06:19 -0300
Subject: perf top: Add mwait_idle_with_hints to skip_symbols[]

We skip the display of idle-routine-related symbols because they are
typically rather erratic and confusing: they depend on the IRQ rate,
or sometimes they dominate the profile if they are polling based.
Add mwait_idle_with_hints too; it is one of the idle routines on x86.

Signed-off-by: Arnaldo Carvalho de Melo
Signed-off-by: Ingo Molnar

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c0a4230..f139f1a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -285,6 +285,7 @@ static const char *skip_symbols[] = {
 	"enter_idle",
 	"exit_idle",
 	"mwait_idle",
+	"mwait_idle_with_hints",
 	"ppc64_runlatch_off",
 	"pseries_dedicated_idle_sleep",
 	NULL
--
cgit v0.10.2

From 7e030655dda5b5efc4305e2a8f46c4967d32eb3d Mon Sep 17 00:00:00 2001
From: Roel Kluin
Date: Sun, 2 Aug 2009 13:43:11 +0200
Subject: perf: Fix read buffer overflow

Check whether the index is within bounds before testing the element.
Signed-off-by: Roel Kluin
Cc: a.p.zijlstra@chello.nl
Cc: Andrew Morton
LKML-Reference: <4A757BCF.40101@gmail.com>
Signed-off-by: Ingo Molnar

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 95fd06c..ce4f286 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -253,7 +253,7 @@ static int strcommon(const char *pathname)
 {
 	int n = 0;
 
-	while (pathname[n] == cwd[n] && n < cwdlen)
+	while (n < cwdlen && pathname[n] == cwd[n])
 		++n;
 
 	return n;
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
index c6e5dc0..2726fe4 100644
--- a/tools/perf/util/quote.c
+++ b/tools/perf/util/quote.c
@@ -318,7 +318,7 @@ char *quote_path_relative(const char *in, int len,
 	strbuf_addch(out, '"');
 	if (prefix) {
 		int off = 0;
-		while (prefix[off] && off < len && prefix[off] == in[off])
+		while (off < len && prefix[off] && prefix[off] == in[off])
 			if (prefix[off] == '/') {
 				prefix += off + 1;
 				in += off + 1;
--
cgit v0.10.2

From f26542600e605482a1231c44ddb2966d69bd09b0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Mon, 29 Jun 2009 10:40:20 +0200
Subject: perf_counter: Set the CONFIG_PERF_COUNTERS default to y if CONFIG_PROFILING=y

If the user has already enabled profiling support in the kernel (for
oprofile, old-style profiling or ftrace), then offer up perf counters
with a default of y in interactive kconfig sessions. Still keep it
off by default otherwise.

Cc: Peter Zijlstra
Cc: Linus Torvalds
Signed-off-by: Ingo Molnar

diff --git a/init/Kconfig b/init/Kconfig
index 823ee0a..3f7e609 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -940,6 +940,7 @@ menu "Performance Counters"
 
 config PERF_COUNTERS
 	bool "Kernel Performance Counters"
+	default y if PROFILING
 	depends on HAVE_PERF_COUNTERS
 	select ANON_INODES
 	help
--
cgit v0.10.2
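For illustration (not part of the series): with the task tracing
patches above, PERF_EVENT_FORK and PERF_EVENT_EXIT records carry the
layout shown in the perf_counter.h hunk. A minimal sketch of a
consumer; how a record arrives (the mmap ring-buffer protocol) is
elided, and ev points at one complete record:

  #include <stdio.h>
  #include <linux/perf_counter.h>

  /* layout from the PERF_EVENT_FORK/EXIT comments above */
  struct task_event {
          struct perf_event_header header;
          __u32 pid, ppid;
          __u32 tid, ptid;
  };

  static void handle_task_event(struct perf_event_header *ev)
  {
          struct task_event *te = (struct task_event *)ev;

          switch (ev->type) {
          case PERF_EVENT_FORK:   /* now emitted on every clone, threads included */
                  printf("fork: (%u:%u) parent (%u:%u)\n",
                         te->pid, te->tid, te->ppid, te->ptid);
                  break;
          case PERF_EVENT_EXIT:   /* emitted after the task's counters are closed */
                  printf("exit: (%u:%u)\n", te->pid, te->tid);
                  break;
          }
  }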