From a5285ad9e30fd90b88a11adcab97bd4c3ffe44eb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Nov 2013 02:02:09 +0100 Subject: perf tools: Tag thread comm as overriden The problem is that when a thread overrides its default ":%pid" comm, we forget to tag the thread comm as overriden. Hence, this overriden comm is not inherited on future forks. Fix it. Signed-off-by: Frederic Weisbecker Tested-by: David Ahern Acked-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131116010207.GA18855@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index cd8e2f5..49eaf1d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -70,14 +70,13 @@ int thread__set_comm(struct thread *thread, const char *str, u64 timestamp) /* Override latest entry if it had no specific time coverage */ if (!curr->start) { comm__override(curr, str, timestamp); - return 0; + } else { + new = comm__new(str, timestamp); + if (!new) + return -ENOMEM; + list_add(&new->list, &thread->comm_list); } - new = comm__new(str, timestamp); - if (!new) - return -ENOMEM; - - list_add(&new->list, &thread->comm_list); thread->comm_set = true; return 0; -- cgit v0.10.2 From 210e812f036736aeda097d9a6ef84b1f2b334bae Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Nov 2013 11:20:43 +0900 Subject: perf header: Fix bogus group name When processing event group descriptor in perf file header, we reuse an allocated group name but forgot to prevent it from freeing. Reported-by: Stephane Eranian Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1384741244-7271-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 369c036..559c516 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2078,8 +2078,10 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, if (evsel->idx == (int) desc[i].leader_idx) { evsel->leader = evsel; /* {anon_group} is a dummy name */ - if (strcmp(desc[i].name, "{anon_group}")) + if (strcmp(desc[i].name, "{anon_group}")) { evsel->group_name = desc[i].name; + desc[i].name = NULL; + } evsel->nr_members = desc[i].nr_members; if (i >= nr_groups || nr > 0) { -- cgit v0.10.2 From 50a2740b839ece03b305facd3fc07cdc3b74247c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Nov 2013 11:20:44 +0900 Subject: perf header: Fix possible memory leaks in process_group_desc() After processing all group descriptors or encountering an error, it frees all descriptors. However, current logic can leak memory since it might not traverse all descriptors. Note that the 'i' can have different value than nr_groups when an error occurred and it's safe to call free(desc[i].name) for every desc since we already make it NULL when it's reused for group names. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1384741244-7271-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 559c516..1cd0357 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2107,7 +2107,7 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, ret = 0; out_free: - while ((int) --i >= 0) + for (i = 0; i < nr_groups; i++) free(desc[i].name); free(desc); -- cgit v0.10.2 From eff2c92f86c2ac2a0eab3749d58be39592293c3a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 18 Nov 2013 14:23:14 -0500 Subject: tools lib traceevent: Fix use of multiple options in processing field Jiri Olsa reported that the scsi_dispatch_cmd_done event failed to parse with: Error: expected type 5 but read 4 Error: expected type 5 but read 4 The problem is with this part of the print_fmt: __print_symbolic(((REC->result) >> 24) & 0xff, ... The __print_symbolic() helper function's first parameter is the field to use to determine what symbol to print based on the value of the result. The parser can handle one operation, but it can not handle multiple operations ('>>' and '&'). Add code to process all operations for the field argument for __print_symbolic() as well as __print_flags(). Reported-by: Jiri Olsa Signed-off-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131118142314.27ca334b@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 0362d57..8a5b65d 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1606,6 +1606,24 @@ process_arg(struct event_format *event, struct print_arg *arg, char **tok) static enum event_type process_op(struct event_format *event, struct print_arg *arg, char **tok); +/* + * For __print_symbolic() and __print_flags, we need to completely + * evaluate the first argument, which defines what to print next. + */ +static enum event_type +process_field_arg(struct event_format *event, struct print_arg *arg, char **tok) +{ + enum event_type type; + + type = process_arg(event, arg, tok); + + while (type == EVENT_OP) { + type = process_op(event, arg, tok); + } + + return type; +} + static enum event_type process_cond(struct event_format *event, struct print_arg *top, char **tok) { @@ -2371,7 +2389,7 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) goto out_free; } - type = process_arg(event, field, &token); + type = process_field_arg(event, field, &token); /* Handle operations in the first argument */ while (type == EVENT_OP) @@ -2424,7 +2442,8 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) goto out_free; } - type = process_arg(event, field, &token); + type = process_field_arg(event, field, &token); + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free_field; -- cgit v0.10.2 From d5b5f391d434c5cc8bcb1ab2d759738797b85f52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Nov 2013 16:23:04 +0100 Subject: ftrace, perf: Avoid infinite event generation loop Vince's perf-trinity fuzzer found yet another 'interesting' problem. When we sample the irq_work_exit tracepoint with period==1 (or PERF_SAMPLE_PERIOD) and we add an fasync SIGNAL handler we create an infinite event generation loop: ,-> | irq_work_exit() -> | trace_irq_work_exit() -> | ... | __perf_event_overflow() -> (due to fasync) | irq_work_queue() -> (irq_work_list must be empty) '--------- arch_irq_work_raise() Similar things can happen due to regular poll() wakeups if we exceed the ring-buffer wakeup watermark, or have an event_limit. To avoid this, dis-allow sampling this particular tracepoint. In order to achieve this, create a special perf_perm function pointer for each event and call this (when set) on trying to create a tracepoint perf event. [ roasted: use expr... to allow for ',' in your expression ] Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Dave Jones Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20131114152304.GC5364@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 2874df2..4cab890 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -72,6 +72,17 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi); DEFINE_IRQ_VECTOR_EVENT(irq_work); /* + * We must dis-allow sampling irq_work_exit() because perf event sampling + * itself can cause irq_work, which would lead to an infinite loop; + * + * 1) irq_work_exit happens + * 2) generates perf sample + * 3) generates irq_work + * 4) goto 1 + */ +TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); + +/* * call_function - called when entering/exiting a call function interrupt * vector handler */ diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 9abbe63..8c9b7a1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -248,6 +248,9 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + + int (*perf_perm)(struct ftrace_event_call *, + struct perf_event *); #endif }; @@ -317,6 +320,19 @@ struct ftrace_event_file { } \ early_initcall(trace_init_flags_##name); +#define __TRACE_EVENT_PERF_PERM(name, expr...) \ + static int perf_perm_##name(struct ftrace_event_call *tp_event, \ + struct perf_event *p_event) \ + { \ + return ({ expr; }); \ + } \ + static int __init trace_init_perf_perm_##name(void) \ + { \ + event_##name.perf_perm = &perf_perm_##name; \ + return 0; \ + } \ + early_initcall(trace_init_perf_perm_##name); + #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index ebeab36..f16dc0a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -267,6 +267,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FLAGS(event, flag) +#define TRACE_EVENT_PERF_PERM(event, expr...) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -399,4 +401,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FLAGS(event, flag) +#define TRACE_EVENT_PERF_PERM(event, expr...) + #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 52594b2..6b852f6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -90,6 +90,10 @@ #define TRACE_EVENT_FLAGS(name, value) \ __TRACE_EVENT_FLAGS(name, value) +#undef TRACE_EVENT_PERF_PERM +#define TRACE_EVENT_PERF_PERM(name, expr...) \ + __TRACE_EVENT_PERF_PERM(name, expr) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -140,6 +144,9 @@ #undef TRACE_EVENT_FLAGS #define TRACE_EVENT_FLAGS(event, flag) +#undef TRACE_EVENT_PERF_PERM +#define TRACE_EVENT_PERF_PERM(event, expr...) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 78e27e3..630889f 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -24,6 +24,12 @@ static int total_ref_count; static int perf_trace_event_perm(struct ftrace_event_call *tp_event, struct perf_event *p_event) { + if (tp_event->perf_perm) { + int ret = tp_event->perf_perm(tp_event, p_event); + if (ret) + return ret; + } + /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) -- cgit v0.10.2 From 06db0b21712f878b808480ef31097637013bbf0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Sep 2013 13:14:47 +0200 Subject: perf: Remove fragile swevent hlist optimization Currently we only allocate a single cpu hashtable for per-cpu swevents; do away with this optimization for it is fragile in the face of things like perf_pmu_migrate_context(). The easiest thing is to make sure all CPUs are consistent wrt state. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20130913111447.GN31370@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index d724e77..72348dc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5680,11 +5680,6 @@ static void swevent_hlist_put(struct perf_event *event) { int cpu; - if (event->cpu != -1) { - swevent_hlist_put_cpu(event, event->cpu); - return; - } - for_each_possible_cpu(cpu) swevent_hlist_put_cpu(event, cpu); } @@ -5718,9 +5713,6 @@ static int swevent_hlist_get(struct perf_event *event) int err; int cpu, failed_cpu; - if (event->cpu != -1) - return swevent_hlist_get_cpu(event, event->cpu); - get_online_cpus(); for_each_possible_cpu(cpu) { err = swevent_hlist_get_cpu(event, cpu); -- cgit v0.10.2 From 0022cedd4a7d8a87841351e2b018bb6794cf2e67 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 15 Nov 2013 12:39:45 -0500 Subject: perf/trace: Properly use u64 to hold event_id The 64-bit attr.config value for perf trace events was being copied into an "int" before doing a comparison, meaning the top 32 bits were being truncated. As far as I can tell this didn't cause any errors, but it did mean it was possible to create valid aliases for all the tracepoint ids which I don't think was intended. (For example, 0xffffffff00000018 and 0x18 both enable the same tracepoint). Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1311151236100.11932@vincent-weaver-1.um.maine.edu Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 630889f..e854f42 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -179,7 +179,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, int perf_trace_init(struct perf_event *p_event) { struct ftrace_event_call *tp_event; - int event_id = p_event->attr.config; + u64 event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); -- cgit v0.10.2 From 6b5fa0ba4f85a8499287aefaf3f1375450c40c6d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2013 16:14:51 -0300 Subject: tools lib traceevent: Fix conversion of pointer to integer of different size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc complaint on 32-bit system: /home/acme/git/linux/tools/lib/traceevent/event-parse.c: In function ‘eval_num_arg’: /home/acme/git/linux/tools/lib/traceevent/event-parse.c:3468:9: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] This is because the eval_num_arg returns everything as an 'unsigned long long', so it converts a void pointer to a wider integer, fix it by converting the void pointer to an integer of the same size, 'unsigned long', before casting it to 'unsigned long long'. Acked-by: Steven Rostedt Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-yllx4aqcg06v5n4vjpwiiuld@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 8a5b65d..217c82e 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3465,7 +3465,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg * is in the bottom half of the 32 bit field. */ offset &= 0xffff; - val = (unsigned long long)(data + offset); + val = (unsigned long long)((unsigned long)data + offset); break; default: /* not sure what to do there */ return 0; -- cgit v0.10.2