From 67516844625f45f0ce148a01c27bf41f591872b2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 9 Jul 2013 18:56:31 +0200
Subject: perf: Remove the 'match' callback for auxiliary events processing

It gives the following benefits:

  - only one function pointer is passed along the way

  - the 'match' function is called within output function
    and could be inlined by the compiler

Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1373388991-9711-1-git-send-email-jolsa@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index eba8fb5..708ab70 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4680,12 +4680,10 @@ perf_event_read_event(struct perf_event *event,
 	perf_output_end(&handle);
 }
 
-typedef int  (perf_event_aux_match_cb)(struct perf_event *event, void *data);
 typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
 
 static void
 perf_event_aux_ctx(struct perf_event_context *ctx,
-		   perf_event_aux_match_cb match,
 		   perf_event_aux_output_cb output,
 		   void *data)
 {
@@ -4696,15 +4694,12 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
 			continue;
 		if (!event_filter_match(event))
 			continue;
-		if (match(event, data))
-			output(event, data);
+		output(event, data);
 	}
 }
 
 static void
-perf_event_aux(perf_event_aux_match_cb match,
-	       perf_event_aux_output_cb output,
-	       void *data,
+perf_event_aux(perf_event_aux_output_cb output, void *data,
 	       struct perf_event_context *task_ctx)
 {
 	struct perf_cpu_context *cpuctx;
@@ -4717,7 +4712,7 @@ perf_event_aux(perf_event_aux_match_cb match,
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
 		if (cpuctx->unique_pmu != pmu)
 			goto next;
-		perf_event_aux_ctx(&cpuctx->ctx, match, output, data);
+		perf_event_aux_ctx(&cpuctx->ctx, output, data);
 		if (task_ctx)
 			goto next;
 		ctxn = pmu->task_ctx_nr;
@@ -4725,14 +4720,14 @@ perf_event_aux(perf_event_aux_match_cb match,
 			goto next;
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
 		if (ctx)
-			perf_event_aux_ctx(ctx, match, output, data);
+			perf_event_aux_ctx(ctx, output, data);
 next:
 		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
 
 	if (task_ctx) {
 		preempt_disable();
-		perf_event_aux_ctx(task_ctx, match, output, data);
+		perf_event_aux_ctx(task_ctx, output, data);
 		preempt_enable();
 	}
 	rcu_read_unlock();
@@ -4759,6 +4754,12 @@ struct perf_task_event {
 	} event_id;
 };
 
+static int perf_event_task_match(struct perf_event *event)
+{
+	return event->attr.comm || event->attr.mmap ||
+	       event->attr.mmap_data || event->attr.task;
+}
+
 static void perf_event_task_output(struct perf_event *event,
 				   void *data)
 {
@@ -4768,6 +4769,9 @@ static void perf_event_task_output(struct perf_event *event,
 	struct task_struct *task = task_event->task;
 	int ret, size = task_event->event_id.header.size;
 
+	if (!perf_event_task_match(event))
+		return;
+
 	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
 	ret = perf_output_begin(&handle, event,
@@ -4790,13 +4794,6 @@ out:
 	task_event->event_id.header.size = size;
 }
 
-static int perf_event_task_match(struct perf_event *event,
-				 void *data __maybe_unused)
-{
-	return event->attr.comm || event->attr.mmap ||
-	       event->attr.mmap_data || event->attr.task;
-}
-
 static void perf_event_task(struct task_struct *task,
 			      struct perf_event_context *task_ctx,
 			      int new)
@@ -4825,8 +4822,7 @@ static void perf_event_task(struct task_struct *task,
 		},
 	};
 
-	perf_event_aux(perf_event_task_match,
-		       perf_event_task_output,
+	perf_event_aux(perf_event_task_output,
 		       &task_event,
 		       task_ctx);
 }
@@ -4853,6 +4849,11 @@ struct perf_comm_event {
 	} event_id;
 };
 
+static int perf_event_comm_match(struct perf_event *event)
+{
+	return event->attr.comm;
+}
+
 static void perf_event_comm_output(struct perf_event *event,
 				   void *data)
 {
@@ -4862,6 +4863,9 @@ static void perf_event_comm_output(struct perf_event *event,
 	int size = comm_event->event_id.header.size;
 	int ret;
 
+	if (!perf_event_comm_match(event))
+		return;
+
 	perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
 	ret = perf_output_begin(&handle, event,
 				comm_event->event_id.header.size);
@@ -4883,12 +4887,6 @@ out:
 	comm_event->event_id.header.size = size;
 }
 
-static int perf_event_comm_match(struct perf_event *event,
-				 void *data __maybe_unused)
-{
-	return event->attr.comm;
-}
-
 static void perf_event_comm_event(struct perf_comm_event *comm_event)
 {
 	char comm[TASK_COMM_LEN];
@@ -4903,8 +4901,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 
 	comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
 
-	perf_event_aux(perf_event_comm_match,
-		       perf_event_comm_output,
+	perf_event_aux(perf_event_comm_output,
 		       comm_event,
 		       NULL);
 }
@@ -4967,6 +4964,17 @@ struct perf_mmap_event {
 	} event_id;
 };
 
+static int perf_event_mmap_match(struct perf_event *event,
+				 void *data)
+{
+	struct perf_mmap_event *mmap_event = data;
+	struct vm_area_struct *vma = mmap_event->vma;
+	int executable = vma->vm_flags & VM_EXEC;
+
+	return (!executable && event->attr.mmap_data) ||
+	       (executable && event->attr.mmap);
+}
+
 static void perf_event_mmap_output(struct perf_event *event,
 				   void *data)
 {
@@ -4976,6 +4984,9 @@ static void perf_event_mmap_output(struct perf_event *event,
 	int size = mmap_event->event_id.header.size;
 	int ret;
 
+	if (!perf_event_mmap_match(event, data))
+		return;
+
 	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
 	ret = perf_output_begin(&handle, event,
 				mmap_event->event_id.header.size);
@@ -4996,17 +5007,6 @@ out:
 	mmap_event->event_id.header.size = size;
 }
 
-static int perf_event_mmap_match(struct perf_event *event,
-				 void *data)
-{
-	struct perf_mmap_event *mmap_event = data;
-	struct vm_area_struct *vma = mmap_event->vma;
-	int executable = vma->vm_flags & VM_EXEC;
-
-	return (!executable && event->attr.mmap_data) ||
-	       (executable && event->attr.mmap);
-}
-
 static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 {
 	struct vm_area_struct *vma = mmap_event->vma;
@@ -5070,8 +5070,7 @@ got_name:
 
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
-	perf_event_aux(perf_event_mmap_match,
-		       perf_event_mmap_output,
+	perf_event_aux(perf_event_mmap_output,
 		       mmap_event,
 		       NULL);
 
-- 
cgit v0.10.2


From ec1e7e437ac47ecf7b4e07241036b1e1c3366012 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Wed, 22 May 2013 17:42:38 -0700
Subject: perf tools: Rename cpu_map__all() to cpu_map__empty()

The CPU map is in an "empty" (or not-applicable) state when monitoring
specific threads.

cpu_map__all() returns true if the CPU map is in this empty state (i.e.
for the 'empty_cpu_map' or if we created the map via
cpu_map__dummy_new()).

The name cpu_map__all() is misleading, because even when monitoring
all CPUs (e.g. perf record -a), cpu_map__all() returns false.

Rename cpu_map__all() to cpu_map__empty().

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20130523012620.GA27733@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 9bed02e..b123bb9 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -41,7 +41,7 @@ static inline int cpu_map__nr(const struct cpu_map *map)
 	return map ? map->nr : 1;
 }
 
-static inline bool cpu_map__all(const struct cpu_map *map)
+static inline bool cpu_map__empty(const struct cpu_map *map)
 {
 	return map ? map->map[0] == -1 : true;
 }
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8065ce8..4a901be 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -421,7 +421,7 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
 static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 {
 	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
-	if (cpu_map__all(evlist->cpus))
+	if (cpu_map__empty(evlist->cpus))
 		evlist->nr_mmaps = thread_map__nr(evlist->threads);
 	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
 	return evlist->mmap != NULL ? 0 : -ENOMEM;
@@ -573,7 +573,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 			return -ENOMEM;
 	}
 
-	if (cpu_map__all(cpus))
+	if (cpu_map__empty(cpus))
 		return perf_evlist__mmap_per_thread(evlist, prot, mask);
 
 	return perf_evlist__mmap_per_cpu(evlist, prot, mask);
-- 
cgit v0.10.2


From 0d3942dbcf7f7e8955ba89deed4749b0ad64d721 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 14 May 2013 22:56:51 -0700
Subject: perf top: Add --objdump option

perf: Add objdump option to 'perf top'

Like with 'perf annotate' add the --objdump option to perf top so users
can specify an alternate path to the /usr/bin/objdump binary.

Reported-by: David A. Gilbert <DavidAGilbert@uk.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: DavidAGilbert@uk.ibm.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Maynard Johnson <mpjohn@us.ibm.com>
Link: http://lkml.kernel.org/r/20130515055651.GA9985@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e06c4f8..a237059 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -40,6 +40,7 @@
 #include "util/xyarray.h"
 #include "util/sort.h"
 #include "util/intlist.h"
+#include "arch/common.h"
 
 #include "util/debug.h"
 
@@ -939,6 +940,12 @@ static int __cmd_top(struct perf_top *top)
 	if (top->session == NULL)
 		return -ENOMEM;
 
+	if (!objdump_path) {
+		ret = perf_session_env__lookup_objdump(&top->session->header.env);
+		if (ret)
+			goto out_delete;
+	}
+
 	ret = perf_top__setup_sample_type(top);
 	if (ret)
 		goto out_delete;
@@ -1114,6 +1121,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "Interleave source code with assembly code (default)"),
 	OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
+	OPT_STRING(0, "objdump", &objdump_path, "path",
+		    "objdump binary to use for disassembly and annotations"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
 	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
-- 
cgit v0.10.2


From eb4fe9cbb1599a673bde0d6dad50e609404275c7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 7 Jun 2013 15:37:01 +0200
Subject: perf tools: Remove cwd from perf_session struct

Removing 'cwd' from perf_session struct as it's no longer needed.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370612223-19188-1-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index f3b235e..ad8d3d4 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -37,7 +37,6 @@ struct perf_session {
 	int			fd;
 	bool			fd_pipe;
 	bool			repipe;
-	char			*cwd;
 	struct ordered_samples	ordered_samples;
 	char			filename[1];
 };
-- 
cgit v0.10.2


From 5888a8c26e5f8e76334d579637f9ed55036cec5a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 7 Jun 2013 15:37:02 +0200
Subject: perf tests: Omit end of the symbol check failure for test 1

Omit the failure on the end-of-function check for test 1, since there's
no way to get the exact symbol end via kallsyms.

Leaving the debug message.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370612223-19188-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 7b4c4d2..add1539 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -139,11 +139,18 @@ next_pair:
 				 * _really_ have a problem.
 				 */
 				s64 skew = sym->end - pair->end;
-				if (llabs(skew) < page_size)
-					continue;
+				if (llabs(skew) >= page_size)
+					pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
+						 sym->start, sym->name, sym->end, pair->end);
+
+				/*
+				 * Do not count this as a failure, because we
+				 * could really find a case where it's not
+				 * possible to get proper function end from
+				 * kallsyms.
+				 */
+				continue;
 
-				pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
-					 sym->start, sym->name, sym->end, pair->end);
 			} else {
 				struct rb_node *nnd;
 detour:
-- 
cgit v0.10.2


From 450ac18d8f8076f0c522af1afb8519614a3b32f5 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 7 Jun 2013 15:37:03 +0200
Subject: perf tests: Make TEST_ASSERT_VAL global

Making TEST_ASSERT_VAL global as it's used in multiple objects.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370612223-19188-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 5eaffa2..dffe055 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -10,14 +10,6 @@
 #include "symbol.h"
 #include "tests.h"
 
-#define TEST_ASSERT_VAL(text, cond) \
-do { \
-	if (!(cond)) { \
-		pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
-		return -1; \
-	} \
-} while (0)
-
 static char *test_file(int size)
 {
 	static char buf_templ[] = "/tmp/test-XXXXXX";
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 0275bab..ad950f5 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -7,14 +7,6 @@
 #include "tests.h"
 #include <linux/hw_breakpoint.h>
 
-#define TEST_ASSERT_VAL(text, cond) \
-do { \
-	if (!(cond)) { \
-		pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
-		return -1; \
-	} \
-} while (0)
-
 #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
 			     PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
 
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index dd7feae..07a92f9 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -1,6 +1,14 @@
 #ifndef TESTS_H
 #define TESTS_H
 
+#define TEST_ASSERT_VAL(text, cond)					 \
+do {									 \
+	if (!(cond)) {							 \
+		pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
+		return -1;						 \
+	}								 \
+} while (0)
+
 enum {
 	TEST_OK   =  0,
 	TEST_FAIL = -1,
-- 
cgit v0.10.2


From 4e319027a7aee58ce8d409f5597b418f08307841 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@linaro.org>
Date: Tue, 11 Jun 2013 17:29:18 +0200
Subject: perf tools: Use default include path notation for libtraceevent
 headers

Header files of libtraceevent are no longer local headers. Thus, use
default path notation for them. Also remove the extra traceevent include
path and instead handle this similarly to liblk.

Signed-off-by: Robert Richter <robert.richter@linaro.org>
Signed-off-by: Robert Richter <rric@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Robert Richter <rric@kernel.org>
Link: http://lkml.kernel.org/r/1370964558-8599-1-git-send-email-rric@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 641fccd..5b7c6db 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -281,7 +281,7 @@ LIB_H += util/cpumap.h
 LIB_H += util/top.h
 LIB_H += $(ARCH_INCLUDE)
 LIB_H += util/cgroup.h
-LIB_H += $(TRACE_EVENT_DIR)event-parse.h
+LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
 LIB_H += util/target.h
 LIB_H += util/rblist.h
 LIB_H += util/intlist.h
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ab3ed4a..87fc7d0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,3 +1,4 @@
+#include <traceevent/event-parse.h>
 #include "builtin.h"
 #include "util/color.h"
 #include "util/evlist.h"
@@ -5,7 +6,6 @@
 #include "util/thread.h"
 #include "util/parse-options.h"
 #include "util/thread_map.h"
-#include "event-parse.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index b5d9238..214e17e 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -46,6 +46,8 @@ ifneq ($(obj-perf),)
 obj-perf := $(abspath $(obj-perf))/
 endif
 
+LIB_INCLUDE := $(srctree)/tools/lib/
+
 # include ARCH specific config
 -include $(src-perf)/arch/$(ARCH)/Makefile
 
@@ -121,8 +123,7 @@ endif
 
 CFLAGS += -I$(src-perf)/util
 CFLAGS += -I$(src-perf)
-CFLAGS += -I$(TRACE_EVENT_DIR)
-CFLAGS += -I$(srctree)/tools/lib/
+CFLAGS += -I$(LIB_INCLUDE)
 
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index a5d2fcc..f79e7d0 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -1,6 +1,6 @@
+#include <traceevent/event-parse.h>
 #include "evsel.h"
 #include "tests.h"
-#include "event-parse.h"
 
 static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 				  int size, bool should_be_signed)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c9c7494..a635461 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -9,17 +9,17 @@
 
 #include <byteswap.h>
 #include <linux/bitops.h>
-#include "asm/bug.h"
 #include <lk/debugfs.h>
-#include "event-parse.h"
+#include <traceevent/event-parse.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include "asm/bug.h"
 #include "evsel.h"
 #include "evlist.h"
 #include "util.h"
 #include "cpumap.h"
 #include "thread_map.h"
 #include "target.h"
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
 #include "perf_regs.h"
 
 static struct {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index cf1fe01..ad47fb9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <traceevent/event-parse.h>
 
 #include <byteswap.h>
 #include <unistd.h>
@@ -12,7 +13,6 @@
 #include "sort.h"
 #include "util.h"
 #include "cpumap.h"
-#include "event-parse.h"
 #include "perf_regs.h"
 #include "vdso.h"
 
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 1978c39..11eb7fd 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -1,8 +1,8 @@
 #ifndef _PERF_UTIL_TRACE_EVENT_H
 #define _PERF_UTIL_TRACE_EVENT_H
 
+#include <traceevent/event-parse.h>
 #include "parse-events.h"
-#include "event-parse.h"
 #include "session.h"
 
 struct machine;
-- 
cgit v0.10.2


From ba58041a8f0f0437c6c33414aa0d3fcf62ba90a5 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 7 Jun 2013 16:22:12 -0600
Subject: perf tools: Add methods for setting/retrieving priv element of thread
 struct

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1370643734-9579-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index eeb7ac6..5e7ba35 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -47,4 +47,14 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
 				struct addr_location *al,
 				symbol_filter_t filter);
+
+static inline void *thread__priv(struct thread *thread)
+{
+	return thread->priv;
+}
+
+static inline void thread__set_priv(struct thread *thread, void *p)
+{
+	thread->priv = p;
+}
 #endif	/* __PERF_THREAD_H */
-- 
cgit v0.10.2


From fa1531fdd7b6332aa61bcc9fda495583acba460d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 10 Jun 2013 16:31:28 -0400
Subject: perf tools: Remove callchain_cursor_reset call

Removing callchain_cursor_reset call as it is called in subsequent
machine__resolve_callchain_sample function.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-ic53wabwmmgvvwve2ymv3yf7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b2ecad6..93527af 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1242,8 +1242,6 @@ int machine__resolve_callchain(struct machine *machine,
 {
 	int ret;
 
-	callchain_cursor_reset(&callchain_cursor);
-
 	ret = machine__resolve_callchain_sample(machine, thread,
 						sample->callchain, parent);
 	if (ret)
-- 
cgit v0.10.2


From 167aedc44e1743777e6aee71b0fe7ed94c6298cd Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 26 Jun 2013 16:14:04 +0900
Subject: perf util: Move debugfs/tracing helper functions to util.c

Since they're generic helpers move them to util.c so that they can be
used by others.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1372230862-15861-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 3917eb9..615c062 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -46,65 +46,6 @@
 static int output_fd;
 
 
-static const char *find_debugfs(void)
-{
-	const char *path = perf_debugfs_mount(NULL);
-
-	if (!path)
-		pr_debug("Your kernel does not support the debugfs filesystem");
-
-	return path;
-}
-
-/*
- * Finds the path to the debugfs/tracing
- * Allocates the string and stores it.
- */
-static const char *find_tracing_dir(void)
-{
-	static char *tracing;
-	static int tracing_found;
-	const char *debugfs;
-
-	if (tracing_found)
-		return tracing;
-
-	debugfs = find_debugfs();
-	if (!debugfs)
-		return NULL;
-
-	tracing = malloc(strlen(debugfs) + 9);
-	if (!tracing)
-		return NULL;
-
-	sprintf(tracing, "%s/tracing", debugfs);
-
-	tracing_found = 1;
-	return tracing;
-}
-
-static char *get_tracing_file(const char *name)
-{
-	const char *tracing;
-	char *file;
-
-	tracing = find_tracing_dir();
-	if (!tracing)
-		return NULL;
-
-	file = malloc(strlen(tracing) + strlen(name) + 2);
-	if (!file)
-		return NULL;
-
-	sprintf(file, "%s/%s", tracing, name);
-	return file;
-}
-
-static void put_tracing_file(char *file)
-{
-	free(file);
-}
-
 int bigendian(void)
 {
 	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 59d868a..9a06584 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -269,3 +269,62 @@ void perf_debugfs_set_path(const char *mntpt)
 	snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt);
 	set_tracing_events_path(mntpt);
 }
+
+static const char *find_debugfs(void)
+{
+	const char *path = perf_debugfs_mount(NULL);
+
+	if (!path)
+		fprintf(stderr, "Your kernel does not support the debugfs filesystem");
+
+	return path;
+}
+
+/*
+ * Finds the path to the debugfs/tracing
+ * Allocates the string and stores it.
+ */
+const char *find_tracing_dir(void)
+{
+	static char *tracing;
+	static int tracing_found;
+	const char *debugfs;
+
+	if (tracing_found)
+		return tracing;
+
+	debugfs = find_debugfs();
+	if (!debugfs)
+		return NULL;
+
+	tracing = malloc(strlen(debugfs) + 9);
+	if (!tracing)
+		return NULL;
+
+	sprintf(tracing, "%s/tracing", debugfs);
+
+	tracing_found = 1;
+	return tracing;
+}
+
+char *get_tracing_file(const char *name)
+{
+	const char *tracing;
+	char *file;
+
+	tracing = find_tracing_dir();
+	if (!tracing)
+		return NULL;
+
+	file = malloc(strlen(tracing) + strlen(name) + 2);
+	if (!file)
+		return NULL;
+
+	sprintf(file, "%s/%s", tracing, name);
+	return file;
+}
+
+void put_tracing_file(char *file)
+{
+	free(file);
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 2732fad..cc1574e 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -80,6 +80,9 @@ extern char buildid_dir[];
 extern char tracing_events_path[];
 extern void perf_debugfs_set_path(const char *mountpoint);
 const char *perf_debugfs_mount(const char *mountpoint);
+const char *find_tracing_dir(void);
+char *get_tracing_file(const char *name);
+void put_tracing_file(char *file);
 
 /* On most systems <limits.h> would have given us this, but
  * not on some systems (e.g. GNU/Hurd).
-- 
cgit v0.10.2


From e7c93f09b83be25281cf129674e0035664715033 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 26 Jun 2013 16:14:05 +0900
Subject: perf util: Use evsel->name to get tracepoint_paths

Most tracepoint events already have their system and event name in the
->name field, so searching the whole event tracing directory for each
evsel to match a given id is suboptimal.

Factor out this routine into tracepoint_name_to_path().  In case of an
invalid name, it'll try to find the path using the id again.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1372230862-15861-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 995fc25..ef72e98 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -217,6 +217,29 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 	return NULL;
 }
 
+struct tracepoint_path *tracepoint_name_to_path(const char *name)
+{
+	struct tracepoint_path *path = zalloc(sizeof(*path));
+	char *str = strchr(name, ':');
+
+	if (path == NULL || str == NULL) {
+		free(path);
+		return NULL;
+	}
+
+	path->system = strndup(name, str - name);
+	path->name = strdup(str+1);
+
+	if (path->system == NULL || path->name == NULL) {
+		free(path->system);
+		free(path->name);
+		free(path);
+		path = NULL;
+	}
+
+	return path;
+}
+
 const char *event_type(int type)
 {
 	switch (type) {
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8a48593..080f7cf 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -23,6 +23,7 @@ struct tracepoint_path {
 };
 
 extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
+extern struct tracepoint_path *tracepoint_name_to_path(const char *name);
 extern bool have_tracepoints(struct list_head *evlist);
 
 const char *event_type(int type);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 615c062..a42624a 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -414,12 +414,27 @@ get_tracepoints_path(struct list_head *pattrs)
 		if (pos->attr.type != PERF_TYPE_TRACEPOINT)
 			continue;
 		++nr_tracepoints;
+
+		if (pos->name) {
+			ppath->next = tracepoint_name_to_path(pos->name);
+			if (ppath->next)
+				goto next;
+
+			if (strchr(pos->name, ':') == NULL)
+				goto try_id;
+
+			goto error;
+		}
+
+try_id:
 		ppath->next = tracepoint_id_to_path(pos->attr.config);
 		if (!ppath->next) {
+error:
 			pr_debug("No memory to alloc tracepoints list\n");
 			put_tracepoints_path(&path);
 			return NULL;
 		}
+next:
 		ppath = ppath->next;
 	}
 
-- 
cgit v0.10.2


From ad3d6f508738323c0e843c4dbdd421c1aeb59cd8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 17 Jun 2013 18:02:43 +0200
Subject: perf tools: Do not elide parent symbol column

I found the parent symbol column data interesting even
if there's another sorting enabled. Switching it on.

Previous behaviour:
  $ perf report -i perf.data.delete -p perf_session__delete -x

  +   3.60%  perf  perf               [.] __rb_change_child
  +   1.89%  perf  perf               [.] rb_erase
  +   1.89%  perf  perf               [.] rb_erase
  +   1.83%  perf  perf               [.] free@plt

Current behaviour:
  $ perf report -i perf.data.delete -p perf_session__delete -x

  +   3.60%  perf  perf               [.] __rb_change_child        perf_session__delete
  +   1.89%  perf  perf               [.] rb_erase                 perf_session__delete_dead_threads
  +   1.89%  perf  perf               [.] rb_erase                 perf_session__delete_threads
  +   1.83%  perf  perf               [.] free@plt                 perf_session__delete

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-r79fn89bhqz16ixa5zmyflrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3662047..6ab49da 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -931,14 +931,6 @@ repeat:
 	if (parent_pattern != default_parent_pattern) {
 		if (sort_dimension__add("parent") < 0)
 			goto error;
-
-		/*
-		 * Only show the parent fields if we explicitly
-		 * sort that way. If we only use parent machinery
-		 * for filtering, we don't want it.
-		 */
-		if (!strstr(sort_order, "parent"))
-			sort_parent.elide = 1;
 	}
 
 	if (argc) {
-- 
cgit v0.10.2


From d4ae0a6f7c79be64c8f3551dd149189f8c4480eb Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 25 Jun 2013 13:54:13 +0200
Subject: perf report: Fix perf_session__delete removal

There's no point in having the out_delete label with the
perf_session__delete call within the __cmd_report function, because
perf_session__delete is called at the end of the cmd_report function.

The speedup gained by commenting out the perf_session__delete call at
the end does not seem relevant anymore. The measured speedup for a ~1GB
data file with 222466 FORKS events is around 0.5%.

  $ perf report -i perf.data.delete -P perf_session__delete -s parent

  +  99.51%  [other]
  +   0.49%  perf_session__delete

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372161253-22081-6-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 6ab49da..ee2ca3e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -497,7 +497,7 @@ static int __cmd_report(struct perf_report *rep)
 		ret = perf_session__cpu_bitmap(session, rep->cpu_list,
 					       rep->cpu_bitmap);
 		if (ret)
-			goto out_delete;
+			return ret;
 	}
 
 	if (use_browser <= 0)
@@ -508,11 +508,11 @@ static int __cmd_report(struct perf_report *rep)
 
 	ret = perf_report__setup_sample_type(rep);
 	if (ret)
-		goto out_delete;
+		return ret;
 
 	ret = perf_session__process_events(session, &rep->tool);
 	if (ret)
-		goto out_delete;
+		return ret;
 
 	kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION];
 	kernel_kmap = map__kmap(kernel_map);
@@ -547,7 +547,7 @@ static int __cmd_report(struct perf_report *rep)
 
 	if (dump_trace) {
 		perf_session__fprintf_nr_events(session, stdout);
-		goto out_delete;
+		return 0;
 	}
 
 	nr_samples = 0;
@@ -572,7 +572,7 @@ static int __cmd_report(struct perf_report *rep)
 
 	if (nr_samples == 0) {
 		ui__error("The %s file has no samples!\n", session->filename);
-		goto out_delete;
+		return 0;
 	}
 
 	list_for_each_entry(pos, &session->evlist->entries, node)
@@ -598,19 +598,6 @@ static int __cmd_report(struct perf_report *rep)
 	} else
 		perf_evlist__tty_browse_hists(session->evlist, rep, help);
 
-out_delete:
-	/*
-	 * Speed up the exit process, for large files this can
-	 * take quite a while.
-	 *
-	 * XXX Enable this when using valgrind or if we ever
-	 * librarize this command.
-	 *
-	 * Also experiment with obstacks to see how much speed
-	 * up we'll get here.
-	 *
- 	 * perf_session__delete(session);
- 	 */
 	return ret;
 }
 
-- 
cgit v0.10.2


From cfe0d8ba14a1d98245b371e486c68f37eba1ca52 Mon Sep 17 00:00:00 2001
From: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Date: Fri, 28 Jun 2013 16:14:57 +0800
Subject: perf tools: Make Power7 events available for perf

Power7 supports over 530 different perf events, but only a small subset
of these can be specified by name. The remaining events must be
specified by their raw code:

        perf stat -e r2003c <application>

This patch makes all the POWER7 events available in sysfs.  So we can
instead specify these as:

        perf stat -e 'cpu/PM_CMPLU_STALL_DFU/' <application>

where PM_CMPLU_STALL_DFU corresponds to r2003c in the previous example.

Before this patch is applied, the size of power7-pmu.o is:

$ size arch/powerpc/perf/power7-pmu.o
   text	   data	    bss	    dec	    hex	filename
   3073	   2720	      0	   5793	   16a1	arch/powerpc/perf/power7-pmu.o

and after the patch is applied, it is:

$ size arch/powerpc/perf/power7-pmu.o
   text	   data	    bss	    dec	    hex	filename
  15950	  31112	      0	  47062	   b7d6	arch/powerpc/perf/power7-pmu.o

To measure the run time overhead, I used two scripts. One is
"event_name.sh", which contains 50 event names; it looks like:

 # ./perf record  -e 'cpu/PM_CMPLU_STALL_DFU/' -e .....  /bin/sleep 1

The other one is named "event_code.sh", which uses the corresponding
raw event codes instead of event names; it looks like:

 # ./perf record -e r2003c -e ......  /bin/sleep 1

below is the result.

Using events name:

[root@localhost perf]# time ./event_name.sh
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~102 samples) ]

real	0m1.192s
user	0m0.028s
sys	0m0.106s

Using events raw code:

[root@localhost perf]# time ./event_code.sh
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.003 MB perf.data (~112 samples) ]

real	0m1.198s
user	0m0.028s
sys	0m0.105s

Signed-off-by: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Acked-by: Michael Ellerman <michael@ellerman.id.au>
Cc: icycoder@gmail.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Runzhen Wang <runzhew@clemson.edu>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1372407297-6996-3-git-send-email-runzhen@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index f265049..d9270d8e 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -136,11 +136,11 @@ extern ssize_t power_events_sysfs_show(struct device *dev,
 #define	EVENT_PTR(_id, _suffix)		&EVENT_VAR(_id, _suffix).attr.attr
 
 #define	EVENT_ATTR(_name, _id, _suffix)					\
-	PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id,	\
+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id,	\
 			power_events_sysfs_show)
 
 #define	GENERIC_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _g)
 #define	GENERIC_EVENT_PTR(_id)		EVENT_PTR(_id, _g)
 
-#define	POWER_EVENT_ATTR(_name, _id)	EVENT_ATTR(PM_##_name, _id, _p)
+#define	POWER_EVENT_ATTR(_name, _id)	EVENT_ATTR(_name, _id, _p)
 #define	POWER_EVENT_PTR(_id)		EVENT_PTR(_id, _p)
diff --git a/arch/powerpc/perf/power7-events-list.h b/arch/powerpc/perf/power7-events-list.h
new file mode 100644
index 0000000..687790a
--- /dev/null
+++ b/arch/powerpc/perf/power7-events-list.h
@@ -0,0 +1,548 @@
+/*
+ * Performance counter support for POWER7 processors.
+ *
+ * Copyright 2013 Runzhen Wang, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+EVENT(PM_IC_DEMAND_L2_BR_ALL,                 0x04898)
+EVENT(PM_GCT_UTIL_7_TO_10_SLOTS,              0x020a0)
+EVENT(PM_PMC2_SAVED,                          0x10022)
+EVENT(PM_CMPLU_STALL_DFU,                     0x2003c)
+EVENT(PM_VSU0_16FLOP,                         0x0a0a4)
+EVENT(PM_MRK_LSU_DERAT_MISS,                  0x3d05a)
+EVENT(PM_MRK_ST_CMPL,                         0x10034)
+EVENT(PM_NEST_PAIR3_ADD,                      0x40881)
+EVENT(PM_L2_ST_DISP,                          0x46180)
+EVENT(PM_L2_CASTOUT_MOD,                      0x16180)
+EVENT(PM_ISEG,                                0x020a4)
+EVENT(PM_MRK_INST_TIMEO,                      0x40034)
+EVENT(PM_L2_RCST_DISP_FAIL_ADDR,              0x36282)
+EVENT(PM_LSU1_DC_PREF_STREAM_CONFIRM,         0x0d0b6)
+EVENT(PM_IERAT_WR_64K,                        0x040be)
+EVENT(PM_MRK_DTLB_MISS_16M,                   0x4d05e)
+EVENT(PM_IERAT_MISS,                          0x100f6)
+EVENT(PM_MRK_PTEG_FROM_LMEM,                  0x4d052)
+EVENT(PM_FLOP,                                0x100f4)
+EVENT(PM_THRD_PRIO_4_5_CYC,                   0x040b4)
+EVENT(PM_BR_PRED_TA,                          0x040aa)
+EVENT(PM_CMPLU_STALL_FXU,                     0x20014)
+EVENT(PM_EXT_INT,                             0x200f8)
+EVENT(PM_VSU_FSQRT_FDIV,                      0x0a888)
+EVENT(PM_MRK_LD_MISS_EXPOSED_CYC,             0x1003e)
+EVENT(PM_LSU1_LDF,                            0x0c086)
+EVENT(PM_IC_WRITE_ALL,                        0x0488c)
+EVENT(PM_LSU0_SRQ_STFWD,                      0x0c0a0)
+EVENT(PM_PTEG_FROM_RL2L3_MOD,                 0x1c052)
+EVENT(PM_MRK_DATA_FROM_L31_SHR,               0x1d04e)
+EVENT(PM_DATA_FROM_L21_MOD,                   0x3c046)
+EVENT(PM_VSU1_SCAL_DOUBLE_ISSUED,             0x0b08a)
+EVENT(PM_VSU0_8FLOP,                          0x0a0a0)
+EVENT(PM_POWER_EVENT1,                        0x1006e)
+EVENT(PM_DISP_CLB_HELD_BAL,                   0x02092)
+EVENT(PM_VSU1_2FLOP,                          0x0a09a)
+EVENT(PM_LWSYNC_HELD,                         0x0209a)
+EVENT(PM_PTEG_FROM_DL2L3_SHR,                 0x3c054)
+EVENT(PM_INST_FROM_L21_MOD,                   0x34046)
+EVENT(PM_IERAT_XLATE_WR_16MPLUS,              0x040bc)
+EVENT(PM_IC_REQ_ALL,                          0x04888)
+EVENT(PM_DSLB_MISS,                           0x0d090)
+EVENT(PM_L3_MISS,                             0x1f082)
+EVENT(PM_LSU0_L1_PREF,                        0x0d0b8)
+EVENT(PM_VSU_SCALAR_SINGLE_ISSUED,            0x0b884)
+EVENT(PM_LSU1_DC_PREF_STREAM_CONFIRM_STRIDE,  0x0d0be)
+EVENT(PM_L2_INST,                             0x36080)
+EVENT(PM_VSU0_FRSP,                           0x0a0b4)
+EVENT(PM_FLUSH_DISP,                          0x02082)
+EVENT(PM_PTEG_FROM_L2MISS,                    0x4c058)
+EVENT(PM_VSU1_DQ_ISSUED,                      0x0b09a)
+EVENT(PM_CMPLU_STALL_LSU,                     0x20012)
+EVENT(PM_MRK_DATA_FROM_DMEM,                  0x1d04a)
+EVENT(PM_LSU_FLUSH_ULD,                       0x0c8b0)
+EVENT(PM_PTEG_FROM_LMEM,                      0x4c052)
+EVENT(PM_MRK_DERAT_MISS_16M,                  0x3d05c)
+EVENT(PM_THRD_ALL_RUN_CYC,                    0x2000c)
+EVENT(PM_MEM0_PREFETCH_DISP,                  0x20083)
+EVENT(PM_MRK_STALL_CMPLU_CYC_COUNT,           0x3003f)
+EVENT(PM_DATA_FROM_DL2L3_MOD,                 0x3c04c)
+EVENT(PM_VSU_FRSP,                            0x0a8b4)
+EVENT(PM_MRK_DATA_FROM_L21_MOD,               0x3d046)
+EVENT(PM_PMC1_OVERFLOW,                       0x20010)
+EVENT(PM_VSU0_SINGLE,                         0x0a0a8)
+EVENT(PM_MRK_PTEG_FROM_L3MISS,                0x2d058)
+EVENT(PM_MRK_PTEG_FROM_L31_SHR,               0x2d056)
+EVENT(PM_VSU0_VECTOR_SP_ISSUED,               0x0b090)
+EVENT(PM_VSU1_FEST,                           0x0a0ba)
+EVENT(PM_MRK_INST_DISP,                       0x20030)
+EVENT(PM_VSU0_COMPLEX_ISSUED,                 0x0b096)
+EVENT(PM_LSU1_FLUSH_UST,                      0x0c0b6)
+EVENT(PM_INST_CMPL,                           0x00002)
+EVENT(PM_FXU_IDLE,                            0x1000e)
+EVENT(PM_LSU0_FLUSH_ULD,                      0x0c0b0)
+EVENT(PM_MRK_DATA_FROM_DL2L3_MOD,             0x3d04c)
+EVENT(PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC,           0x3001c)
+EVENT(PM_LSU1_REJECT_LMQ_FULL,                0x0c0a6)
+EVENT(PM_INST_PTEG_FROM_L21_MOD,              0x3e056)
+EVENT(PM_INST_FROM_RL2L3_MOD,                 0x14042)
+EVENT(PM_SHL_CREATED,                         0x05082)
+EVENT(PM_L2_ST_HIT,                           0x46182)
+EVENT(PM_DATA_FROM_DMEM,                      0x1c04a)
+EVENT(PM_L3_LD_MISS,                          0x2f082)
+EVENT(PM_FXU1_BUSY_FXU0_IDLE,                 0x4000e)
+EVENT(PM_DISP_CLB_HELD_RES,                   0x02094)
+EVENT(PM_L2_SN_SX_I_DONE,                     0x36382)
+EVENT(PM_GRP_CMPL,                            0x30004)
+EVENT(PM_STCX_CMPL,                           0x0c098)
+EVENT(PM_VSU0_2FLOP,                          0x0a098)
+EVENT(PM_L3_PREF_MISS,                        0x3f082)
+EVENT(PM_LSU_SRQ_SYNC_CYC,                    0x0d096)
+EVENT(PM_LSU_REJECT_ERAT_MISS,                0x20064)
+EVENT(PM_L1_ICACHE_MISS,                      0x200fc)
+EVENT(PM_LSU1_FLUSH_SRQ,                      0x0c0be)
+EVENT(PM_LD_REF_L1_LSU0,                      0x0c080)
+EVENT(PM_VSU0_FEST,                           0x0a0b8)
+EVENT(PM_VSU_VECTOR_SINGLE_ISSUED,            0x0b890)
+EVENT(PM_FREQ_UP,                             0x4000c)
+EVENT(PM_DATA_FROM_LMEM,                      0x3c04a)
+EVENT(PM_LSU1_LDX,                            0x0c08a)
+EVENT(PM_PMC3_OVERFLOW,                       0x40010)
+EVENT(PM_MRK_BR_MPRED,                        0x30036)
+EVENT(PM_SHL_MATCH,                           0x05086)
+EVENT(PM_MRK_BR_TAKEN,                        0x10036)
+EVENT(PM_CMPLU_STALL_BRU,                     0x4004e)
+EVENT(PM_ISLB_MISS,                           0x0d092)
+EVENT(PM_CYC,                                 0x0001e)
+EVENT(PM_DISP_HELD_THERMAL,                   0x30006)
+EVENT(PM_INST_PTEG_FROM_RL2L3_SHR,            0x2e054)
+EVENT(PM_LSU1_SRQ_STFWD,                      0x0c0a2)
+EVENT(PM_GCT_NOSLOT_BR_MPRED,                 0x4001a)
+EVENT(PM_1PLUS_PPC_CMPL,                      0x100f2)
+EVENT(PM_PTEG_FROM_DMEM,                      0x2c052)
+EVENT(PM_VSU_2FLOP,                           0x0a898)
+EVENT(PM_GCT_FULL_CYC,                        0x04086)
+EVENT(PM_MRK_DATA_FROM_L3_CYC,                0x40020)
+EVENT(PM_LSU_SRQ_S0_ALLOC,                    0x0d09d)
+EVENT(PM_MRK_DERAT_MISS_4K,                   0x1d05c)
+EVENT(PM_BR_MPRED_TA,                         0x040ae)
+EVENT(PM_INST_PTEG_FROM_L2MISS,               0x4e058)
+EVENT(PM_DPU_HELD_POWER,                      0x20006)
+EVENT(PM_RUN_INST_CMPL,                       0x400fa)
+EVENT(PM_MRK_VSU_FIN,                         0x30032)
+EVENT(PM_LSU_SRQ_S0_VALID,                    0x0d09c)
+EVENT(PM_GCT_EMPTY_CYC,                       0x20008)
+EVENT(PM_IOPS_DISP,                           0x30014)
+EVENT(PM_RUN_SPURR,                           0x10008)
+EVENT(PM_PTEG_FROM_L21_MOD,                   0x3c056)
+EVENT(PM_VSU0_1FLOP,                          0x0a080)
+EVENT(PM_SNOOP_TLBIE,                         0x0d0b2)
+EVENT(PM_DATA_FROM_L3MISS,                    0x2c048)
+EVENT(PM_VSU_SINGLE,                          0x0a8a8)
+EVENT(PM_DTLB_MISS_16G,                       0x1c05e)
+EVENT(PM_CMPLU_STALL_VECTOR,                  0x2001c)
+EVENT(PM_FLUSH,                               0x400f8)
+EVENT(PM_L2_LD_HIT,                           0x36182)
+EVENT(PM_NEST_PAIR2_AND,                      0x30883)
+EVENT(PM_VSU1_1FLOP,                          0x0a082)
+EVENT(PM_IC_PREF_REQ,                         0x0408a)
+EVENT(PM_L3_LD_HIT,                           0x2f080)
+EVENT(PM_GCT_NOSLOT_IC_MISS,                  0x2001a)
+EVENT(PM_DISP_HELD,                           0x10006)
+EVENT(PM_L2_LD,                               0x16080)
+EVENT(PM_LSU_FLUSH_SRQ,                       0x0c8bc)
+EVENT(PM_BC_PLUS_8_CONV,                      0x040b8)
+EVENT(PM_MRK_DATA_FROM_L31_MOD_CYC,           0x40026)
+EVENT(PM_CMPLU_STALL_VECTOR_LONG,             0x4004a)
+EVENT(PM_L2_RCST_BUSY_RC_FULL,                0x26282)
+EVENT(PM_TB_BIT_TRANS,                        0x300f8)
+EVENT(PM_THERMAL_MAX,                         0x40006)
+EVENT(PM_LSU1_FLUSH_ULD,                      0x0c0b2)
+EVENT(PM_LSU1_REJECT_LHS,                     0x0c0ae)
+EVENT(PM_LSU_LRQ_S0_ALLOC,                    0x0d09f)
+EVENT(PM_L3_CO_L31,                           0x4f080)
+EVENT(PM_POWER_EVENT4,                        0x4006e)
+EVENT(PM_DATA_FROM_L31_SHR,                   0x1c04e)
+EVENT(PM_BR_UNCOND,                           0x0409e)
+EVENT(PM_LSU1_DC_PREF_STREAM_ALLOC,           0x0d0aa)
+EVENT(PM_PMC4_REWIND,                         0x10020)
+EVENT(PM_L2_RCLD_DISP,                        0x16280)
+EVENT(PM_THRD_PRIO_2_3_CYC,                   0x040b2)
+EVENT(PM_MRK_PTEG_FROM_L2MISS,                0x4d058)
+EVENT(PM_IC_DEMAND_L2_BHT_REDIRECT,           0x04098)
+EVENT(PM_LSU_DERAT_MISS,                      0x200f6)
+EVENT(PM_IC_PREF_CANCEL_L2,                   0x04094)
+EVENT(PM_MRK_FIN_STALL_CYC_COUNT,             0x1003d)
+EVENT(PM_BR_PRED_CCACHE,                      0x040a0)
+EVENT(PM_GCT_UTIL_1_TO_2_SLOTS,               0x0209c)
+EVENT(PM_MRK_ST_CMPL_INT,                     0x30034)
+EVENT(PM_LSU_TWO_TABLEWALK_CYC,               0x0d0a6)
+EVENT(PM_MRK_DATA_FROM_L3MISS,                0x2d048)
+EVENT(PM_GCT_NOSLOT_CYC,                      0x100f8)
+EVENT(PM_LSU_SET_MPRED,                       0x0c0a8)
+EVENT(PM_FLUSH_DISP_TLBIE,                    0x0208a)
+EVENT(PM_VSU1_FCONV,                          0x0a0b2)
+EVENT(PM_DERAT_MISS_16G,                      0x4c05c)
+EVENT(PM_INST_FROM_LMEM,                      0x3404a)
+EVENT(PM_IC_DEMAND_L2_BR_REDIRECT,            0x0409a)
+EVENT(PM_CMPLU_STALL_SCALAR_LONG,             0x20018)
+EVENT(PM_INST_PTEG_FROM_L2,                   0x1e050)
+EVENT(PM_PTEG_FROM_L2,                        0x1c050)
+EVENT(PM_MRK_DATA_FROM_L21_SHR_CYC,           0x20024)
+EVENT(PM_MRK_DTLB_MISS_4K,                    0x2d05a)
+EVENT(PM_VSU0_FPSCR,                          0x0b09c)
+EVENT(PM_VSU1_VECT_DOUBLE_ISSUED,             0x0b082)
+EVENT(PM_MRK_PTEG_FROM_RL2L3_MOD,             0x1d052)
+EVENT(PM_MEM0_RQ_DISP,                        0x10083)
+EVENT(PM_L2_LD_MISS,                          0x26080)
+EVENT(PM_VMX_RESULT_SAT_1,                    0x0b0a0)
+EVENT(PM_L1_PREF,                             0x0d8b8)
+EVENT(PM_MRK_DATA_FROM_LMEM_CYC,              0x2002c)
+EVENT(PM_GRP_IC_MISS_NONSPEC,                 0x1000c)
+EVENT(PM_PB_NODE_PUMP,                        0x10081)
+EVENT(PM_SHL_MERGED,                          0x05084)
+EVENT(PM_NEST_PAIR1_ADD,                      0x20881)
+EVENT(PM_DATA_FROM_L3,                        0x1c048)
+EVENT(PM_LSU_FLUSH,                           0x0208e)
+EVENT(PM_LSU_SRQ_SYNC_COUNT,                  0x0d097)
+EVENT(PM_PMC2_OVERFLOW,                       0x30010)
+EVENT(PM_LSU_LDF,                             0x0c884)
+EVENT(PM_POWER_EVENT3,                        0x3006e)
+EVENT(PM_DISP_WT,                             0x30008)
+EVENT(PM_CMPLU_STALL_REJECT,                  0x40016)
+EVENT(PM_IC_BANK_CONFLICT,                    0x04082)
+EVENT(PM_BR_MPRED_CR_TA,                      0x048ae)
+EVENT(PM_L2_INST_MISS,                        0x36082)
+EVENT(PM_CMPLU_STALL_ERAT_MISS,               0x40018)
+EVENT(PM_NEST_PAIR2_ADD,                      0x30881)
+EVENT(PM_MRK_LSU_FLUSH,                       0x0d08c)
+EVENT(PM_L2_LDST,                             0x16880)
+EVENT(PM_INST_FROM_L31_SHR,                   0x1404e)
+EVENT(PM_VSU0_FIN,                            0x0a0bc)
+EVENT(PM_LARX_LSU,                            0x0c894)
+EVENT(PM_INST_FROM_RMEM,                      0x34042)
+EVENT(PM_DISP_CLB_HELD_TLBIE,                 0x02096)
+EVENT(PM_MRK_DATA_FROM_DMEM_CYC,              0x2002e)
+EVENT(PM_BR_PRED_CR,                          0x040a8)
+EVENT(PM_LSU_REJECT,                          0x10064)
+EVENT(PM_GCT_UTIL_3_TO_6_SLOTS,               0x0209e)
+EVENT(PM_CMPLU_STALL_END_GCT_NOSLOT,          0x10028)
+EVENT(PM_LSU0_REJECT_LMQ_FULL,                0x0c0a4)
+EVENT(PM_VSU_FEST,                            0x0a8b8)
+EVENT(PM_NEST_PAIR0_AND,                      0x10883)
+EVENT(PM_PTEG_FROM_L3,                        0x2c050)
+EVENT(PM_POWER_EVENT2,                        0x2006e)
+EVENT(PM_IC_PREF_CANCEL_PAGE,                 0x04090)
+EVENT(PM_VSU0_FSQRT_FDIV,                     0x0a088)
+EVENT(PM_MRK_GRP_CMPL,                        0x40030)
+EVENT(PM_VSU0_SCAL_DOUBLE_ISSUED,             0x0b088)
+EVENT(PM_GRP_DISP,                            0x3000a)
+EVENT(PM_LSU0_LDX,                            0x0c088)
+EVENT(PM_DATA_FROM_L2,                        0x1c040)
+EVENT(PM_MRK_DATA_FROM_RL2L3_MOD,             0x1d042)
+EVENT(PM_LD_REF_L1,                           0x0c880)
+EVENT(PM_VSU0_VECT_DOUBLE_ISSUED,             0x0b080)
+EVENT(PM_VSU1_2FLOP_DOUBLE,                   0x0a08e)
+EVENT(PM_THRD_PRIO_6_7_CYC,                   0x040b6)
+EVENT(PM_BC_PLUS_8_RSLV_TAKEN,                0x040ba)
+EVENT(PM_BR_MPRED_CR,                         0x040ac)
+EVENT(PM_L3_CO_MEM,                           0x4f082)
+EVENT(PM_LD_MISS_L1,                          0x400f0)
+EVENT(PM_DATA_FROM_RL2L3_MOD,                 0x1c042)
+EVENT(PM_LSU_SRQ_FULL_CYC,                    0x1001a)
+EVENT(PM_TABLEWALK_CYC,                       0x10026)
+EVENT(PM_MRK_PTEG_FROM_RMEM,                  0x3d052)
+EVENT(PM_LSU_SRQ_STFWD,                       0x0c8a0)
+EVENT(PM_INST_PTEG_FROM_RMEM,                 0x3e052)
+EVENT(PM_FXU0_FIN,                            0x10004)
+EVENT(PM_LSU1_L1_SW_PREF,                     0x0c09e)
+EVENT(PM_PTEG_FROM_L31_MOD,                   0x1c054)
+EVENT(PM_PMC5_OVERFLOW,                       0x10024)
+EVENT(PM_LD_REF_L1_LSU1,                      0x0c082)
+EVENT(PM_INST_PTEG_FROM_L21_SHR,              0x4e056)
+EVENT(PM_CMPLU_STALL_THRD,                    0x1001c)
+EVENT(PM_DATA_FROM_RMEM,                      0x3c042)
+EVENT(PM_VSU0_SCAL_SINGLE_ISSUED,             0x0b084)
+EVENT(PM_BR_MPRED_LSTACK,                     0x040a6)
+EVENT(PM_MRK_DATA_FROM_RL2L3_MOD_CYC,         0x40028)
+EVENT(PM_LSU0_FLUSH_UST,                      0x0c0b4)
+EVENT(PM_LSU_NCST,                            0x0c090)
+EVENT(PM_BR_TAKEN,                            0x20004)
+EVENT(PM_INST_PTEG_FROM_LMEM,                 0x4e052)
+EVENT(PM_GCT_NOSLOT_BR_MPRED_IC_MISS,         0x4001c)
+EVENT(PM_DTLB_MISS_4K,                        0x2c05a)
+EVENT(PM_PMC4_SAVED,                          0x30022)
+EVENT(PM_VSU1_PERMUTE_ISSUED,                 0x0b092)
+EVENT(PM_SLB_MISS,                            0x0d890)
+EVENT(PM_LSU1_FLUSH_LRQ,                      0x0c0ba)
+EVENT(PM_DTLB_MISS,                           0x300fc)
+EVENT(PM_VSU1_FRSP,                           0x0a0b6)
+EVENT(PM_VSU_VECTOR_DOUBLE_ISSUED,            0x0b880)
+EVENT(PM_L2_CASTOUT_SHR,                      0x16182)
+EVENT(PM_DATA_FROM_DL2L3_SHR,                 0x3c044)
+EVENT(PM_VSU1_STF,                            0x0b08e)
+EVENT(PM_ST_FIN,                              0x200f0)
+EVENT(PM_PTEG_FROM_L21_SHR,                   0x4c056)
+EVENT(PM_L2_LOC_GUESS_WRONG,                  0x26480)
+EVENT(PM_MRK_STCX_FAIL,                       0x0d08e)
+EVENT(PM_LSU0_REJECT_LHS,                     0x0c0ac)
+EVENT(PM_IC_PREF_CANCEL_HIT,                  0x04092)
+EVENT(PM_L3_PREF_BUSY,                        0x4f080)
+EVENT(PM_MRK_BRU_FIN,                         0x2003a)
+EVENT(PM_LSU1_NCLD,                           0x0c08e)
+EVENT(PM_INST_PTEG_FROM_L31_MOD,              0x1e054)
+EVENT(PM_LSU_NCLD,                            0x0c88c)
+EVENT(PM_LSU_LDX,                             0x0c888)
+EVENT(PM_L2_LOC_GUESS_CORRECT,                0x16480)
+EVENT(PM_THRESH_TIMEO,                        0x10038)
+EVENT(PM_L3_PREF_ST,                          0x0d0ae)
+EVENT(PM_DISP_CLB_HELD_SYNC,                  0x02098)
+EVENT(PM_VSU_SIMPLE_ISSUED,                   0x0b894)
+EVENT(PM_VSU1_SINGLE,                         0x0a0aa)
+EVENT(PM_DATA_TABLEWALK_CYC,                  0x3001a)
+EVENT(PM_L2_RC_ST_DONE,                       0x36380)
+EVENT(PM_MRK_PTEG_FROM_L21_MOD,               0x3d056)
+EVENT(PM_LARX_LSU1,                           0x0c096)
+EVENT(PM_MRK_DATA_FROM_RMEM,                  0x3d042)
+EVENT(PM_DISP_CLB_HELD,                       0x02090)
+EVENT(PM_DERAT_MISS_4K,                       0x1c05c)
+EVENT(PM_L2_RCLD_DISP_FAIL_ADDR,              0x16282)
+EVENT(PM_SEG_EXCEPTION,                       0x028a4)
+EVENT(PM_FLUSH_DISP_SB,                       0x0208c)
+EVENT(PM_L2_DC_INV,                           0x26182)
+EVENT(PM_PTEG_FROM_DL2L3_MOD,                 0x4c054)
+EVENT(PM_DSEG,                                0x020a6)
+EVENT(PM_BR_PRED_LSTACK,                      0x040a2)
+EVENT(PM_VSU0_STF,                            0x0b08c)
+EVENT(PM_LSU_FX_FIN,                          0x10066)
+EVENT(PM_DERAT_MISS_16M,                      0x3c05c)
+EVENT(PM_MRK_PTEG_FROM_DL2L3_MOD,             0x4d054)
+EVENT(PM_GCT_UTIL_11_PLUS_SLOTS,              0x020a2)
+EVENT(PM_INST_FROM_L3,                        0x14048)
+EVENT(PM_MRK_IFU_FIN,                         0x3003a)
+EVENT(PM_ITLB_MISS,                           0x400fc)
+EVENT(PM_VSU_STF,                             0x0b88c)
+EVENT(PM_LSU_FLUSH_UST,                       0x0c8b4)
+EVENT(PM_L2_LDST_MISS,                        0x26880)
+EVENT(PM_FXU1_FIN,                            0x40004)
+EVENT(PM_SHL_DEALLOCATED,                     0x05080)
+EVENT(PM_L2_SN_M_WR_DONE,                     0x46382)
+EVENT(PM_LSU_REJECT_SET_MPRED,                0x0c8a8)
+EVENT(PM_L3_PREF_LD,                          0x0d0ac)
+EVENT(PM_L2_SN_M_RD_DONE,                     0x46380)
+EVENT(PM_MRK_DERAT_MISS_16G,                  0x4d05c)
+EVENT(PM_VSU_FCONV,                           0x0a8b0)
+EVENT(PM_ANY_THRD_RUN_CYC,                    0x100fa)
+EVENT(PM_LSU_LMQ_FULL_CYC,                    0x0d0a4)
+EVENT(PM_MRK_LSU_REJECT_LHS,                  0x0d082)
+EVENT(PM_MRK_LD_MISS_L1_CYC,                  0x4003e)
+EVENT(PM_MRK_DATA_FROM_L2_CYC,                0x20020)
+EVENT(PM_INST_IMC_MATCH_DISP,                 0x30016)
+EVENT(PM_MRK_DATA_FROM_RMEM_CYC,              0x4002c)
+EVENT(PM_VSU0_SIMPLE_ISSUED,                  0x0b094)
+EVENT(PM_CMPLU_STALL_DIV,                     0x40014)
+EVENT(PM_MRK_PTEG_FROM_RL2L3_SHR,             0x2d054)
+EVENT(PM_VSU_FMA_DOUBLE,                      0x0a890)
+EVENT(PM_VSU_4FLOP,                           0x0a89c)
+EVENT(PM_VSU1_FIN,                            0x0a0be)
+EVENT(PM_NEST_PAIR1_AND,                      0x20883)
+EVENT(PM_INST_PTEG_FROM_RL2L3_MOD,            0x1e052)
+EVENT(PM_RUN_CYC,                             0x200f4)
+EVENT(PM_PTEG_FROM_RMEM,                      0x3c052)
+EVENT(PM_LSU_LRQ_S0_VALID,                    0x0d09e)
+EVENT(PM_LSU0_LDF,                            0x0c084)
+EVENT(PM_FLUSH_COMPLETION,                    0x30012)
+EVENT(PM_ST_MISS_L1,                          0x300f0)
+EVENT(PM_L2_NODE_PUMP,                        0x36480)
+EVENT(PM_INST_FROM_DL2L3_SHR,                 0x34044)
+EVENT(PM_MRK_STALL_CMPLU_CYC,                 0x3003e)
+EVENT(PM_VSU1_DENORM,                         0x0a0ae)
+EVENT(PM_MRK_DATA_FROM_L31_SHR_CYC,           0x20026)
+EVENT(PM_NEST_PAIR0_ADD,                      0x10881)
+EVENT(PM_INST_FROM_L3MISS,                    0x24048)
+EVENT(PM_EE_OFF_EXT_INT,                      0x02080)
+EVENT(PM_INST_PTEG_FROM_DMEM,                 0x2e052)
+EVENT(PM_INST_FROM_DL2L3_MOD,                 0x3404c)
+EVENT(PM_PMC6_OVERFLOW,                       0x30024)
+EVENT(PM_VSU_2FLOP_DOUBLE,                    0x0a88c)
+EVENT(PM_TLB_MISS,                            0x20066)
+EVENT(PM_FXU_BUSY,                            0x2000e)
+EVENT(PM_L2_RCLD_DISP_FAIL_OTHER,             0x26280)
+EVENT(PM_LSU_REJECT_LMQ_FULL,                 0x0c8a4)
+EVENT(PM_IC_RELOAD_SHR,                       0x04096)
+EVENT(PM_GRP_MRK,                             0x10031)
+EVENT(PM_MRK_ST_NEST,                         0x20034)
+EVENT(PM_VSU1_FSQRT_FDIV,                     0x0a08a)
+EVENT(PM_LSU0_FLUSH_LRQ,                      0x0c0b8)
+EVENT(PM_LARX_LSU0,                           0x0c094)
+EVENT(PM_IBUF_FULL_CYC,                       0x04084)
+EVENT(PM_MRK_DATA_FROM_DL2L3_SHR_CYC,         0x2002a)
+EVENT(PM_LSU_DC_PREF_STREAM_ALLOC,            0x0d8a8)
+EVENT(PM_GRP_MRK_CYC,                         0x10030)
+EVENT(PM_MRK_DATA_FROM_RL2L3_SHR_CYC,         0x20028)
+EVENT(PM_L2_GLOB_GUESS_CORRECT,               0x16482)
+EVENT(PM_LSU_REJECT_LHS,                      0x0c8ac)
+EVENT(PM_MRK_DATA_FROM_LMEM,                  0x3d04a)
+EVENT(PM_INST_PTEG_FROM_L3,                   0x2e050)
+EVENT(PM_FREQ_DOWN,                           0x3000c)
+EVENT(PM_PB_RETRY_NODE_PUMP,                  0x30081)
+EVENT(PM_INST_FROM_RL2L3_SHR,                 0x1404c)
+EVENT(PM_MRK_INST_ISSUED,                     0x10032)
+EVENT(PM_PTEG_FROM_L3MISS,                    0x2c058)
+EVENT(PM_RUN_PURR,                            0x400f4)
+EVENT(PM_MRK_GRP_IC_MISS,                     0x40038)
+EVENT(PM_MRK_DATA_FROM_L3,                    0x1d048)
+EVENT(PM_CMPLU_STALL_DCACHE_MISS,             0x20016)
+EVENT(PM_PTEG_FROM_RL2L3_SHR,                 0x2c054)
+EVENT(PM_LSU_FLUSH_LRQ,                       0x0c8b8)
+EVENT(PM_MRK_DERAT_MISS_64K,                  0x2d05c)
+EVENT(PM_INST_PTEG_FROM_DL2L3_MOD,            0x4e054)
+EVENT(PM_L2_ST_MISS,                          0x26082)
+EVENT(PM_MRK_PTEG_FROM_L21_SHR,               0x4d056)
+EVENT(PM_LWSYNC,                              0x0d094)
+EVENT(PM_LSU0_DC_PREF_STREAM_CONFIRM_STRIDE,  0x0d0bc)
+EVENT(PM_MRK_LSU_FLUSH_LRQ,                   0x0d088)
+EVENT(PM_INST_IMC_MATCH_CMPL,                 0x100f0)
+EVENT(PM_NEST_PAIR3_AND,                      0x40883)
+EVENT(PM_PB_RETRY_SYS_PUMP,                   0x40081)
+EVENT(PM_MRK_INST_FIN,                        0x30030)
+EVENT(PM_MRK_PTEG_FROM_DL2L3_SHR,             0x3d054)
+EVENT(PM_INST_FROM_L31_MOD,                   0x14044)
+EVENT(PM_MRK_DTLB_MISS_64K,                   0x3d05e)
+EVENT(PM_LSU_FIN,                             0x30066)
+EVENT(PM_MRK_LSU_REJECT,                      0x40064)
+EVENT(PM_L2_CO_FAIL_BUSY,                     0x16382)
+EVENT(PM_MEM0_WQ_DISP,                        0x40083)
+EVENT(PM_DATA_FROM_L31_MOD,                   0x1c044)
+EVENT(PM_THERMAL_WARN,                        0x10016)
+EVENT(PM_VSU0_4FLOP,                          0x0a09c)
+EVENT(PM_BR_MPRED_CCACHE,                     0x040a4)
+EVENT(PM_CMPLU_STALL_IFU,                     0x4004c)
+EVENT(PM_L1_DEMAND_WRITE,                     0x0408c)
+EVENT(PM_FLUSH_BR_MPRED,                      0x02084)
+EVENT(PM_MRK_DTLB_MISS_16G,                   0x1d05e)
+EVENT(PM_MRK_PTEG_FROM_DMEM,                  0x2d052)
+EVENT(PM_L2_RCST_DISP,                        0x36280)
+EVENT(PM_CMPLU_STALL,                         0x4000a)
+EVENT(PM_LSU_PARTIAL_CDF,                     0x0c0aa)
+EVENT(PM_DISP_CLB_HELD_SB,                    0x020a8)
+EVENT(PM_VSU0_FMA_DOUBLE,                     0x0a090)
+EVENT(PM_FXU0_BUSY_FXU1_IDLE,                 0x3000e)
+EVENT(PM_IC_DEMAND_CYC,                       0x10018)
+EVENT(PM_MRK_DATA_FROM_L21_SHR,               0x3d04e)
+EVENT(PM_MRK_LSU_FLUSH_UST,                   0x0d086)
+EVENT(PM_INST_PTEG_FROM_L3MISS,               0x2e058)
+EVENT(PM_VSU_DENORM,                          0x0a8ac)
+EVENT(PM_MRK_LSU_PARTIAL_CDF,                 0x0d080)
+EVENT(PM_INST_FROM_L21_SHR,                   0x3404e)
+EVENT(PM_IC_PREF_WRITE,                       0x0408e)
+EVENT(PM_BR_PRED,                             0x0409c)
+EVENT(PM_INST_FROM_DMEM,                      0x1404a)
+EVENT(PM_IC_PREF_CANCEL_ALL,                  0x04890)
+EVENT(PM_LSU_DC_PREF_STREAM_CONFIRM,          0x0d8b4)
+EVENT(PM_MRK_LSU_FLUSH_SRQ,                   0x0d08a)
+EVENT(PM_MRK_FIN_STALL_CYC,                   0x1003c)
+EVENT(PM_L2_RCST_DISP_FAIL_OTHER,             0x46280)
+EVENT(PM_VSU1_DD_ISSUED,                      0x0b098)
+EVENT(PM_PTEG_FROM_L31_SHR,                   0x2c056)
+EVENT(PM_DATA_FROM_L21_SHR,                   0x3c04e)
+EVENT(PM_LSU0_NCLD,                           0x0c08c)
+EVENT(PM_VSU1_4FLOP,                          0x0a09e)
+EVENT(PM_VSU1_8FLOP,                          0x0a0a2)
+EVENT(PM_VSU_8FLOP,                           0x0a8a0)
+EVENT(PM_LSU_LMQ_SRQ_EMPTY_CYC,               0x2003e)
+EVENT(PM_DTLB_MISS_64K,                       0x3c05e)
+EVENT(PM_THRD_CONC_RUN_INST,                  0x300f4)
+EVENT(PM_MRK_PTEG_FROM_L2,                    0x1d050)
+EVENT(PM_PB_SYS_PUMP,                         0x20081)
+EVENT(PM_VSU_FIN,                             0x0a8bc)
+EVENT(PM_MRK_DATA_FROM_L31_MOD,               0x1d044)
+EVENT(PM_THRD_PRIO_0_1_CYC,                   0x040b0)
+EVENT(PM_DERAT_MISS_64K,                      0x2c05c)
+EVENT(PM_PMC2_REWIND,                         0x30020)
+EVENT(PM_INST_FROM_L2,                        0x14040)
+EVENT(PM_GRP_BR_MPRED_NONSPEC,                0x1000a)
+EVENT(PM_INST_DISP,                           0x200f2)
+EVENT(PM_MEM0_RD_CANCEL_TOTAL,                0x30083)
+EVENT(PM_LSU0_DC_PREF_STREAM_CONFIRM,         0x0d0b4)
+EVENT(PM_L1_DCACHE_RELOAD_VALID,              0x300f6)
+EVENT(PM_VSU_SCALAR_DOUBLE_ISSUED,            0x0b888)
+EVENT(PM_L3_PREF_HIT,                         0x3f080)
+EVENT(PM_MRK_PTEG_FROM_L31_MOD,               0x1d054)
+EVENT(PM_CMPLU_STALL_STORE,                   0x2004a)
+EVENT(PM_MRK_FXU_FIN,                         0x20038)
+EVENT(PM_PMC4_OVERFLOW,                       0x10010)
+EVENT(PM_MRK_PTEG_FROM_L3,                    0x2d050)
+EVENT(PM_LSU0_LMQ_LHR_MERGE,                  0x0d098)
+EVENT(PM_BTAC_HIT,                            0x0508a)
+EVENT(PM_L3_RD_BUSY,                          0x4f082)
+EVENT(PM_LSU0_L1_SW_PREF,                     0x0c09c)
+EVENT(PM_INST_FROM_L2MISS,                    0x44048)
+EVENT(PM_LSU0_DC_PREF_STREAM_ALLOC,           0x0d0a8)
+EVENT(PM_L2_ST,                               0x16082)
+EVENT(PM_VSU0_DENORM,                         0x0a0ac)
+EVENT(PM_MRK_DATA_FROM_DL2L3_SHR,             0x3d044)
+EVENT(PM_BR_PRED_CR_TA,                       0x048aa)
+EVENT(PM_VSU0_FCONV,                          0x0a0b0)
+EVENT(PM_MRK_LSU_FLUSH_ULD,                   0x0d084)
+EVENT(PM_BTAC_MISS,                           0x05088)
+EVENT(PM_MRK_LD_MISS_EXPOSED_CYC_COUNT,       0x1003f)
+EVENT(PM_MRK_DATA_FROM_L2,                    0x1d040)
+EVENT(PM_LSU_DCACHE_RELOAD_VALID,             0x0d0a2)
+EVENT(PM_VSU_FMA,                             0x0a884)
+EVENT(PM_LSU0_FLUSH_SRQ,                      0x0c0bc)
+EVENT(PM_LSU1_L1_PREF,                        0x0d0ba)
+EVENT(PM_IOPS_CMPL,                           0x10014)
+EVENT(PM_L2_SYS_PUMP,                         0x36482)
+EVENT(PM_L2_RCLD_BUSY_RC_FULL,                0x46282)
+EVENT(PM_LSU_LMQ_S0_ALLOC,                    0x0d0a1)
+EVENT(PM_FLUSH_DISP_SYNC,                     0x02088)
+EVENT(PM_MRK_DATA_FROM_DL2L3_MOD_CYC,         0x4002a)
+EVENT(PM_L2_IC_INV,                           0x26180)
+EVENT(PM_MRK_DATA_FROM_L21_MOD_CYC,           0x40024)
+EVENT(PM_L3_PREF_LDST,                        0x0d8ac)
+EVENT(PM_LSU_SRQ_EMPTY_CYC,                   0x40008)
+EVENT(PM_LSU_LMQ_S0_VALID,                    0x0d0a0)
+EVENT(PM_FLUSH_PARTIAL,                       0x02086)
+EVENT(PM_VSU1_FMA_DOUBLE,                     0x0a092)
+EVENT(PM_1PLUS_PPC_DISP,                      0x400f2)
+EVENT(PM_DATA_FROM_L2MISS,                    0x200fe)
+EVENT(PM_SUSPENDED,                           0x00000)
+EVENT(PM_VSU0_FMA,                            0x0a084)
+EVENT(PM_CMPLU_STALL_SCALAR,                  0x40012)
+EVENT(PM_STCX_FAIL,                           0x0c09a)
+EVENT(PM_VSU0_FSQRT_FDIV_DOUBLE,              0x0a094)
+EVENT(PM_DC_PREF_DST,                         0x0d0b0)
+EVENT(PM_VSU1_SCAL_SINGLE_ISSUED,             0x0b086)
+EVENT(PM_L3_HIT,                              0x1f080)
+EVENT(PM_L2_GLOB_GUESS_WRONG,                 0x26482)
+EVENT(PM_MRK_DFU_FIN,                         0x20032)
+EVENT(PM_INST_FROM_L1,                        0x04080)
+EVENT(PM_BRU_FIN,                             0x10068)
+EVENT(PM_IC_DEMAND_REQ,                       0x04088)
+EVENT(PM_VSU1_FSQRT_FDIV_DOUBLE,              0x0a096)
+EVENT(PM_VSU1_FMA,                            0x0a086)
+EVENT(PM_MRK_LD_MISS_L1,                      0x20036)
+EVENT(PM_VSU0_2FLOP_DOUBLE,                   0x0a08c)
+EVENT(PM_LSU_DC_PREF_STRIDED_STREAM_CONFIRM,  0x0d8bc)
+EVENT(PM_INST_PTEG_FROM_L31_SHR,              0x2e056)
+EVENT(PM_MRK_LSU_REJECT_ERAT_MISS,            0x30064)
+EVENT(PM_MRK_DATA_FROM_L2MISS,                0x4d048)
+EVENT(PM_DATA_FROM_RL2L3_SHR,                 0x1c04c)
+EVENT(PM_INST_FROM_PREF,                      0x14046)
+EVENT(PM_VSU1_SQ,                             0x0b09e)
+EVENT(PM_L2_LD_DISP,                          0x36180)
+EVENT(PM_L2_DISP_ALL,                         0x46080)
+EVENT(PM_THRD_GRP_CMPL_BOTH_CYC,              0x10012)
+EVENT(PM_VSU_FSQRT_FDIV_DOUBLE,               0x0a894)
+EVENT(PM_BR_MPRED,                            0x400f6)
+EVENT(PM_INST_PTEG_FROM_DL2L3_SHR,            0x3e054)
+EVENT(PM_VSU_1FLOP,                           0x0a880)
+EVENT(PM_HV_CYC,                              0x2000a)
+EVENT(PM_MRK_LSU_FIN,                         0x40032)
+EVENT(PM_MRK_DATA_FROM_RL2L3_SHR,             0x1d04c)
+EVENT(PM_DTLB_MISS_16M,                       0x4c05e)
+EVENT(PM_LSU1_LMQ_LHR_MERGE,                  0x0d09a)
+EVENT(PM_IFU_FIN,                             0x40066)
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index d1821b8..56c67bc 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -53,37 +53,13 @@
 /*
  * Power7 event codes.
  */
-#define	PME_PM_CYC			0x1e
-#define	PME_PM_GCT_NOSLOT_CYC		0x100f8
-#define	PME_PM_CMPLU_STALL		0x4000a
-#define	PME_PM_INST_CMPL		0x2
-#define	PME_PM_LD_REF_L1		0xc880
-#define	PME_PM_LD_MISS_L1		0x400f0
-#define	PME_PM_BRU_FIN			0x10068
-#define	PME_PM_BR_MPRED			0x400f6
-
-#define PME_PM_CMPLU_STALL_FXU			0x20014
-#define PME_PM_CMPLU_STALL_DIV			0x40014
-#define PME_PM_CMPLU_STALL_SCALAR		0x40012
-#define PME_PM_CMPLU_STALL_SCALAR_LONG		0x20018
-#define PME_PM_CMPLU_STALL_VECTOR		0x2001c
-#define PME_PM_CMPLU_STALL_VECTOR_LONG		0x4004a
-#define PME_PM_CMPLU_STALL_LSU			0x20012
-#define PME_PM_CMPLU_STALL_REJECT		0x40016
-#define PME_PM_CMPLU_STALL_ERAT_MISS		0x40018
-#define PME_PM_CMPLU_STALL_DCACHE_MISS		0x20016
-#define PME_PM_CMPLU_STALL_STORE		0x2004a
-#define PME_PM_CMPLU_STALL_THRD			0x1001c
-#define PME_PM_CMPLU_STALL_IFU			0x4004c
-#define PME_PM_CMPLU_STALL_BRU			0x4004e
-#define PME_PM_GCT_NOSLOT_IC_MISS		0x2001a
-#define PME_PM_GCT_NOSLOT_BR_MPRED		0x4001a
-#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS	0x4001c
-#define PME_PM_GRP_CMPL				0x30004
-#define PME_PM_1PLUS_PPC_CMPL			0x100f2
-#define PME_PM_CMPLU_STALL_DFU			0x2003c
-#define PME_PM_RUN_CYC				0x200f4
-#define PME_PM_RUN_INST_CMPL			0x400fa
+#define EVENT(_name, _code) \
+	PME_##_name = _code,
+
+enum {
+#include "power7-events-list.h"
+};
+#undef EVENT
 
 /*
  * Layout of constraint bits:
@@ -398,96 +374,36 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 };
 
 
-GENERIC_EVENT_ATTR(cpu-cycles,			CYC);
-GENERIC_EVENT_ATTR(stalled-cycles-frontend,	GCT_NOSLOT_CYC);
-GENERIC_EVENT_ATTR(stalled-cycles-backend,	CMPLU_STALL);
-GENERIC_EVENT_ATTR(instructions,		INST_CMPL);
-GENERIC_EVENT_ATTR(cache-references,		LD_REF_L1);
-GENERIC_EVENT_ATTR(cache-misses,		LD_MISS_L1);
-GENERIC_EVENT_ATTR(branch-instructions,		BRU_FIN);
-GENERIC_EVENT_ATTR(branch-misses,		BR_MPRED);
-
-POWER_EVENT_ATTR(CYC,				CYC);
-POWER_EVENT_ATTR(GCT_NOSLOT_CYC,		GCT_NOSLOT_CYC);
-POWER_EVENT_ATTR(CMPLU_STALL,			CMPLU_STALL);
-POWER_EVENT_ATTR(INST_CMPL,			INST_CMPL);
-POWER_EVENT_ATTR(LD_REF_L1,			LD_REF_L1);
-POWER_EVENT_ATTR(LD_MISS_L1,			LD_MISS_L1);
-POWER_EVENT_ATTR(BRU_FIN,			BRU_FIN)
-POWER_EVENT_ATTR(BR_MPRED,			BR_MPRED);
-
-POWER_EVENT_ATTR(CMPLU_STALL_FXU,		CMPLU_STALL_FXU);
-POWER_EVENT_ATTR(CMPLU_STALL_DIV,		CMPLU_STALL_DIV);
-POWER_EVENT_ATTR(CMPLU_STALL_SCALAR,		CMPLU_STALL_SCALAR);
-POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG,	CMPLU_STALL_SCALAR_LONG);
-POWER_EVENT_ATTR(CMPLU_STALL_VECTOR,		CMPLU_STALL_VECTOR);
-POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG,	CMPLU_STALL_VECTOR_LONG);
-POWER_EVENT_ATTR(CMPLU_STALL_LSU,		CMPLU_STALL_LSU);
-POWER_EVENT_ATTR(CMPLU_STALL_REJECT,		CMPLU_STALL_REJECT);
-
-POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS,		CMPLU_STALL_ERAT_MISS);
-POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS,	CMPLU_STALL_DCACHE_MISS);
-POWER_EVENT_ATTR(CMPLU_STALL_STORE,		CMPLU_STALL_STORE);
-POWER_EVENT_ATTR(CMPLU_STALL_THRD,		CMPLU_STALL_THRD);
-POWER_EVENT_ATTR(CMPLU_STALL_IFU,		CMPLU_STALL_IFU);
-POWER_EVENT_ATTR(CMPLU_STALL_BRU,		CMPLU_STALL_BRU);
-POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS,		GCT_NOSLOT_IC_MISS);
-
-POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED,		GCT_NOSLOT_BR_MPRED);
-POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS,	GCT_NOSLOT_BR_MPRED_IC_MISS);
-POWER_EVENT_ATTR(GRP_CMPL,			GRP_CMPL);
-POWER_EVENT_ATTR(1PLUS_PPC_CMPL,		1PLUS_PPC_CMPL);
-POWER_EVENT_ATTR(CMPLU_STALL_DFU,		CMPLU_STALL_DFU);
-POWER_EVENT_ATTR(RUN_CYC,			RUN_CYC);
-POWER_EVENT_ATTR(RUN_INST_CMPL,			RUN_INST_CMPL);
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED);
+
+#define EVENT(_name, _code)     POWER_EVENT_ATTR(_name, _name);
+#include "power7-events-list.h"
+#undef EVENT
+
+#define EVENT(_name, _code)     POWER_EVENT_PTR(_name),
 
 static struct attribute *power7_events_attr[] = {
-	GENERIC_EVENT_PTR(CYC),
-	GENERIC_EVENT_PTR(GCT_NOSLOT_CYC),
-	GENERIC_EVENT_PTR(CMPLU_STALL),
-	GENERIC_EVENT_PTR(INST_CMPL),
-	GENERIC_EVENT_PTR(LD_REF_L1),
-	GENERIC_EVENT_PTR(LD_MISS_L1),
-	GENERIC_EVENT_PTR(BRU_FIN),
-	GENERIC_EVENT_PTR(BR_MPRED),
-
-	POWER_EVENT_PTR(CYC),
-	POWER_EVENT_PTR(GCT_NOSLOT_CYC),
-	POWER_EVENT_PTR(CMPLU_STALL),
-	POWER_EVENT_PTR(INST_CMPL),
-	POWER_EVENT_PTR(LD_REF_L1),
-	POWER_EVENT_PTR(LD_MISS_L1),
-	POWER_EVENT_PTR(BRU_FIN),
-	POWER_EVENT_PTR(BR_MPRED),
-
-	POWER_EVENT_PTR(CMPLU_STALL_FXU),
-	POWER_EVENT_PTR(CMPLU_STALL_DIV),
-	POWER_EVENT_PTR(CMPLU_STALL_SCALAR),
-	POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG),
-	POWER_EVENT_PTR(CMPLU_STALL_VECTOR),
-	POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG),
-	POWER_EVENT_PTR(CMPLU_STALL_LSU),
-	POWER_EVENT_PTR(CMPLU_STALL_REJECT),
-
-	POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS),
-	POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS),
-	POWER_EVENT_PTR(CMPLU_STALL_STORE),
-	POWER_EVENT_PTR(CMPLU_STALL_THRD),
-	POWER_EVENT_PTR(CMPLU_STALL_IFU),
-	POWER_EVENT_PTR(CMPLU_STALL_BRU),
-	POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS),
-	POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED),
-
-	POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS),
-	POWER_EVENT_PTR(GRP_CMPL),
-	POWER_EVENT_PTR(1PLUS_PPC_CMPL),
-	POWER_EVENT_PTR(CMPLU_STALL_DFU),
-	POWER_EVENT_PTR(RUN_CYC),
-	POWER_EVENT_PTR(RUN_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_CYC),
+	GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
+	GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+	GENERIC_EVENT_PTR(PM_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+	GENERIC_EVENT_PTR(PM_BRU_FIN),
+	GENERIC_EVENT_PTR(PM_BR_MPRED),
+
+	#include "power7-events-list.h"
+	#undef EVENT
 	NULL
 };
 
-
 static struct attribute_group power7_pmu_events_group = {
 	.name = "events",
 	.attrs = power7_events_attr,
-- 
cgit v0.10.2


From b3824404ebbd489858f3a7097c715120118fa5cd Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 2 Jul 2013 13:27:21 -0600
Subject: perf evlist: Fix use of uninitialized variable

Fixes valgrind complaint:
==1870== Syscall param write(buf) points to uninitialised byte(s)
==1870==    at 0x4E3F5B0: __write_nocancel (in /lib64/libpthread-2.14.90.so)
==1870==    by 0x449D7C: perf_evlist__start_workload (evlist.c:846)
==1870==    by 0x427BC1: cmd_record (builtin-record.c:561)
==1870==    by 0x419D72: run_builtin (perf.c:319)
==1870==    by 0x4195F2: main (perf.c:376)
==1870==  Address 0x7feffcdd7 is on thread 1's stack

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1372793245-4136-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4a901be..d8f34e0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -838,7 +838,7 @@ out_close_ready_pipe:
 int perf_evlist__start_workload(struct perf_evlist *evlist)
 {
 	if (evlist->workload.cork_fd > 0) {
-		char bf;
+		char bf = 0;
 		int ret;
 		/*
 		 * Remove the cork, let it rip!
-- 
cgit v0.10.2


From 0142dab01cd690f6e376f1fb4d4462beb054dfaf Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 2 Jul 2013 13:27:23 -0600
Subject: perf tools: Don't free list head in parse_events__free_terms

Function should only be freeing the entries in the list in case of
failure, as those were allocated there, not the list_head itself.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1372793245-4136-5-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index ef72e98..bcf83ee 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1260,6 +1260,4 @@ void parse_events__free_terms(struct list_head *terms)
 
 	list_for_each_entry_safe(term, h, terms, list)
 		free(term);
-
-	free(terms);
 }
-- 
cgit v0.10.2


From c549aca50134640537bf0fbae43c08fd5ff91932 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 2 Jul 2013 13:27:24 -0600
Subject: perf tests: Make terms a stack variable in test_term

No need to malloc the memory for it.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1372793245-4136-6-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index ad950f5..344c844 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1246,24 +1246,20 @@ static int test_events(struct evlist_test *events, unsigned cnt)
 
 static int test_term(struct terms_test *t)
 {
-	struct list_head *terms;
+	struct list_head terms;
 	int ret;
 
-	terms = malloc(sizeof(*terms));
-	if (!terms)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(terms);
+	INIT_LIST_HEAD(&terms);
 
-	ret = parse_events_terms(terms, t->str);
+	ret = parse_events_terms(&terms, t->str);
 	if (ret) {
 		pr_debug("failed to parse terms '%s', err %d\n",
 			 t->str , ret);
 		return ret;
 	}
 
-	ret = t->check(terms);
-	parse_events__free_terms(terms);
+	ret = t->check(&terms);
+	parse_events__free_terms(&terms);
 
 	return ret;
 }
-- 
cgit v0.10.2


From c5cd8ac07e7ad5f21b1930b23b2e1bb231958430 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 2 Jul 2013 13:27:25 -0600
Subject: perf parse events: Demystify memory allocations

List heads are currently allocated way down the function chain in
__add_event and add_tracepoint and then freed when the scanner code
calls parse_events_update_lists.

Be more explicit with where memory is allocated and who should free it. With
this patch the list_head is allocated in the scanner code and freed when the
scanner code calls parse_events_update_lists.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1372793245-4136-7-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index bcf83ee..e853769 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -264,40 +264,29 @@ const char *event_type(int type)
 
 
-static int __add_event(struct list_head **_list, int *idx,
+static int __add_event(struct list_head *list, int *idx,
 		       struct perf_event_attr *attr,
 		       char *name, struct cpu_map *cpus)
 {
 	struct perf_evsel *evsel;
-	struct list_head *list = *_list;
-
-	if (!list) {
-		list = malloc(sizeof(*list));
-		if (!list)
-			return -ENOMEM;
-		INIT_LIST_HEAD(list);
-	}
 
 	event_attr_init(attr);
 
 	evsel = perf_evsel__new(attr, (*idx)++);
-	if (!evsel) {
-		free(list);
+	if (!evsel)
 		return -ENOMEM;
-	}
 
 	evsel->cpus = cpus;
 	if (name)
 		evsel->name = strdup(name);
 	list_add_tail(&evsel->node, list);
-	*_list = list;
 	return 0;
 }
 
-static int add_event(struct list_head **_list, int *idx,
+static int add_event(struct list_head *list, int *idx,
 		     struct perf_event_attr *attr, char *name)
 {
-	return __add_event(_list, idx, attr, name, NULL);
+	return __add_event(list, idx, attr, name, NULL);
 }
 
 static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -318,7 +307,7 @@ static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES]
 	return -1;
 }
 
-int parse_events_add_cache(struct list_head **list, int *idx,
+int parse_events_add_cache(struct list_head *list, int *idx,
 			   char *type, char *op_result1, char *op_result2)
 {
 	struct perf_event_attr attr;
@@ -379,31 +368,21 @@ int parse_events_add_cache(struct list_head **list, int *idx,
 	return add_event(list, idx, &attr, name);
 }
 
-static int add_tracepoint(struct list_head **listp, int *idx,
+static int add_tracepoint(struct list_head *list, int *idx,
 			  char *sys_name, char *evt_name)
 {
 	struct perf_evsel *evsel;
-	struct list_head *list = *listp;
-
-	if (!list) {
-		list = malloc(sizeof(*list));
-		if (!list)
-			return -ENOMEM;
-		INIT_LIST_HEAD(list);
-	}
 
 	evsel = perf_evsel__newtp(sys_name, evt_name, (*idx)++);
-	if (!evsel) {
-		free(list);
+	if (!evsel)
 		return -ENOMEM;
-	}
 
 	list_add_tail(&evsel->node, list);
-	*listp = list;
+
 	return 0;
 }
 
-static int add_tracepoint_multi_event(struct list_head **list, int *idx,
+static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 				      char *sys_name, char *evt_name)
 {
 	char evt_path[MAXPATHLEN];
@@ -435,7 +414,7 @@ static int add_tracepoint_multi_event(struct list_head **list, int *idx,
 	return ret;
 }
 
-static int add_tracepoint_event(struct list_head **list, int *idx,
+static int add_tracepoint_event(struct list_head *list, int *idx,
 				char *sys_name, char *evt_name)
 {
 	return strpbrk(evt_name, "*?") ?
@@ -443,7 +422,7 @@ static int add_tracepoint_event(struct list_head **list, int *idx,
 	       add_tracepoint(list, idx, sys_name, evt_name);
 }
 
-static int add_tracepoint_multi_sys(struct list_head **list, int *idx,
+static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 				    char *sys_name, char *evt_name)
 {
 	struct dirent *events_ent;
@@ -475,7 +454,7 @@ static int add_tracepoint_multi_sys(struct list_head **list, int *idx,
 	return ret;
 }
 
-int parse_events_add_tracepoint(struct list_head **list, int *idx,
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
 				char *sys, char *event)
 {
 	int ret;
@@ -530,7 +509,7 @@ do {					\
 	return 0;
 }
 
-int parse_events_add_breakpoint(struct list_head **list, int *idx,
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
 				void *ptr, char *type)
 {
 	struct perf_event_attr attr;
@@ -611,7 +590,7 @@ static int config_attr(struct perf_event_attr *attr,
 	return 0;
 }
 
-int parse_events_add_numeric(struct list_head **list, int *idx,
+int parse_events_add_numeric(struct list_head *list, int *idx,
 			     u32 type, u64 config,
 			     struct list_head *head_config)
 {
@@ -644,7 +623,7 @@ static char *pmu_event_name(struct list_head *head_terms)
 	return NULL;
 }
 
-int parse_events_add_pmu(struct list_head **list, int *idx,
+int parse_events_add_pmu(struct list_head *list, int *idx,
 			 char *name, struct list_head *head_config)
 {
 	struct perf_event_attr attr;
@@ -687,6 +666,7 @@ void parse_events__set_leader(char *name, struct list_head *list)
 	leader->group_name = name ? strdup(name) : NULL;
 }
 
+/* list_event is assumed to point to malloc'ed memory */
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all)
 {
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 080f7cf..f1cb4c4 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -85,16 +85,16 @@ void parse_events__free_terms(struct list_head *terms);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
-int parse_events_add_tracepoint(struct list_head **list, int *idx,
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
 				char *sys, char *event);
-int parse_events_add_numeric(struct list_head **list, int *idx,
+int parse_events_add_numeric(struct list_head *list, int *idx,
 			     u32 type, u64 config,
 			     struct list_head *head_config);
-int parse_events_add_cache(struct list_head **list, int *idx,
+int parse_events_add_cache(struct list_head *list, int *idx,
 			   char *type, char *op_result1, char *op_result2);
-int parse_events_add_breakpoint(struct list_head **list, int *idx,
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
 				void *ptr, char *type);
-int parse_events_add_pmu(struct list_head **list, int *idx,
+int parse_events_add_pmu(struct list_head *list, int *idx,
 			 char *pmu , struct list_head *head_config);
 void parse_events__set_leader(char *name, struct list_head *list);
 void parse_events_update_lists(struct list_head *list_event,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index afc44c1..4eb67ec 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -22,6 +22,13 @@ do { \
 		YYABORT; \
 } while (0)
 
+#define ALLOC_LIST(list) \
+do { \
+	list = malloc(sizeof(*list)); \
+	ABORT_ON(!list);              \
+	INIT_LIST_HEAD(list);         \
+} while (0)
+
 static inc_group_count(struct list_head *list,
 		       struct parse_events_evlist *data)
 {
@@ -196,9 +203,10 @@ event_pmu:
 PE_NAME '/' event_config '/'
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3));
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
@@ -212,11 +220,12 @@ event_legacy_symbol:
 value_sym '/' event_config '/'
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 	int type = $1 >> 16;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(list, &data->idx,
 					  type, config, $3));
 	parse_events__free_terms($3);
 	$$ = list;
@@ -225,11 +234,12 @@ value_sym '/' event_config '/'
 value_sym sep_slash_dc
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 	int type = $1 >> 16;
 	int config = $1 & 255;
 
-	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(list, &data->idx,
 					  type, config, NULL));
 	$$ = list;
 }
@@ -238,27 +248,30 @@ event_legacy_cache:
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5));
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5));
 	$$ = list;
 }
 |
 PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL));
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL));
 	$$ = list;
 }
 |
 PE_NAME_CACHE_TYPE
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL));
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL));
 	$$ = list;
 }
 
@@ -266,9 +279,10 @@ event_legacy_mem:
 PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_breakpoint(list, &data->idx,
 					     (void *) $2, $4));
 	$$ = list;
 }
@@ -276,9 +290,10 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
 PE_PREFIX_MEM PE_VALUE sep_dc
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_breakpoint(&list, &data->idx,
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_breakpoint(list, &data->idx,
 					     (void *) $2, NULL));
 	$$ = list;
 }
@@ -287,9 +302,10 @@ event_legacy_tracepoint:
 PE_NAME ':' PE_NAME
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3));
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_tracepoint(list, &data->idx, $1, $3));
 	$$ = list;
 }
 
@@ -297,9 +313,10 @@ event_legacy_numeric:
 PE_VALUE ':' PE_VALUE
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL));
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(list, &data->idx, (u32)$1, $3, NULL));
 	$$ = list;
 }
 
@@ -307,9 +324,10 @@ event_legacy_raw:
 PE_RAW
 {
 	struct parse_events_evlist *data = _data;
-	struct list_head *list = NULL;
+	struct list_head *list;
 
-	ABORT_ON(parse_events_add_numeric(&list, &data->idx,
+	ALLOC_LIST(list);
+	ABORT_ON(parse_events_add_numeric(list, &data->idx,
 					  PERF_TYPE_RAW, $1, NULL));
 	$$ = list;
 }
-- 
cgit v0.10.2


From 9f1efa82640b4f06c8f6c847f088c53e4100395c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:16 +0900
Subject: tools lib traceevent: Remove unused install targets

The html_install, img_install, install_plugin and install_python are
unused in the Makefile.  Get rid of them.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 0b0a907..fd7510d 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -39,13 +39,8 @@ bindir_relative = bin
 bindir = $(prefix)/$(bindir_relative)
 man_dir = $(prefix)/share/man
 man_dir_SQ = '$(subst ','\'',$(man_dir))'
-html_install = $(prefix)/share/kernelshark/html
-html_install_SQ = '$(subst ','\'',$(html_install))'
-img_install = $(prefix)/share/kernelshark/html/images
-img_install_SQ = '$(subst ','\'',$(img_install))'
 
-export man_dir man_dir_SQ html_install html_install_SQ INSTALL
-export img_install img_install_SQ
+export man_dir man_dir_SQ INSTALL
 export DESTDIR DESTDIR_SQ
 
 # copy a bit from Linux kbuild
@@ -300,7 +295,7 @@ define do_install
 	$(INSTALL) $1 '$(DESTDIR_SQ)$2'
 endef
 
-install_lib: all_cmd install_plugins install_python
+install_lib: all_cmd
 	$(Q)$(call do_install,$(LIB_FILE),$(bindir_SQ))
 
 install: install_lib
-- 
cgit v0.10.2


From 4ccdf57d46843f5c03e390bdb652c9744e30ee20 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:17 +0900
Subject: tools lib traceevent: Get rid of unused gui target

It came from trace-cmd's kernelshark which is not a part of
libtraceevent.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index fd7510d..4505de8 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -71,10 +71,7 @@ $(if $(BUILD_OUTPUT),, \
 
 all: sub-make
 
-gui: force
-	$(call build_output, all_cmd)
-
-$(filter-out gui,$(MAKECMDGOALS)): sub-make
+$(MAKECMDGOALS): sub-make
 
 sub-make: force
 	$(call build_output, $(MAKECMDGOALS))
@@ -253,9 +250,6 @@ define check_deps
 		$(RM) $@.$$$$
 endef
 
-$(gui_deps): ks_version.h
-$(non_gui_deps): tc_version.h
-
 $(all_deps): .%.d: $(src)/%.c
 	$(Q)$(call check_deps)
 
-- 
cgit v0.10.2


From 79d5adf06dd530fe6a9ab3d086b2d23eb7560491 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:18 +0900
Subject: tools lib traceevent: Add const qualifier to string arguments

If pevent_register_event_handler() received a string literal as
@sys_name or @event_name parameter, it emitted a warning about const
qualifier removal.  Since they're not modified in the function we can
make it have const qualifier.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 82b0606..d1c2a6a 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5450,10 +5450,9 @@ int pevent_register_print_function(struct pevent *pevent,
  * If @id is >= 0, then it is used to find the event.
  * else @sys_name and @event_name are used.
  */
-int pevent_register_event_handler(struct pevent *pevent,
-				  int id, char *sys_name, char *event_name,
-				  pevent_event_handler_func func,
-				  void *context)
+int pevent_register_event_handler(struct pevent *pevent, int id,
+				  const char *sys_name, const char *event_name,
+				  pevent_event_handler_func func, void *context)
 {
 	struct event_format *event;
 	struct event_handler *handle;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 7be7e89..bfceab9 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -561,7 +561,8 @@ int pevent_print_num_field(struct trace_seq *s, const char *fmt,
 			   struct event_format *event, const char *name,
 			   struct pevent_record *record, int err);
 
-int pevent_register_event_handler(struct pevent *pevent, int id, char *sys_name, char *event_name,
+int pevent_register_event_handler(struct pevent *pevent, int id,
+				  const char *sys_name, const char *event_name,
 				  pevent_event_handler_func func, void *context);
 int pevent_register_print_function(struct pevent *pevent,
 				   pevent_func_handler func,
-- 
cgit v0.10.2


From 6a48aec3a7179b0cdae2339d5a4072214ee6c6fe Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:19 +0900
Subject: tools lib traceevent: Add trace_seq_reset()

Sometimes it'd be useful if existing trace_seq can be reused.  But
currently it's impossible since there's no API to reset the trace_seq.
Let's add trace_seq_reset() for this case.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-5-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index bfceab9..39f0c1d 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -69,6 +69,7 @@ struct trace_seq {
 };
 
 void trace_seq_init(struct trace_seq *s);
+void trace_seq_reset(struct trace_seq *s);
 void trace_seq_destroy(struct trace_seq *s);
 
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index a57db80..d7f2e68 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -49,6 +49,19 @@ void trace_seq_init(struct trace_seq *s)
 }
 
 /**
+ * trace_seq_reset - re-initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to reset
+ */
+void trace_seq_reset(struct trace_seq *s)
+{
+	if (!s)
+		return;
+	TRACE_SEQ_CHECK(s);
+	s->len = 0;
+	s->readpos = 0;
+}
+
+/**
  * trace_seq_destroy - free up memory of a trace_seq
  * @s: a pointer to the trace_seq to free the buffer
  *
-- 
cgit v0.10.2


From 012ac692575b1ea6ed930871850584e4c64f1382 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:20 +0900
Subject: tools lib traceevent: Add page_size field to pevent

The page size of traced system can be different than current system's
because the recorded data file might be analyzed in a different machine.
In this case we should use original page size of traced system when
accessing the data file, so this information needs to be saved.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 39f0c1d..c37b202 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -400,6 +400,7 @@ struct pevent {
 
 	int cpus;
 	int long_size;
+	int page_size;
 
 	struct cmdline *cmdlines;
 	struct cmdline_list *cmdlist;
@@ -621,6 +622,16 @@ static inline void pevent_set_long_size(struct pevent *pevent, int long_size)
 	pevent->long_size = long_size;
 }
 
+static inline int pevent_get_page_size(struct pevent *pevent)
+{
+	return pevent->page_size;
+}
+
+static inline void pevent_set_page_size(struct pevent *pevent, int _page_size)
+{
+	pevent->page_size = _page_size;
+}
+
 static inline int pevent_is_file_bigendian(struct pevent *pevent)
 {
 	return pevent->file_bigendian;
-- 
cgit v0.10.2


From d6c25223f6067c6889d8fc3f9576d34bbac161b0 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:21 +0900
Subject: tools lib traceevent: Port kbuffer parser routines

kbuffer code is for parsing ftrace ring-buffer binary data and used
for trace-cmd.  Move the code here in order to be used more widely.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Original-patch-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-7-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 4505de8..0794acc 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -181,6 +181,7 @@ $(obj)/%.o: $(src)/%.c
 	$(Q)$(call do_compile)
 
 PEVENT_LIB_OBJS = event-parse.o trace-seq.o parse-filter.o parse-utils.o
+PEVENT_LIB_OBJS += kbuffer-parse.o
 
 ALL_OBJS = $(PEVENT_LIB_OBJS)
 
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
new file mode 100644
index 0000000..dcc6652
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -0,0 +1,732 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "kbuffer.h"
+
+#define MISSING_EVENTS (1UL << 31)	/* unsigned: 1 << 31 overflows int */
+#define MISSING_STORED (1UL << 30)	/* lost-event count follows the data */
+
+#define COMMIT_MASK ((1 << 27) - 1)
+
+enum {
+	KBUFFER_FL_HOST_BIG_ENDIAN	= (1<<0),
+	KBUFFER_FL_BIG_ENDIAN		= (1<<1),
+	KBUFFER_FL_LONG_8		= (1<<2),
+	KBUFFER_FL_OLD_FORMAT		= (1<<3),
+};
+
+#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN)
+
+/** kbuffer
+ * @timestamp		- timestamp of current event
+ * @lost_events		- # of lost events between this subbuffer and previous
+ * @flags		- special flags of the kbuffer
+ * @subbuffer		- pointer to the sub-buffer page
+ * @data		- pointer to the start of data on the sub-buffer page
+ * @index		- index from @data to the @curr event data
+ * @curr		- offset from @data to the start of current event
+ *			   (includes metadata)
+ * @next		- offset from @data to the start of next event
+ * @size		- The size of data on @data
+ * @start		- The offset from @subbuffer where @data lives
+ *
+ * @read_4		- Function to read 4 raw bytes (may swap)
+ * @read_8		- Function to read 8 raw bytes (may swap)
+ * @read_long		- Function to read a long word (4 or 8 bytes with needed swap)
+ */
+struct kbuffer {
+	unsigned long long 	timestamp;
+	long long		lost_events;
+	unsigned long		flags;
+	void			*subbuffer;
+	void			*data;
+	unsigned int		index;
+	unsigned int		curr;
+	unsigned int		next;
+	unsigned int		size;
+	unsigned int		start;
+
+	unsigned int (*read_4)(void *ptr);
+	unsigned long long (*read_8)(void *ptr);
+	unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
+	int (*next_event)(struct kbuffer *kbuf);
+};
+
+static void *zmalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+static int host_is_bigendian(void)
+{
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+	unsigned int val;	/* filled via memcpy: no aliasing/alignment cast */
+
+	memcpy(&val, str, sizeof(val));
+	return val == 0x01020304;	/* bytes in memory order == big endian */
+}
+
+static int do_swap(struct kbuffer *kbuf)
+{
+	return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
+		ENDIAN_MASK;
+}
+
+static unsigned long long __read_8(void *ptr)
+{
+	unsigned long long data = *(unsigned long long *)ptr;
+
+	return data;
+}
+
+static unsigned long long __read_8_sw(void *ptr)
+{
+	unsigned long long data = *(unsigned long long *)ptr;
+	unsigned long long swap;
+
+	swap = ((data & 0xffULL) << 56) |
+		((data & (0xffULL << 8)) << 40) |
+		((data & (0xffULL << 16)) << 24) |
+		((data & (0xffULL << 24)) << 8) |
+		((data & (0xffULL << 32)) >> 8) |
+		((data & (0xffULL << 40)) >> 24) |
+		((data & (0xffULL << 48)) >> 40) |
+		((data & (0xffULL << 56)) >> 56);
+
+	return swap;
+}
+
+static unsigned int __read_4(void *ptr)
+{
+	unsigned int data = *(unsigned int *)ptr;
+
+	return data;
+}
+
+static unsigned int __read_4_sw(void *ptr)
+{
+	unsigned int data = *(unsigned int *)ptr;
+	unsigned int swap;
+
+	swap = ((data & 0xffULL) << 24) |
+		((data & (0xffULL << 8)) << 8) |
+		((data & (0xffULL << 16)) >> 8) |
+		((data & (0xffULL << 24)) >> 24);
+
+	return swap;
+}
+
+static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_8(ptr);
+}
+
+static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_4(ptr);
+}
+
+static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_8(ptr);
+}
+
+static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_4(ptr);
+}
+
+static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
+{
+	return kbuf->read_long(kbuf, ptr);
+}
+
+static int calc_index(struct kbuffer *kbuf, void *ptr)
+{
+	return (unsigned long)ptr - (unsigned long)kbuf->data;
+}
+
+static int __next_event(struct kbuffer *kbuf);
+
+/**
+ * kbuffer_alloc - allocate a new kbuffer
+ * @size:	enum to denote size of word
+ * @endian:	enum to denote endianness
+ *
+ * Allocates and returns a new kbuffer.
+ */
+struct kbuffer *
+kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
+{
+	struct kbuffer *kbuf;
+	int flags = 0;
+
+	switch (size) {
+	case KBUFFER_LSIZE_4:
+		break;
+	case KBUFFER_LSIZE_8:
+		flags |= KBUFFER_FL_LONG_8;
+		break;
+	default:
+		return NULL;
+	}
+
+	switch (endian) {
+	case KBUFFER_ENDIAN_LITTLE:
+		break;
+	case KBUFFER_ENDIAN_BIG:
+		flags |= KBUFFER_FL_BIG_ENDIAN;
+		break;
+	default:
+		return NULL;
+	}
+
+	kbuf = zmalloc(sizeof(*kbuf));
+	if (!kbuf)
+		return NULL;
+
+	kbuf->flags = flags;
+
+	if (host_is_bigendian())
+		kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
+
+	if (do_swap(kbuf)) {
+		kbuf->read_8 = __read_8_sw;
+		kbuf->read_4 = __read_4_sw;
+	} else {
+		kbuf->read_8 = __read_8;
+		kbuf->read_4 = __read_4;
+	}
+
+	if (kbuf->flags & KBUFFER_FL_LONG_8)
+		kbuf->read_long = __read_long_8;
+	else
+		kbuf->read_long = __read_long_4;
+
+	/* May be changed by kbuffer_set_old_format() */
+	kbuf->next_event = __next_event;
+
+	return kbuf;
+}
+
+/** kbuffer_free - free an allocated kbuffer
+ * @kbuf:	The kbuffer to free
+ *
+ * Can take NULL as a parameter.
+ */
+void kbuffer_free(struct kbuffer *kbuf)
+{
+	free(kbuf);
+}
+
+static unsigned int type4host(struct kbuffer *kbuf,
+			      unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return (type_len_ts >> 29) & 3;
+	else
+		return type_len_ts & 3;
+}
+
+static unsigned int len4host(struct kbuffer *kbuf,
+			     unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return (type_len_ts >> 27) & 7;
+	else
+		return (type_len_ts >> 2) & 7;
+}
+
+static unsigned int type_len4host(struct kbuffer *kbuf,
+				  unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return (type_len_ts >> 27) & ((1 << 5) - 1);
+	else
+		return type_len_ts & ((1 << 5) - 1);
+}
+
+static unsigned int ts4host(struct kbuffer *kbuf,
+			    unsigned int type_len_ts)
+{
+	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+		return type_len_ts & ((1 << 27) - 1);
+	else
+		return type_len_ts >> 5;
+}
+
+/*
+ * Linux 2.6.30 and earlier (not much earlier) had a different
+ * ring buffer format. It should be obsolete, but we handle it anyway.
+ */
+enum old_ring_buffer_type {
+	OLD_RINGBUF_TYPE_PADDING,
+	OLD_RINGBUF_TYPE_TIME_EXTEND,
+	OLD_RINGBUF_TYPE_TIME_STAMP,
+	OLD_RINGBUF_TYPE_DATA,
+};
+
+static unsigned int old_update_pointers(struct kbuffer *kbuf)
+{
+	unsigned long long extend;
+	unsigned int type_len_ts;
+	unsigned int type;
+	unsigned int len;
+	unsigned long long delta;	/* 64-bit: must hold extend << TS_SHIFT */
+	unsigned int length = 0;	/* TIME_EXTEND records carry no payload */
+	void *ptr = kbuf->data + kbuf->curr;
+
+	type_len_ts = read_4(kbuf, ptr);	/* old-format record header word */
+	ptr += 4;
+
+	type = type4host(kbuf, type_len_ts);
+	len = len4host(kbuf, type_len_ts);
+	delta = ts4host(kbuf, type_len_ts);
+
+	switch (type) {
+	case OLD_RINGBUF_TYPE_PADDING:
+		kbuf->next = kbuf->size;	/* nothing more on this subbuffer */
+		return 0;
+
+	case OLD_RINGBUF_TYPE_TIME_EXTEND:
+		extend = read_4(kbuf, ptr);	/* upper bits of the time delta */
+		extend <<= TS_SHIFT;
+		extend += delta;
+		delta = extend;
+		ptr += 4;
+		break;
+
+	case OLD_RINGBUF_TYPE_TIME_STAMP:
+		/* should never happen! */
+		kbuf->curr = kbuf->size;
+		kbuf->next = kbuf->size;
+		kbuf->index = kbuf->size;
+		return -1;
+	default:
+		if (len)
+			length = len * 4;	/* header encodes size in 4-byte words */
+		else {
+			length = read_4(kbuf, ptr);	/* explicit size word follows */
+			length -= 4;
+			ptr += 4;
+		}
+		break;
+	}
+
+	kbuf->timestamp += delta;
+	kbuf->index = calc_index(kbuf, ptr);
+	kbuf->next = kbuf->index + length;
+
+	return type;
+}
+
+static int __old_next_event(struct kbuffer *kbuf)
+{
+	int type;
+
+	do {
+		kbuf->curr = kbuf->next;
+		if (kbuf->next >= kbuf->size)
+			return -1;
+		type = old_update_pointers(kbuf);
+	} while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
+
+	return 0;
+}
+
+static unsigned int
+translate_data(struct kbuffer *kbuf, void *data, void **rptr,
+	       unsigned long long *delta, int *length)
+{
+	unsigned long long extend;
+	unsigned int type_len_ts;
+	unsigned int type_len;
+
+	type_len_ts = read_4(kbuf, data);
+	data += 4;
+
+	type_len = type_len4host(kbuf, type_len_ts);
+	*delta = ts4host(kbuf, type_len_ts);
+
+	switch (type_len) {
+	case KBUFFER_TYPE_PADDING:
+		*length = read_4(kbuf, data);
+		data += *length;
+		break;
+
+	case KBUFFER_TYPE_TIME_EXTEND:
+		extend = read_4(kbuf, data);
+		data += 4;
+		extend <<= TS_SHIFT;
+		extend += *delta;
+		*delta = extend;
+		*length = 0;
+		break;
+
+	case KBUFFER_TYPE_TIME_STAMP:
+		data += 12;
+		*length = 0;
+		break;
+	case 0:
+		*length = read_4(kbuf, data) - 4;
+		*length = (*length + 3) & ~3;
+		data += 4;
+		break;
+	default:
+		*length = type_len * 4;
+		break;
+	}
+
+	*rptr = data;
+
+	return type_len;
+}
+
+static unsigned int update_pointers(struct kbuffer *kbuf)
+{
+	unsigned long long delta;
+	unsigned int type_len;
+	int length;
+	void *ptr = kbuf->data + kbuf->curr;
+
+	type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
+
+	kbuf->timestamp += delta;
+	kbuf->index = calc_index(kbuf, ptr);
+	kbuf->next = kbuf->index + length;
+
+	return type_len;
+}
+
+/**
+ * kbuffer_translate_data - read raw data to get a record
+ * @swap:	Set to 1 if bytes in words need to be swapped when read
+ * @data:	The raw data to read
+ * @size:	Address to store the size of the event data.
+ *
+ * Returns a pointer to the event data. To determine the entire
+ * record size (record metadata + data) just add the difference between
+ * @data and the returned value to @size.
+ */
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
+{
+	unsigned long long delta;
+	struct kbuffer kbuf;
+	int type_len;
+	int length;
+	void *ptr;
+
+	if (swap) {
+		kbuf.read_8 = __read_8_sw;
+		kbuf.read_4 = __read_4_sw;
+		kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
+	} else {
+		kbuf.read_8 = __read_8;
+		kbuf.read_4 = __read_4;
+		kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0;
+	}
+
+	type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
+	switch (type_len) {
+	case KBUFFER_TYPE_PADDING:
+	case KBUFFER_TYPE_TIME_EXTEND:
+	case KBUFFER_TYPE_TIME_STAMP:
+		return NULL;
+	};
+
+	*size = length;
+
+	return ptr;
+}
+
+static int __next_event(struct kbuffer *kbuf)
+{
+	int type;
+
+	do {
+		kbuf->curr = kbuf->next;
+		if (kbuf->next >= kbuf->size)
+			return -1;
+		type = update_pointers(kbuf);
+	} while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING);
+
+	return 0;
+}
+
+static int next_event(struct kbuffer *kbuf)
+{
+	return kbuf->next_event(kbuf);
+}
+
+/**
+ * kbuffer_next_event - increment the current pointer
+ * @kbuf:	The kbuffer to read
+ * @ts:		Address to store the next record's timestamp (may be NULL to ignore)
+ *
+ * Increments the pointers into the subbuffer of the kbuffer to point to the
+ * next event so that the next kbuffer_read_event() will return a
+ * new event.
+ *
+ * Returns the data of the next event if a new event exists on the subbuffer,
+ * NULL otherwise.
+ */
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+	int ret;
+
+	if (!kbuf || !kbuf->subbuffer)
+		return NULL;
+
+	ret = next_event(kbuf);
+	if (ret < 0)
+		return NULL;
+
+	if (ts)
+		*ts = kbuf->timestamp;
+
+	return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
+ * @kbuf:	The kbuffer to load
+ * @subbuffer:	The subbuffer to load into @kbuf.
+ *
+ * Load a new subbuffer (page) into @kbuf. This will reset all
+ * the pointers and update the @kbuf timestamp. The next read will
+ * return the first event on @subbuffer.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
+{
+	unsigned long long flags;
+	void *ptr = subbuffer;
+
+	if (!kbuf || !subbuffer)
+		return -1;
+
+	kbuf->subbuffer = subbuffer;
+
+	kbuf->timestamp = read_8(kbuf, ptr);
+	ptr += 8;
+
+	kbuf->curr = 0;
+
+	if (kbuf->flags & KBUFFER_FL_LONG_8)
+		kbuf->start = 16;
+	else
+		kbuf->start = 12;
+
+	kbuf->data = subbuffer + kbuf->start;
+
+	flags = read_long(kbuf, ptr);
+	kbuf->size = (unsigned int)flags & COMMIT_MASK;
+
+	if (flags & MISSING_EVENTS) {
+		if (flags & MISSING_STORED) {
+			ptr = kbuf->data + kbuf->size;
+			kbuf->lost_events = read_long(kbuf, ptr);
+		} else
+			kbuf->lost_events = -1;
+	} else
+		kbuf->lost_events = 0;
+
+	kbuf->index = 0;
+	kbuf->next = 0;
+
+	next_event(kbuf);
+
+	return 0;
+}
+
+/**
+ * kbuffer_read_event - read the next event in the kbuffer subbuffer
+ * @kbuf:	The kbuffer to read from
+ * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * Returns a pointer to the data part of the current event.
+ * NULL if no event is left on the subbuffer.
+ */
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+	if (!kbuf || !kbuf->subbuffer)
+		return NULL;
+
+	if (kbuf->curr >= kbuf->size)
+		return NULL;
+
+	if (ts)
+		*ts = kbuf->timestamp;
+	return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_timestamp - Return the timestamp of the current event
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the timestamp of the current (next) event.
+ */
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
+{
+	return kbuf->timestamp;
+}
+
+/**
+ * kbuffer_read_at_offset - read the event that is at offset
+ * @kbuf:	The kbuffer to read from
+ * @offset:	The offset into the subbuffer
+ * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * The @offset must be an index from the @kbuf subbuffer beginning.
+ * If @offset is bigger than the stored subbuffer, NULL will be returned.
+ *
+ * Returns the data of the record that is at @offset. Note, @offset does
+ * not need to be the start of the record, the offset just needs to be
+ * in the record (or beginning of it).
+ *
+ * Note, the kbuf timestamp and pointers are updated to the
+ * returned record. That is, kbuffer_read_event() will return the same
+ * data and timestamp, and kbuffer_next_event() will increment from
+ * this record.
+ */
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
+			     unsigned long long *ts)
+{
+	void *data;
+
+	if (offset < kbuf->start)
+		offset = 0;
+	else
+		offset -= kbuf->start;	/* make offset relative to kbuf->data */
+
+	/* Reset the buffer */
+	kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
+	data = kbuffer_read_event(kbuf, ts);	/* result if the loop never runs */
+	while (kbuf->curr < offset) {
+		data = kbuffer_next_event(kbuf, ts);
+		if (!data)
+			break;
+	}
+
+	return data;
+}
+
+/**
+ * kbuffer_subbuffer_size - the size of the loaded subbuffer
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the size of the subbuffer. Note, this size is
+ * where the last event resides. The stored subbuffer may actually be
+ * bigger due to padding and such.
+ */
+int kbuffer_subbuffer_size(struct kbuffer *kbuf)
+{
+	return kbuf->size;
+}
+
+/**
+ * kbuffer_curr_index - Return the index of the record
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the index from the start of the data part of
+ * the subbuffer to the current location. Note this is not
+ * from the start of the subbuffer. An index of zero will
+ * point to the first record. Use kbuffer_curr_offset() for
+ * the actual offset (that can be used by kbuffer_read_at_offset())
+ */
+int kbuffer_curr_index(struct kbuffer *kbuf)
+{
+	return kbuf->curr;
+}
+
+/**
+ * kbuffer_curr_offset - Return the offset of the record
+ * @kbuf:	The kbuffer to read from
+ *
+ * Returns the offset from the start of the subbuffer to the
+ * current location.
+ */
+int kbuffer_curr_offset(struct kbuffer *kbuf)
+{
+	return kbuf->curr + kbuf->start;
+}
+
+/**
+ * kbuffer_event_size - return the size of the event data
+ * @kbuf:	The kbuffer to read
+ *
+ * Returns the size of the event data (the payload not counting
+ * the meta data of the record) of the current event.
+ */
+int kbuffer_event_size(struct kbuffer *kbuf)
+{
+	return kbuf->next - kbuf->index;
+}
+
+/**
+ * kbuffer_curr_size - return the size of the entire record
+ * @kbuf:	The kbuffer to read
+ *
+ * Returns the size of the entire record (meta data and payload)
+ * of the current event.
+ */
+int kbuffer_curr_size(struct kbuffer *kbuf)
+{
+	return kbuf->next - kbuf->curr;
+}
+
+/**
+ * kbuffer_missed_events - return the # of missed events from last event.
+ * @kbuf: 	The kbuffer to read from
+ *
+ * Returns the # of missed events (if recorded) before the current
+ * event. Note, only events on the beginning of a subbuffer can
+ * have missed events, all other events within the buffer will be
+ * zero.
+ */
+int kbuffer_missed_events(struct kbuffer *kbuf)
+{
+	/* Only the first event can have missed events */
+	if (kbuf->curr)
+		return 0;
+
+	return kbuf->lost_events;
+}
+
+/**
+ * kbuffer_set_old_format - set the kbuffer to use the old format parsing
+ * @kbuf:	The kbuffer to set
+ *
+ * This is obsolete (or should be). The first kernels to use the
+ * new ring buffer had a slightly different ring buffer format
+ * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
+ * but should not be counted on in the future.
+ */
+void kbuffer_set_old_format(struct kbuffer *kbuf)
+{
+	kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
+
+	kbuf->next_event = __old_next_event;
+}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
new file mode 100644
index 0000000..c831f64
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _KBUFFER_H
+#define _KBUFFER_H
+
+#ifndef TS_SHIFT
+#define TS_SHIFT		27
+#endif
+
+enum kbuffer_endian {
+	KBUFFER_ENDIAN_BIG,
+	KBUFFER_ENDIAN_LITTLE,
+};
+
+enum kbuffer_long_size {
+	KBUFFER_LSIZE_4,
+	KBUFFER_LSIZE_8,
+};
+
+enum {
+	KBUFFER_TYPE_PADDING		= 29,
+	KBUFFER_TYPE_TIME_EXTEND	= 30,
+	KBUFFER_TYPE_TIME_STAMP		= 31,
+};
+
+struct kbuffer;
+
+struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian);
+void kbuffer_free(struct kbuffer *kbuf);
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
+
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
+
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts);
+
+int kbuffer_curr_index(struct kbuffer *kbuf);
+
+int kbuffer_curr_offset(struct kbuffer *kbuf);
+int kbuffer_curr_size(struct kbuffer *kbuf);
+int kbuffer_event_size(struct kbuffer *kbuf);
+int kbuffer_missed_events(struct kbuffer *kbuf);
+int kbuffer_subbuffer_size(struct kbuffer *kbuf);
+
+void kbuffer_set_old_format(struct kbuffer *kbuf);
+
+#endif /* _KBUFFER_H */
-- 
cgit v0.10.2


From 30f36762aab575f2894f7528d91ecd0b6e26e3e4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:22 +0900
Subject: perf util: Save page size in a trace file to pevent

We now have page_size field in struct pevent, save the actual size of
the system.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-8-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index af215c0..c6b491b 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -349,6 +349,7 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 	int show_funcs = 0;
 	int show_printk = 0;
 	ssize_t size = -1;
+	int file_page_size;
 	struct pevent *pevent;
 	int err;
 
@@ -393,10 +394,12 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 		goto out;
 	long_size = buf[0];
 
-	page_size = read4(pevent);
-	if (!page_size)
+	file_page_size = read4(pevent);
+	if (!file_page_size)
 		goto out;
 
+	pevent_set_page_size(pevent, file_page_size);
+
 	err = read_header_files(pevent);
 	if (err)
 		goto out;
-- 
cgit v0.10.2


From 59657f9c8dd91363f1c94c1f749b33ecf626cfc2 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:23 +0900
Subject: perf util: Save long size of traced system

Save size of long type of system to struct pevent.  Since original
static variable was not used anywhere, just get rid of it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-9-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index c6b491b..6166294 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -41,7 +41,6 @@ static int input_fd;
 
 int file_bigendian;
 int host_bigendian;
-static int long_size;
 
 static ssize_t trace_data_size;
 static bool repipe;
@@ -231,12 +230,6 @@ static int read_header_files(struct pevent *pevent)
 	size = read8(pevent);
 	skip(size);
 
-	/*
-	 * The size field in the page is of type long,
-	 * use that instead, since it represents the kernel.
-	 */
-	long_size = header_page_size_size;
-
 	if (do_read(buf, 13) < 0)
 		return -1;
 
@@ -349,6 +342,7 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 	int show_funcs = 0;
 	int show_printk = 0;
 	ssize_t size = -1;
+	int file_long_size;
 	int file_page_size;
 	struct pevent *pevent;
 	int err;
@@ -392,12 +386,13 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 
 	if (do_read(buf, 1) < 0)
 		goto out;
-	long_size = buf[0];
+	file_long_size = buf[0];
 
 	file_page_size = read4(pevent);
 	if (!file_page_size)
 		goto out;
 
+	pevent_set_long_size(pevent, file_long_size);
 	pevent_set_page_size(pevent, file_page_size);
 
 	err = read_header_files(pevent);
-- 
cgit v0.10.2


From 63af28fa167374e470c32570b7c955c4d973dda4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:24 +0900
Subject: perf util: Make file/host_bigendian variable local

They're not used anywhere, just make them local variables.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-10-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 6166294..0dd9fbd 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -39,9 +39,6 @@
 
 static int input_fd;
 
-int file_bigendian;
-int host_bigendian;
-
 static ssize_t trace_data_size;
 static bool repipe;
 
@@ -342,6 +339,8 @@ ssize_t trace_report(int fd, struct pevent **ppevent, bool __repipe)
 	int show_funcs = 0;
 	int show_printk = 0;
 	ssize_t size = -1;
+	int file_bigendian;
+	int host_bigendian;
 	int file_long_size;
 	int file_page_size;
 	struct pevent *pevent;
-- 
cgit v0.10.2


From 2b2efc7fb8a3ede936580e870c5f809e491c925c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:25 +0900
Subject: perf util: Skip reading header_event file

It seems perf does not parse header_event file so we can skip it as we
do for header_page file.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-11-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 0dd9fbd..fa45fca 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -212,7 +212,6 @@ static int read_ftrace_printk(struct pevent *pevent)
 static int read_header_files(struct pevent *pevent)
 {
 	unsigned long long size;
-	char *header_event;
 	char buf[BUFSIZ];
 	int ret = 0;
 
@@ -236,14 +235,8 @@ static int read_header_files(struct pevent *pevent)
 	}
 
 	size = read8(pevent);
-	header_event = malloc(size);
-	if (header_event == NULL)
-		return -1;
-
-	if (do_read(header_event, size) < 0)
-		ret = -1;
+	skip(size);
 
-	free(header_event);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 94b4d89e0021fe3e10415feede364a3fccaa2f89 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:26 +0900
Subject: perf util: Parse header_page to get proper long size

The header_page file describes the format of the ring buffer page
which is used by ftrace (not perf).  The size of the "commit" field (I
guess its older name was 'size') represents the real size of the long
type used by the kernel.  So update the pevent's long size.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-12-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index fa45fca..f211227 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -212,6 +212,7 @@ static int read_ftrace_printk(struct pevent *pevent)
 static int read_header_files(struct pevent *pevent)
 {
 	unsigned long long size;
+	char *header_page;
 	char buf[BUFSIZ];
 	int ret = 0;
 
@@ -224,7 +225,26 @@ static int read_header_files(struct pevent *pevent)
 	}
 
 	size = read8(pevent);
-	skip(size);
+
+	header_page = malloc(size);
+	if (header_page == NULL)
+		return -1;
+
+	if (do_read(header_page, size) < 0) {
+		pr_debug("did not read header page");
+		free(header_page);
+		return -1;
+	}
+
+	if (!pevent_parse_header_page(pevent, header_page, size,
+				      pevent_get_long_size(pevent))) {
+		/*
+		 * The commit field in the page is of type long,
+		 * use that instead, since it represents the kernel.
+		 */
+		pevent_set_long_size(pevent, pevent->header_page_size_size);
+	}
+	free(header_page);
 
 	if (do_read(buf, 13) < 0)
 		return -1;
-- 
cgit v0.10.2


From 9515b2eb7670a98fb9bdd8ceeefeea6ffb9f53fc Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:27 +0900
Subject: perf util: Get rid of unused header_page_* variables

They're not used anywhere and same information is kept in a pevent
already.  So let's get rid of them.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-13-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 4454835..0deae88 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -28,10 +28,6 @@
 #include "util.h"
 #include "trace-event.h"
 
-int header_page_size_size;
-int header_page_ts_size;
-int header_page_data_offset;
-
 bool latency_format;
 
 struct pevent *read_trace_init(int file_bigendian, int host_bigendian)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 11eb7fd..761c484 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -10,10 +10,6 @@ struct perf_sample;
 union perf_event;
 struct perf_tool;
 
-extern int header_page_size_size;
-extern int header_page_ts_size;
-extern int header_page_data_offset;
-
 extern bool latency_format;
 extern struct pevent *perf_pevent;
 
-- 
cgit v0.10.2


From 47390ae2afb6695c56810a9fc74fb930266addd0 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:28 +0900
Subject: perf script: Adopt latency_format variable

It's the only user of the variable, so move it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-14-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 92d4658..3de8979 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -24,6 +24,7 @@ static u64			last_timestamp;
 static u64			nr_unordered;
 extern const struct option	record_options[];
 static bool			no_callchain;
+static bool			latency_format;
 static bool			system_wide;
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 0deae88..fe7a27d 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -28,8 +28,6 @@
 #include "util.h"
 #include "trace-event.h"
 
-bool latency_format;
-
 struct pevent *read_trace_init(int file_bigendian, int host_bigendian)
 {
 	struct pevent *pevent = pevent_alloc();
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 761c484..ed30c9c 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -10,7 +10,6 @@ struct perf_sample;
 union perf_event;
 struct perf_tool;
 
-extern bool latency_format;
 extern struct pevent *perf_pevent;
 
 enum {
-- 
cgit v0.10.2


From 077f159d0402ac90a3ea9975f4089a042e0be065 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:29 +0900
Subject: perf util: Rename read_*() functions in trace-event-info.c

It's confusing to have the same name for two different functions which
do opposite things.  Since what they do in this file is read *AND*
write some of the tracing metadata files, renaming them to record_*()
looks better to me.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-15-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index a42624a..a800f2b 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -101,7 +101,7 @@ out:
 	return err;
 }
 
-static int read_header_files(void)
+static int record_header_files(void)
 {
 	char *path;
 	struct stat st;
@@ -240,7 +240,7 @@ out:
 	return err;
 }
 
-static int read_ftrace_files(struct tracepoint_path *tps)
+static int record_ftrace_files(struct tracepoint_path *tps)
 {
 	char *path;
 	int ret;
@@ -269,7 +269,7 @@ static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
 	return false;
 }
 
-static int read_event_files(struct tracepoint_path *tps)
+static int record_event_files(struct tracepoint_path *tps)
 {
 	struct dirent *dent;
 	struct stat st;
@@ -344,7 +344,7 @@ out:
 	return err;
 }
 
-static int read_proc_kallsyms(void)
+static int record_proc_kallsyms(void)
 {
 	unsigned int size;
 	const char *path = "/proc/kallsyms";
@@ -362,7 +362,7 @@ static int read_proc_kallsyms(void)
 	return record_file(path, 4);
 }
 
-static int read_ftrace_printk(void)
+static int record_ftrace_printk(void)
 {
 	unsigned int size;
 	char *path;
@@ -539,19 +539,19 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs,
 	err = tracing_data_header();
 	if (err)
 		goto out;
-	err = read_header_files();
+	err = record_header_files();
 	if (err)
 		goto out;
-	err = read_ftrace_files(tps);
+	err = record_ftrace_files(tps);
 	if (err)
 		goto out;
-	err = read_event_files(tps);
+	err = record_event_files(tps);
 	if (err)
 		goto out;
-	err = read_proc_kallsyms();
+	err = record_proc_kallsyms();
 	if (err)
 		goto out;
-	err = read_ftrace_printk();
+	err = record_ftrace_printk();
 
 out:
 	/*
-- 
cgit v0.10.2


From cfd0e8cf7fe7ad6f6c08e23ca4410bd718eeaf0b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:30 +0900
Subject: perf util: No need to call read_trace_init() in tracing_data_header()

It's useless to call the read_trace_init() function at this time as we
don't need the returned pevent, and it confuses me. :)

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-16-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index a800f2b..f3c9e55 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -476,8 +476,6 @@ static int tracing_data_header(void)
 	else
 		buf[0] = 0;
 
-	read_trace_init(buf[0], buf[0]);
-
 	if (write(output_fd, buf, 1) != 1)
 		return -1;
 
-- 
cgit v0.10.2


From 45a9ee1b3fa2d8c3fb7a0c7e5a7b77281a4239a6 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:20:31 +0900
Subject: perf util: Remove unused enum and macro in trace-event.h

They're internals of ftrace ring-buffer and not used in perf code
directly.  As it now resides on libtraceevent/kbuffer.h, just get rid of
them.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1370323231-14022-17-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index ed30c9c..669a64a 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -12,16 +12,6 @@ struct perf_tool;
 
 extern struct pevent *perf_pevent;
 
-enum {
-	RINGBUF_TYPE_PADDING		= 29,
-	RINGBUF_TYPE_TIME_EXTEND	= 30,
-	RINGBUF_TYPE_TIME_STAMP		= 31,
-};
-
-#ifndef TS_SHIFT
-#define TS_SHIFT		27
-#endif
-
 int bigendian(void);
 
 struct pevent *read_trace_init(int file_bigendian, int host_bigendian);
-- 
cgit v0.10.2


From f36f83f947ede547833e462696893f866df77324 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 14:46:19 +0900
Subject: perf sched: Move struct perf_sched definition out of cmd_sched()

For some reason it consumed quite a lot of compile time when declared
as a local variable, and the slowdown disappeared when it was moved out
of the function.  Moving other variables/tables didn't help.

On my system the build time for this single-file change dropped from
11s to 3s.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1370324779-16921-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index fed9ae4..fba4a94 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1662,28 +1662,29 @@ static int __cmd_record(int argc, const char **argv)
 	return cmd_record(i, rec_argv, NULL);
 }
 
+static const char default_sort_order[] = "avg, max, switch, runtime";
+static struct perf_sched sched = {
+	.tool = {
+		.sample		 = perf_sched__process_tracepoint_sample,
+		.comm		 = perf_event__process_comm,
+		.lost		 = perf_event__process_lost,
+		.fork		 = perf_event__process_fork,
+		.ordered_samples = true,
+	},
+	.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
+	.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
+	.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
+	.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
+	.curr_pid	      = { [0 ... MAX_CPUS - 1] = -1 },
+	.sort_order	      = default_sort_order,
+	.replay_repeat	      = 10,
+	.profile_cpu	      = -1,
+	.next_shortname1      = 'A',
+	.next_shortname2      = '0',
+};
+
 int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	const char default_sort_order[] = "avg, max, switch, runtime";
-	struct perf_sched sched = {
-		.tool = {
-			.sample		 = perf_sched__process_tracepoint_sample,
-			.comm		 = perf_event__process_comm,
-			.lost		 = perf_event__process_lost,
-			.fork		 = perf_event__process_fork,
-			.ordered_samples = true,
-		},
-		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
-		.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
-		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
-		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
-		.curr_pid	      = { [0 ... MAX_CPUS - 1] = -1 },
-		.sort_order	      = default_sort_order,
-		.replay_repeat	      = 10,
-		.profile_cpu	      = -1,
-		.next_shortname1      = 'A',
-		.next_shortname2      = '0',
-	};
 	const struct option latency_options[] = {
 	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
 		   "sort by key(s): runtime, switch, avg, max"),
-- 
cgit v0.10.2


From f1d9a530553eed9e598d1597a2a348f01810dd4a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 18:22:12 +0900
Subject: perf gtk/hists: Use GtkTreeStore instead of GtkListStore

The GtkTreeStore can save items in a tree-like way.  This is a
preparation for supporting callgraphs in the hist browser.

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370337737-30812-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 9708dd5..cb6a9b4 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -131,7 +131,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	GType col_types[MAX_COLUMNS];
 	GtkCellRenderer *renderer;
 	struct sort_entry *se;
-	GtkListStore *store;
+	GtkTreeStore *store;
 	struct rb_node *nd;
 	GtkWidget *view;
 	int col_idx;
@@ -156,7 +156,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		col_types[nr_cols++] = G_TYPE_STRING;
 	}
 
-	store = gtk_list_store_newv(nr_cols, col_types);
+	store = gtk_tree_store_newv(nr_cols, col_types);
 
 	view = gtk_tree_view_new();
 
@@ -199,7 +199,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		if (percent < min_pcnt)
 			continue;
 
-		gtk_list_store_append(store, &iter);
+		gtk_tree_store_append(store, &iter, NULL);
 
 		col_idx = 0;
 
@@ -209,7 +209,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 			else
 				fmt->entry(&hpp, h);
 
-			gtk_list_store_set(store, &iter, col_idx++, s, -1);
+			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
 
 		list_for_each_entry(se, &hist_entry__sort_list, list) {
@@ -219,7 +219,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 			se->se_snprintf(h, s, ARRAY_SIZE(s),
 					hists__col_len(hists, se->se_width_idx));
 
-			gtk_list_store_set(store, &iter, col_idx++, s, -1);
+			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
 	}
 
-- 
cgit v0.10.2


From 2bbc5874251830fee170a0fc97fa5788717d2fd9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 18:22:13 +0900
Subject: perf gtk/hists: Add support for callchains

Display callchain information in the symbol column.  It's only enabled
when recorded with -g and has symbol sort key.

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370337737-30812-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index cb6a9b4..226c7e1 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -124,6 +124,55 @@ void perf_gtk__init_hpp(void)
 				perf_gtk__hpp_color_overhead_guest_us;
 }
 
+static void callchain_list__sym_name(struct callchain_list *cl,
+				     char *bf, size_t bfsize)
+{
+	if (cl->ms.sym)
+		scnprintf(bf, bfsize, "%s", cl->ms.sym->name);
+	else
+		scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+}
+
+static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
+				    GtkTreeIter *parent, int col)
+{
+	struct rb_node *nd;
+	bool has_single_node = (rb_first(root) == rb_last(root));
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		struct callchain_node *node;
+		struct callchain_list *chain;
+		GtkTreeIter iter, new_parent;
+		bool need_new_parent;
+
+		node = rb_entry(nd, struct callchain_node, rb_node);
+
+		new_parent = *parent;
+		need_new_parent = !has_single_node && (node->val_nr > 1);
+
+		list_for_each_entry(chain, &node->val, list) {
+			char buf[128];
+
+			gtk_tree_store_append(store, &iter, &new_parent);
+
+			callchain_list__sym_name(chain, buf, sizeof(buf));
+			gtk_tree_store_set(store, &iter, col, buf, -1);
+
+			if (need_new_parent) {
+				/*
+				 * Only show the top-most symbol in a callchain
+				 * if it's not the only callchain.
+				 */
+				new_parent = iter;
+				need_new_parent = false;
+			}
+		}
+
+		/* Now 'iter' contains info of the last callchain_list */
+		perf_gtk__add_callchain(&node->rb_root, store, &iter, col);
+	}
+}
+
 static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 				 float min_pcnt)
 {
@@ -135,6 +184,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	struct rb_node *nd;
 	GtkWidget *view;
 	int col_idx;
+	int sym_col = -1;
 	int nr_cols;
 	char s[512];
 
@@ -153,6 +203,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		if (se->elide)
 			continue;
 
+		if (se == &sort_sym)
+			sym_col = nr_cols;
+
 		col_types[nr_cols++] = G_TYPE_STRING;
 	}
 
@@ -183,6 +236,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 							    col_idx++, NULL);
 	}
 
+	if (symbol_conf.use_callchain && sort__has_sym) {
+		GtkTreeViewColumn *column;
+
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), sym_col);
+		gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view), column);
+	}
+
 	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
 
 	g_object_unref(GTK_TREE_MODEL(store));
@@ -221,6 +281,11 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
 			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
+
+		if (symbol_conf.use_callchain && sort__has_sym) {
+			perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
+						sym_col);
+		}
 	}
 
 	gtk_container_add(GTK_CONTAINER(window), view);
-- 
cgit v0.10.2


From cc60f24e225e50a0b57398f9ba105fd8ffcf4bb3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 18:22:14 +0900
Subject: perf gtk/hists: Display callchain overhead also

Display callchain percent value in the overhead column.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370337737-30812-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 226c7e1..fa9f8a0 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -134,7 +134,7 @@ static void callchain_list__sym_name(struct callchain_list *cl,
 }
 
 static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
-				    GtkTreeIter *parent, int col)
+				    GtkTreeIter *parent, int col, u64 total)
 {
 	struct rb_node *nd;
 	bool has_single_node = (rb_first(root) == rb_last(root));
@@ -144,9 +144,14 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 		struct callchain_list *chain;
 		GtkTreeIter iter, new_parent;
 		bool need_new_parent;
+		double percent;
+		u64 hits, child_total;
 
 		node = rb_entry(nd, struct callchain_node, rb_node);
 
+		hits = callchain_cumul_hits(node);
+		percent = 100.0 * hits / total;
+
 		new_parent = *parent;
 		need_new_parent = !has_single_node && (node->val_nr > 1);
 
@@ -155,6 +160,9 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 
 			gtk_tree_store_append(store, &iter, &new_parent);
 
+			scnprintf(buf, sizeof(buf), "%5.2f%%", percent);
+			gtk_tree_store_set(store, &iter, 0, buf, -1);
+
 			callchain_list__sym_name(chain, buf, sizeof(buf));
 			gtk_tree_store_set(store, &iter, col, buf, -1);
 
@@ -168,8 +176,14 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 			}
 		}
 
+		if (callchain_param.mode == CHAIN_GRAPH_REL)
+			child_total = node->children_hit;
+		else
+			child_total = total;
+
 		/* Now 'iter' contains info of the last callchain_list */
-		perf_gtk__add_callchain(&node->rb_root, store, &iter, col);
+		perf_gtk__add_callchain(&node->rb_root, store, &iter, col,
+					child_total);
 	}
 }
 
@@ -283,8 +297,15 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		}
 
 		if (symbol_conf.use_callchain && sort__has_sym) {
+			u64 total;
+
+			if (callchain_param.mode == CHAIN_GRAPH_REL)
+				total = h->stat.period;
+			else
+				total = hists->stats.total_period;
+
 			perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
-						sym_col);
+						sym_col, total);
 		}
 	}
 
-- 
cgit v0.10.2


From 1a309426b4fae258dbd86ac0c575a849bf163b7b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 18:22:15 +0900
Subject: perf gtk/hists: Make column headers resizable

Sometimes it's annoying when some symbols have very weird long
names.  So it might be a good idea to make column sizes changeable.

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370337737-30812-5-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index fa9f8a0..b4a0dd2 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -250,11 +250,16 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 							    col_idx++, NULL);
 	}
 
-	if (symbol_conf.use_callchain && sort__has_sym) {
+	for (col_idx = 0; col_idx < nr_cols; col_idx++) {
 		GtkTreeViewColumn *column;
 
-		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), sym_col);
-		gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view), column);
+		column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+		gtk_tree_view_column_set_resizable(column, TRUE);
+
+		if (col_idx == sym_col) {
+			gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+							  column);
+		}
 	}
 
 	gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
-- 
cgit v0.10.2


From 450f390ad2538c5e35d830fa5c624708a77dce0a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 18:22:16 +0900
Subject: perf gtk/hists: Add a double-click handler for callchains

If a callchain is displayed, add a "row-activated" signal handler for
handling a double-click or an ENTER key press.

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370337737-30812-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index b4a0dd2..3a5d013 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -187,6 +187,18 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
 	}
 }
 
+static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
+			     GtkTreeViewColumn *col __maybe_unused,
+			     gpointer user_data __maybe_unused)
+{
+	bool expanded = gtk_tree_view_row_expanded(view, path);
+
+	if (expanded)
+		gtk_tree_view_collapse_row(view, path);
+	else
+		gtk_tree_view_expand_row(view, path, FALSE);
+}
+
 static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 				 float min_pcnt)
 {
@@ -314,6 +326,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		}
 	}
 
+	g_signal_connect(view, "row-activated",
+			 G_CALLBACK(on_row_activated), NULL);
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
-- 
cgit v0.10.2


From 9d58d2f66c92fe47dd395947a3d51b9ace7dcc92 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 18:22:17 +0900
Subject: perf gtk/hists: Set rules hint for the hist browser

The 'rules' means that every second line of the tree view has a shaded
background, which makes it easier to see which cell belongs to which
row in the tree view.  It can be useful for a tree view that has a lot
of rows.

Reviewed-by: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1370337737-30812-7-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 3a5d013..3254903 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -326,6 +326,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 		}
 	}
 
+	gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
 	g_signal_connect(view, "row-activated",
 			 G_CALLBACK(on_row_activated), NULL);
 	gtk_container_add(GTK_CONTAINER(window), view);
-- 
cgit v0.10.2


From 63c2c9f8f24378ebf46d4a9d542863bb733ef05c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:20 +0300
Subject: perf inject: Remove unused parameter

The 'machine' parameter is unused in 'perf_event__repipe_synth()' and
some callers pass NULL anyway.  So remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 84ad6ab..f299ddf 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -38,8 +38,7 @@ struct event_entry {
 };
 
 static int perf_event__repipe_synth(struct perf_tool *tool,
-				    union perf_event *event,
-				    struct machine *machine __maybe_unused)
+				    union perf_event *event)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	uint32_t size;
@@ -65,20 +64,20 @@ static int perf_event__repipe_op2_synth(struct perf_tool *tool,
 					struct perf_session *session
 					__maybe_unused)
 {
-	return perf_event__repipe_synth(tool, event, NULL);
+	return perf_event__repipe_synth(tool, event);
 }
 
 static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
 					       union perf_event *event)
 {
-	return perf_event__repipe_synth(tool, event, NULL);
+	return perf_event__repipe_synth(tool, event);
 }
 
 static int perf_event__repipe_tracing_data_synth(union perf_event *event,
 						 struct perf_session *session
 						 __maybe_unused)
 {
-	return perf_event__repipe_synth(NULL, event, NULL);
+	return perf_event__repipe_synth(NULL, event);
 }
 
 static int perf_event__repipe_attr(union perf_event *event,
@@ -89,15 +88,15 @@ static int perf_event__repipe_attr(union perf_event *event,
 	if (ret)
 		return ret;
 
-	return perf_event__repipe_synth(NULL, event, NULL);
+	return perf_event__repipe_synth(NULL, event);
 }
 
 static int perf_event__repipe(struct perf_tool *tool,
 			      union perf_event *event,
 			      struct perf_sample *sample __maybe_unused,
-			      struct machine *machine)
+			      struct machine *machine __maybe_unused)
 {
-	return perf_event__repipe_synth(tool, event, machine);
+	return perf_event__repipe_synth(tool, event);
 }
 
 typedef int (*inject_handler)(struct perf_tool *tool,
@@ -119,7 +118,7 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
 
 	build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 
-	return perf_event__repipe_synth(tool, event, machine);
+	return perf_event__repipe_synth(tool, event);
 }
 
 static int perf_event__repipe_mmap(struct perf_tool *tool,
@@ -153,7 +152,7 @@ static int perf_event__repipe_tracing_data(union perf_event *event,
 {
 	int err;
 
-	perf_event__repipe_synth(NULL, event, NULL);
+	perf_event__repipe_synth(NULL, event);
 	err = perf_event__process_tracing_data(event, session);
 
 	return err;
-- 
cgit v0.10.2


From 47c3d1091cb68e727b840efd6fa3709d5b1ddfc2 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:21 +0300
Subject: perf tools: Fix missing tool parameter

The 'inject' command expects to get a reference to 'struct perf_inject'
from its 'tool' member.  For that to work, 'tool' needs to be a
parameter of all tool callbacks.  Make it so.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index f299ddf..c943513 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -73,22 +73,17 @@ static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
-static int perf_event__repipe_tracing_data_synth(union perf_event *event,
-						 struct perf_session *session
-						 __maybe_unused)
-{
-	return perf_event__repipe_synth(NULL, event);
-}
-
-static int perf_event__repipe_attr(union perf_event *event,
-				   struct perf_evlist **pevlist __maybe_unused)
+static int perf_event__repipe_attr(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_evlist **pevlist)
 {
 	int ret;
-	ret = perf_event__process_attr(event, pevlist);
+
+	ret = perf_event__process_attr(tool, event, pevlist);
 	if (ret)
 		return ret;
 
-	return perf_event__repipe_synth(NULL, event);
+	return perf_event__repipe_synth(tool, event);
 }
 
 static int perf_event__repipe(struct perf_tool *tool,
@@ -147,13 +142,14 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
 	return err;
 }
 
-static int perf_event__repipe_tracing_data(union perf_event *event,
+static int perf_event__repipe_tracing_data(struct perf_tool *tool,
+					   union perf_event *event,
 					   struct perf_session *session)
 {
 	int err;
 
-	perf_event__repipe_synth(NULL, event);
-	err = perf_event__process_tracing_data(event, session);
+	perf_event__repipe_synth(tool, event);
+	err = perf_event__process_tracing_data(tool, event, session);
 
 	return err;
 }
@@ -407,7 +403,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.unthrottle	= perf_event__repipe,
 			.attr		= perf_event__repipe_attr,
 			.event_type	= perf_event__repipe_event_type_synth,
-			.tracing_data	= perf_event__repipe_tracing_data_synth,
+			.tracing_data	= perf_event__repipe_op2_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 		},
 		.input_name  = "-",
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a4dafbe..d12d79c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2922,7 +2922,8 @@ int perf_event__synthesize_attrs(struct perf_tool *tool,
 	return err;
 }
 
-int perf_event__process_attr(union perf_event *event,
+int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+			     union perf_event *event,
 			     struct perf_evlist **pevlist)
 {
 	u32 i, ids, n_ids;
@@ -3065,7 +3066,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
 	return aligned_size;
 }
 
-int perf_event__process_tracing_data(union perf_event *event,
+int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
 				     struct perf_session *session)
 {
 	ssize_t size_read, padding, size = event->tracing_data.size;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 16a3e83..2d1ca7d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -130,7 +130,8 @@ int perf_event__synthesize_attr(struct perf_tool *tool,
 int perf_event__synthesize_attrs(struct perf_tool *tool,
 				 struct perf_session *session,
 				 perf_event__handler_t process);
-int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist);
+int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+			     struct perf_evlist **pevlist);
 
 int perf_event__synthesize_event_type(struct perf_tool *tool,
 				      u64 event_id, char *name,
@@ -145,7 +146,8 @@ int perf_event__process_event_type(struct perf_tool *tool,
 int perf_event__synthesize_tracing_data(struct perf_tool *tool,
 					int fd, struct perf_evlist *evlist,
 					perf_event__handler_t process);
-int perf_event__process_tracing_data(union perf_event *event,
+int perf_event__process_tracing_data(struct perf_tool *tool,
+				     union perf_event *event,
 				     struct perf_session *session);
 
 int perf_event__synthesize_build_id(struct perf_tool *tool,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ad47fb9..6b71b88 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -193,7 +193,9 @@ void perf_session__delete(struct perf_session *self)
 	vdso__exit();
 }
 
-static int process_event_synth_tracing_data_stub(union perf_event *event
+static int process_event_synth_tracing_data_stub(struct perf_tool *tool
+						 __maybe_unused,
+						 union perf_event *event
 						 __maybe_unused,
 						 struct perf_session *session
 						__maybe_unused)
@@ -202,7 +204,8 @@ static int process_event_synth_tracing_data_stub(union perf_event *event
 	return 0;
 }
 
-static int process_event_synth_attr_stub(union perf_event *event __maybe_unused,
+static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
+					 union perf_event *event __maybe_unused,
 					 struct perf_evlist **pevlist
 					 __maybe_unused)
 {
@@ -921,7 +924,7 @@ static int perf_session__process_user_event(struct perf_session *session, union
 	/* These events are processed right away */
 	switch (event->header.type) {
 	case PERF_RECORD_HEADER_ATTR:
-		err = tool->attr(event, &session->evlist);
+		err = tool->attr(tool, event, &session->evlist);
 		if (err == 0)
 			perf_session__set_id_hdr_size(session);
 		return err;
@@ -930,7 +933,7 @@ static int perf_session__process_user_event(struct perf_session *session, union
 	case PERF_RECORD_HEADER_TRACING_DATA:
 		/* setup for reading amidst mmap */
 		lseek(session->fd, file_offset, SEEK_SET);
-		return tool->tracing_data(event, session);
+		return tool->tracing_data(tool, event, session);
 	case PERF_RECORD_HEADER_BUILD_ID:
 		return tool->build_id(tool, event, session);
 	case PERF_RECORD_FINISHED_ROUND:
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index b0e1aad..88f8cbd 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -18,12 +18,11 @@ typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
 typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
 			struct perf_sample *sample, struct machine *machine);
 
-typedef int (*event_attr_op)(union perf_event *event,
+typedef int (*event_attr_op)(struct perf_tool *tool,
+			     union perf_event *event,
 			     struct perf_evlist **pevlist);
-typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event);
 
-typedef int (*event_synth_op)(union perf_event *event,
-			      struct perf_session *session);
+typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event);
 
 typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
 			 struct perf_session *session);
@@ -39,7 +38,7 @@ struct perf_tool {
 			throttle,
 			unthrottle;
 	event_attr_op	attr;
-	event_synth_op	tracing_data;
+	event_op2	tracing_data;
 	event_simple_op	event_type;
 	event_op2	finished_round,
 			build_id;
-- 
cgit v0.10.2


From a609bda78203a21fb0e78b9d5b4ab911678e4ebb Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:22 +0300
Subject: perf inject: Add missing 'finished_round'

By default, perf inject should "repipe" all events including
'finished_round'.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index c943513..ad1296c 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -404,6 +404,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.attr		= perf_event__repipe_attr,
 			.event_type	= perf_event__repipe_event_type_synth,
 			.tracing_data	= perf_event__repipe_op2_synth,
+			.finished_round	= perf_event__repipe_op2_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 		},
 		.input_name  = "-",
-- 
cgit v0.10.2


From b6b96fb48f75d62858b39baf13c83c4504642f15 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:25 +0300
Subject: perf tools: Add const specifier to perf_pmu__find name parameter

The name parameter is never modified, so declare it const.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 4c6f9c4..1d1862d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -73,7 +73,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head)
  * located at:
  * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes.
  */
-static int pmu_format(char *name, struct list_head *format)
+static int pmu_format(const char *name, struct list_head *format)
 {
 	struct stat st;
 	char path[PATH_MAX];
@@ -162,7 +162,7 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
  * Reading the pmu event aliases definition, which should be located at:
  * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
  */
-static int pmu_aliases(char *name, struct list_head *head)
+static int pmu_aliases(const char *name, struct list_head *head)
 {
 	struct stat st;
 	char path[PATH_MAX];
@@ -208,7 +208,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
  * located at:
  * /sys/bus/event_source/devices/<dev>/type as sysfs attribute.
  */
-static int pmu_type(char *name, __u32 *type)
+static int pmu_type(const char *name, __u32 *type)
 {
 	struct stat st;
 	char path[PATH_MAX];
@@ -266,7 +266,7 @@ static void pmu_read_sysfs(void)
 	closedir(dir);
 }
 
-static struct cpu_map *pmu_cpumask(char *name)
+static struct cpu_map *pmu_cpumask(const char *name)
 {
 	struct stat st;
 	char path[PATH_MAX];
@@ -293,7 +293,7 @@ static struct cpu_map *pmu_cpumask(char *name)
 	return cpus;
 }
 
-static struct perf_pmu *pmu_lookup(char *name)
+static struct perf_pmu *pmu_lookup(const char *name)
 {
 	struct perf_pmu *pmu;
 	LIST_HEAD(format);
@@ -330,7 +330,7 @@ static struct perf_pmu *pmu_lookup(char *name)
 	return pmu;
 }
 
-static struct perf_pmu *pmu_find(char *name)
+static struct perf_pmu *pmu_find(const char *name)
 {
 	struct perf_pmu *pmu;
 
@@ -356,7 +356,7 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
 	return NULL;
 }
 
-struct perf_pmu *perf_pmu__find(char *name)
+struct perf_pmu *perf_pmu__find(const char *name)
 {
 	struct perf_pmu *pmu;
 
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 32fe55b..d17b565 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -21,7 +21,7 @@ struct perf_pmu {
 	struct list_head list;
 };
 
-struct perf_pmu *perf_pmu__find(char *name);
+struct perf_pmu *perf_pmu__find(const char *name);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 		     struct list_head *head_terms);
 int perf_pmu__config_terms(struct list_head *formats,
-- 
cgit v0.10.2


From 93edcbd91d888c7530c7fc749176fc935b8e2287 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:26 +0300
Subject: perf evlist: Tidy duplicated munmap code

The same lines of code are used in three places.  Factor them out into a
new function '__perf_evlist__munmap()'.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d8f34e0..42ea4e9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -403,16 +403,20 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 	return event;
 }
 
+static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
+{
+	if (evlist->mmap[idx].base != NULL) {
+		munmap(evlist->mmap[idx].base, evlist->mmap_len);
+		evlist->mmap[idx].base = NULL;
+	}
+}
+
 void perf_evlist__munmap(struct perf_evlist *evlist)
 {
 	int i;
 
-	for (i = 0; i < evlist->nr_mmaps; i++) {
-		if (evlist->mmap[i].base != NULL) {
-			munmap(evlist->mmap[i].base, evlist->mmap_len);
-			evlist->mmap[i].base = NULL;
-		}
-	}
+	for (i = 0; i < evlist->nr_mmaps; i++)
+		__perf_evlist__munmap(evlist, i);
 
 	free(evlist->mmap);
 	evlist->mmap = NULL;
@@ -477,12 +481,8 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
 	return 0;
 
 out_unmap:
-	for (cpu = 0; cpu < nr_cpus; cpu++) {
-		if (evlist->mmap[cpu].base != NULL) {
-			munmap(evlist->mmap[cpu].base, evlist->mmap_len);
-			evlist->mmap[cpu].base = NULL;
-		}
-	}
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		__perf_evlist__munmap(evlist, cpu);
 	return -1;
 }
 
@@ -517,12 +517,8 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
 	return 0;
 
 out_unmap:
-	for (thread = 0; thread < nr_threads; thread++) {
-		if (evlist->mmap[thread].base != NULL) {
-			munmap(evlist->mmap[thread].base, evlist->mmap_len);
-			evlist->mmap[thread].base = NULL;
-		}
-	}
+	for (thread = 0; thread < nr_threads; thread++)
+		__perf_evlist__munmap(evlist, thread);
 	return -1;
 }
 
-- 
cgit v0.10.2


From 27389d7823f573be8eaff32fb4abe564e181eb71 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:27 +0300
Subject: perf tools: Validate perf event header size

The 'size' variable includes the header so must be at least
'sizeof(struct perf_event_header)'.  Error out immediately if that is
not the case.  Also don't byte-swap the header until it is actually
"fetched" from the mmap region.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6b71b88..951a1cf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1094,8 +1094,10 @@ more:
 		perf_event_header__bswap(&event->header);
 
 	size = event->header.size;
-	if (size == 0)
-		size = 8;
+	if (size < sizeof(struct perf_event_header)) {
+		pr_err("bad event header size\n");
+		goto out_err;
+	}
 
 	if (size > cur_size) {
 		void *new = realloc(buf, size);
@@ -1164,8 +1166,12 @@ fetch_mmaped_event(struct perf_session *session,
 	if (session->header.needs_swap)
 		perf_event_header__bswap(&event->header);
 
-	if (head + event->header.size > mmap_size)
+	if (head + event->header.size > mmap_size) {
+		/* We're not fetching the event so swap back again */
+		if (session->header.needs_swap)
+			perf_event_header__bswap(&event->header);
 		return NULL;
+	}
 
 	return event;
 }
@@ -1245,7 +1251,7 @@ more:
 
 	size = event->header.size;
 
-	if (size == 0 ||
+	if (size < sizeof(struct perf_event_header) ||
 	    perf_session__process_event(session, event, tool, file_pos) < 0) {
 		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
 		       file_offset + head, event->header.size,
-- 
cgit v0.10.2


From 380512345e13c3af64e59627f1b993c4faa94a84 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 4 Jul 2013 16:20:31 +0300
Subject: perf tools: struct thread has a tid not a pid

As evident from 'machine__process_fork_event()' and
'machine__process_exit_event()' the 'pid' member of struct thread is
actually the tid.

Rename 'pid' to 'tid' in struct thread accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1372944040-32690-13-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0259502..b49f5c5 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -313,7 +313,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid);
 
 	if (evsel->handler.func != NULL) {
 		tracepoint_handler f = evsel->handler.func;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index fba4a94..948183a 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1075,7 +1075,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 	if (!atoms) {
 		if (thread_atoms_insert(sched, migrant))
 			return -1;
-		register_pid(sched, migrant->pid, migrant->comm);
+		register_pid(sched, migrant->tid, migrant->comm);
 		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 		if (!atoms) {
 			pr_err("migration-event: Internal tree error");
@@ -1115,7 +1115,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 	sched->all_runtime += work_list->total_runtime;
 	sched->all_count   += work_list->nb_atoms;
 
-	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->pid);
+	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->tid);
 
 	for (i = 0; i < 24 - ret; i++)
 		printf(" ");
@@ -1131,9 +1131,9 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 
 static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
 {
-	if (l->thread->pid < r->thread->pid)
+	if (l->thread->tid < r->thread->tid)
 		return -1;
-	if (l->thread->pid > r->thread->pid)
+	if (l->thread->tid > r->thread->tid)
 		return 1;
 
 	return 0;
@@ -1321,7 +1321,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 			printf("*");
 
 		if (sched->curr_thread[cpu]) {
-			if (sched->curr_thread[cpu]->pid)
+			if (sched->curr_thread[cpu]->tid)
 				printf("%2s ", sched->curr_thread[cpu]->shortname);
 			else
 				printf(".  ");
@@ -1332,7 +1332,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	printf("  %12.6f secs ", (double)timestamp/1e9);
 	if (new_shortname) {
 		printf("%s => %s:%d\n",
-			sched_in->shortname, sched_in->comm, sched_in->pid);
+			sched_in->shortname, sched_in->comm, sched_in->tid);
 	} else {
 		printf("\n");
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 87fc7d0..0e4b67f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -142,7 +142,7 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 	printed += fprintf_duration(duration, fp);
 
 	if (trace->multiple_threads)
-		printed += fprintf(fp, "%d ", thread->pid);
+		printed += fprintf(fp, "%d ", thread->tid);
 
 	return printed;
 }
@@ -593,7 +593,7 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 			color = PERF_COLOR_YELLOW;
 
 		printed += color_fprintf(fp, color, "%20s", thread->comm);
-		printed += fprintf(fp, " - %-5d :%11lu   [", thread->pid, ttrace->nr_events);
+		printed += fprintf(fp, " - %-5d :%11lu   [", thread->tid, ttrace->nr_events);
 		printed += color_fprintf(fp, color, "%5.1f%%", ratio);
 		printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
 	}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fc0bd38..06e892f 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1256,7 +1256,7 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 		printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
 				    (thread->comm_set ? thread->comm : ""),
-				    thread->pid);
+				    thread->tid);
 	if (dso)
 		printed += scnprintf(bf + printed, size - printed,
 				    ", DSO: %s", dso->short_name);
@@ -1579,7 +1579,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		    asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
 			     (browser->hists->thread_filter ? "out of" : "into"),
 			     (thread->comm_set ? thread->comm : ""),
-			     thread->pid) > 0)
+			     thread->tid) > 0)
 			zoom_thread = nr_options++;
 
 		if (dso != NULL &&
@@ -1702,7 +1702,7 @@ zoom_out_thread:
 			} else {
 				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
 						   thread->comm_set ? thread->comm : "",
-						   thread->pid);
+						   thread->tid);
 				browser->hists->thread_filter = thread;
 				sort_thread.elide = true;
 				pstack__push(fstack, &browser->hists->thread_filter);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 5cd13d7..9541270 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -686,7 +686,7 @@ int perf_event__preprocess_sample(const union perf_event *event,
 	    !strlist__has_entry(symbol_conf.comm_list, thread->comm))
 		goto out_filtered;
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->tid);
 	/*
 	 * Have we already created the kernel maps for this machine?
 	 *
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 93527af..5dd5026 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -233,7 +233,7 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
 	return;
 }
 
-static struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid,
+static struct thread *__machine__findnew_thread(struct machine *machine, pid_t tid,
 						bool create)
 {
 	struct rb_node **p = &machine->threads.rb_node;
@@ -241,23 +241,23 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t p
 	struct thread *th;
 
 	/*
-	 * Font-end cache - PID lookups come in blocks,
+	 * Front-end cache - TID lookups come in blocks,
 	 * so most of the time we dont have to look up
 	 * the full rbtree:
 	 */
-	if (machine->last_match && machine->last_match->pid == pid)
+	if (machine->last_match && machine->last_match->tid == tid)
 		return machine->last_match;
 
 	while (*p != NULL) {
 		parent = *p;
 		th = rb_entry(parent, struct thread, rb_node);
 
-		if (th->pid == pid) {
+		if (th->tid == tid) {
 			machine->last_match = th;
 			return th;
 		}
 
-		if (pid < th->pid)
+		if (tid < th->tid)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -266,7 +266,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t p
 	if (!create)
 		return NULL;
 
-	th = thread__new(pid);
+	th = thread__new(tid);
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
 		rb_insert_color(&th->rb_node, &machine->threads);
@@ -276,14 +276,14 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t p
 	return th;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid)
+struct thread *machine__findnew_thread(struct machine *machine, pid_t tid)
 {
-	return __machine__findnew_thread(machine, pid, true);
+	return __machine__findnew_thread(machine, tid, true);
 }
 
-struct thread *machine__find_thread(struct machine *machine, pid_t pid)
+struct thread *machine__find_thread(struct machine *machine, pid_t tid)
 {
-	return __machine__findnew_thread(machine, pid, false);
+	return __machine__findnew_thread(machine, tid, false);
 }
 
 int machine__process_comm_event(struct machine *machine, union perf_event *event)
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 7794068..e49ba01 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -36,7 +36,7 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type)
 	return machine->vmlinux_maps[type];
 }
 
-struct thread *machine__find_thread(struct machine *machine, pid_t pid);
+struct thread *machine__find_thread(struct machine *machine, pid_t tid);
 
 int machine__process_comm_event(struct machine *machine, union perf_event *event);
 int machine__process_exit_event(struct machine *machine, union perf_event *event);
@@ -99,7 +99,7 @@ static inline bool machine__is_host(struct machine *machine)
 	return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t tid);
 
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 313a5a7..8deee19 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -55,14 +55,14 @@ static int64_t cmp_null(void *l, void *r)
 static int64_t
 sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return right->thread->pid - left->thread->pid;
+	return right->thread->tid - left->thread->tid;
 }
 
 static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width)
 {
 	return repsep_snprintf(bf, size, "%*s:%5d", width - 6,
-			      self->thread->comm ?: "", self->thread->pid);
+			      self->thread->comm ?: "", self->thread->tid);
 }
 
 struct sort_entry sort_thread = {
@@ -77,7 +77,7 @@ struct sort_entry sort_thread = {
 static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	return right->thread->pid - left->thread->pid;
+	return right->thread->tid - left->thread->tid;
 }
 
 static int64_t
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 40399cb..6feeb88 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,17 +7,17 @@
 #include "util.h"
 #include "debug.h"
 
-struct thread *thread__new(pid_t pid)
+struct thread *thread__new(pid_t tid)
 {
 	struct thread *self = zalloc(sizeof(*self));
 
 	if (self != NULL) {
 		map_groups__init(&self->mg);
-		self->pid = pid;
+		self->tid = tid;
 		self->ppid = -1;
 		self->comm = malloc(32);
 		if (self->comm)
-			snprintf(self->comm, 32, ":%d", self->pid);
+			snprintf(self->comm, 32, ":%d", self->tid);
 	}
 
 	return self;
@@ -57,7 +57,7 @@ int thread__comm_len(struct thread *self)
 
 size_t thread__fprintf(struct thread *thread, FILE *fp)
 {
-	return fprintf(fp, "Thread %d %s\n", thread->pid, thread->comm) +
+	return fprintf(fp, "Thread %d %s\n", thread->tid, thread->comm) +
 	       map_groups__fprintf(&thread->mg, verbose, fp);
 }
 
@@ -84,7 +84,7 @@ int thread__fork(struct thread *self, struct thread *parent)
 		if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
 			return -ENOMEM;
 
-	self->ppid = parent->pid;
+	self->ppid = parent->tid;
 
 	return 0;
 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 5e7ba35..0fe1f9c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -12,7 +12,7 @@ struct thread {
 		struct list_head node;
 	};
 	struct map_groups	mg;
-	pid_t			pid;
+	pid_t			tid;
 	pid_t			ppid;
 	char			shortname[3];
 	bool			comm_set;
@@ -24,7 +24,7 @@ struct thread {
 
 struct machine;
 
-struct thread *thread__new(pid_t pid);
+struct thread *thread__new(pid_t tid);
 void thread__delete(struct thread *self);
 
 int thread__set_comm(struct thread *self, const char *comm);
-- 
cgit v0.10.2


From 50e200f07948400694238e08e7add73df5ba8f83 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 20 Apr 2013 11:02:28 -0700
Subject: perf tools: Default to cpu// for events v5

When an event fails to parse and it's not in a new style format,
try to parse it again as a cpu event.

This allows sysfs exported events to be used directly without the cpu//
wrapper, so you can use

perf record -e mem-loads ...

instead of

perf record -e cpu/mem-loads/

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1366480949-32292-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
index 6f19c54..97a8007 100644
--- a/tools/perf/util/include/linux/string.h
+++ b/tools/perf/util/include/linux/string.h
@@ -1,3 +1,4 @@
 #include <string.h>
 
 void *memdup(const void *src, size_t len);
+int str_append(char **s, int *len, const char *a);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e853769..a5076f4 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -6,7 +6,7 @@
 #include "parse-options.h"
 #include "parse-events.h"
 #include "exec_cmd.h"
-#include "string.h"
+#include "linux/string.h"
 #include "symbol.h"
 #include "cache.h"
 #include "header.h"
@@ -823,6 +823,54 @@ int parse_events_name(struct list_head *list, char *name)
 	return 0;
 }
 
+static int parse_events__scanner(const char *str, void *data, int start_token);
+
+/*
+ * Retry a failed parse with every comma separated term of @str rewritten
+ * as "cpu/<term>/", so that cpu events can be given without the explicit
+ * pmu syntax.
+ *
+ * Returns @ret unchanged when @str cannot be duplicated, -ENOMEM when the
+ * rewritten string cannot be built, otherwise the result of parsing the
+ * rewritten string.
+ */
+static int parse_events_fixup(int ret, const char *str, void *data,
+			      int start_token)
+{
+	char *o = strdup(str);
+	char *s = NULL;
+	char *t = o;
+	char *p;
+	int len = 0;
+	int err = 0;
+
+	if (!o)
+		return ret;
+	/* Stop at the first failed append: never parse a partial string. */
+	while (!err && (p = strsep(&t, ",")) != NULL) {
+		if (s)
+			err = str_append(&s, &len, ",");
+		if (!err)
+			err = str_append(&s, &len, "cpu/");
+		if (!err)
+			err = str_append(&s, &len, p);
+		if (!err)
+			err = str_append(&s, &len, "/");
+	}
+	free(o);
+	if (err || !s) {
+		free(s);
+		return -ENOMEM;
+	}
+	ret = parse_events__scanner(s, data, start_token);
+	/*
+	 * NOTE(review): assumes the scanner keeps no pointer into its input
+	 * once it has returned - confirm before relying on this free().
+	 */
+	free(s);
+	return ret;
+}
+
 static int parse_events__scanner(const char *str, void *data, int start_token)
 {
 	YY_BUFFER_STATE buffer;
@@ -843,6 +891,8 @@ static int parse_events__scanner(const char *str, void *data, int start_token)
 	parse_events__flush_buffer(buffer, scanner);
 	parse_events__delete_buffer(buffer, scanner);
 	parse_events_lex_destroy(scanner);
+	if (ret && !strchr(str, '/'))
+		ret = parse_events_fixup(ret, str, data, start_token);
 	return ret;
 }
 
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 29c7b2c..f0b0c00 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -387,3 +387,35 @@ void *memdup(const void *src, size_t len)
 
 	return p;
 }
+
+/**
+ * str_append - reallocate string and append another
+ * @s: pointer to string pointer (may point to NULL for an empty string)
+ * @len: pointer to the allocated size of *@s (0 when *@s is NULL)
+ * @a: string to append.
+ *
+ * Returns 0 on success. On allocation failure returns -ENOMEM and leaves
+ * *@s and *@len untouched, so the caller still owns the old buffer.
+ */
+int str_append(char **s, int *len, const char *a)
+{
+	int olen = *s ? strlen(*s) : 0;
+	int nlen = olen + strlen(a) + 1;
+	if (*len < nlen) {
+		int cap = *len * 2;
+		char *tmp;
+
+		if (cap < nlen)
+			cap = nlen;
+		/* Do not clobber *s: realloc() keeps the old block on failure. */
+		tmp = realloc(*s, cap);
+		if (!tmp)
+			return -ENOMEM;
+		if (olen == 0)
+			*tmp = 0;
+		*s = tmp;
+		*len = cap;
+	}
+	strcat(*s, a);
+	return 0;
+}
-- 
cgit v0.10.2


From dc098b35b56f83ae088e4291a4e389a6ff126965 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 20 Apr 2013 11:02:29 -0700
Subject: perf list: List kernel supplied event aliases

List the kernel supplied PMU event aliases in perf list.

It's better when the users can actually see them.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1366480949-32292-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index d1e39dc..826f3d6 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
 SYNOPSIS
 --------
 [verse]
-'perf list' [hw|sw|cache|tracepoint|event_glob]
+'perf list' [hw|sw|cache|tracepoint|pmu|event_glob]
 
 DESCRIPTION
 -----------
@@ -104,6 +104,8 @@ To limit the list use:
   'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
   block, etc.
 
+. 'pmu' to print the kernel supplied PMU events.
+
 . If none of the above is matched, it will apply the supplied glob to all
   events, printing the ones that match.
 
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 1948ece..e79f423 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -13,6 +13,7 @@
 
 #include "util/parse-events.h"
 #include "util/cache.h"
+#include "util/pmu.h"
 
 int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
 {
@@ -37,6 +38,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
 			else if (strcmp(argv[i], "cache") == 0 ||
 				 strcmp(argv[i], "hwcache") == 0)
 				print_hwcache_events(NULL, false);
+			else if (strcmp(argv[i], "pmu") == 0)
+				print_pmu_events(NULL, false);
 			else if (strcmp(argv[i], "--raw-dump") == 0)
 				print_events(NULL, true);
 			else {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a5076f4..2c460ed 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1110,6 +1110,8 @@ int print_hwcache_events(const char *event_glob, bool name_only)
 		}
 	}
 
+	if (printed)
+		printf("\n");
 	return printed;
 }
 
@@ -1164,11 +1166,12 @@ void print_events(const char *event_glob, bool name_only)
 
 	print_hwcache_events(event_glob, name_only);
 
+	print_pmu_events(event_glob, name_only);
+
 	if (event_glob != NULL)
 		return;
 
 	if (!name_only) {
-		printf("\n");
 		printf("  %-50s [%s]\n",
 		       "rNNN",
 		       event_type_descriptors[PERF_TYPE_RAW]);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 1d1862d..bc9d806 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -564,3 +564,91 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
 	for (b = from; b <= to; b++)
 		set_bit(b, bits);
 }
+
+/* Format @alias of @pmu as "<pmu>/<alias>/" into @buf; returns @buf. */
+static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
+			  struct perf_pmu_alias *alias)
+{
+	snprintf(buf, len, "%s/%s/", pmu->name, alias->name);
+	return buf;
+}
+
+/* Format @alias as "<alias> OR <pmu>/<alias>/" into @buf; returns @buf. */
+static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
+			     struct perf_pmu_alias *alias)
+{
+	snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
+	return buf;
+}
+
+/* qsort() comparator for an array of C string pointers. */
+static int cmp_string(const void *a, const void *b)
+{
+	const char * const *as = a;
+	const char * const *bs = b;
+	return strcmp(*as, *bs);
+}
+
+/*
+ * Print the aliases of every PMU returned by perf_pmu__scan(), sorted.
+ *
+ * @event_glob: if not NULL, only print aliases matching this glob
+ * @name_only: print bare "<pmu>/<alias>/" names separated by spaces
+ *	       instead of one described event per line
+ */
+void print_pmu_events(const char *event_glob, bool name_only)
+{
+	struct perf_pmu *pmu;
+	struct perf_pmu_alias *alias;
+	char buf[1024];
+	int printed = 0;
+	int len, j;
+	char **aliases;
+
+	pmu = NULL;
+	len = 0;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL)
+		list_for_each_entry(alias, &pmu->aliases, list)
+			len++;
+	aliases = malloc(sizeof(char *) * len);
+	if (!aliases)
+		return;
+	pmu = NULL;
+	j = 0;
+	while ((pmu = perf_pmu__scan(pmu)) != NULL)
+		list_for_each_entry(alias, &pmu->aliases, list) {
+			char *name = format_alias(buf, sizeof(buf), pmu, alias);
+			bool is_cpu = !strcmp(pmu->name, "cpu");
+
+			if (event_glob != NULL &&
+			    !(strglobmatch(name, event_glob) ||
+			      (!is_cpu && strglobmatch(alias->name,
+						       event_glob))))
+				continue;
+			if (is_cpu && !name_only)
+				name = format_alias_or(buf, sizeof(buf),
+						       pmu, alias);
+			/* name points into buf, keep a private copy */
+			aliases[j] = strdup(name);
+			if (!aliases[j])
+				goto out_free;
+			j++;
+		}
+	len = j;
+	qsort(aliases, len, sizeof(char *), cmp_string);
+	for (j = 0; j < len; j++) {
+		if (name_only) {
+			printf("%s ", aliases[j]);
+			continue;
+		}
+		printf("  %-50s [Kernel PMU event]\n", aliases[j]);
+		printed++;
+	}
+	if (printed)
+		printf("\n");
+out_free:
+	/* j is the number of duplicated names on every path reaching here */
+	while (j > 0)
+		free(aliases[--j]);
+	free(aliases);
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index d17b565..6b2cbe2 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -3,6 +3,7 @@
 
 #include <linux/bitops.h>
 #include <linux/perf_event.h>
+#include <stdbool.h>
 
 enum {
 	PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -40,5 +41,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
 
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
+void print_pmu_events(const char *event_glob, bool name_only);
+
 int perf_pmu__test(void);
 #endif /* __PMU_H */
-- 
cgit v0.10.2


From b21484f1a1f300d422cfe5d4f8f50015e22cea24 Mon Sep 17 00:00:00 2001
From: Greg Price <price@MIT.EDU>
Date: Thu, 6 Dec 2012 21:48:05 -0800
Subject: perf report/top: Add option to collapse undesired parts of call graph

For example, in an application with an expensive function implemented
with deeply nested recursive calls, the default call-graph presentation
is dominated by the different callchains within that function.  By
ignoring these callees, we can collect the callchains leading into the
function and compactly identify what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:

  $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      22.03%     ruby  [.] gc_mark
                 --- gc_mark
                    |--59.40%-- mark_keyvalue
                    |          st_foreach
                    |          gc_mark_children
                    |          |--99.75%-- rb_gc_mark
                    |          |          rb_vm_mark
                    |          |          gc_mark_children
                    |          |          gc_marks
                    |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:

  $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      72.92%     ruby  [.] garbage_collect
                 --- garbage_collect
                     vm_xmalloc
                    |--47.08%-- ruby_xmalloc
                    |          st_insert2
                    |          rb_hash_aset
                    |          |--98.45%-- features_index_add
                    |          |          rb_provide_feature
                    |          |          rb_require_safe
                    |          |          vm_call_method

Signed-off-by: Greg Price <price@mit.edu>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu
Cc: Fengguang Wu <fengguang.wu@intel.com>
[ remove spaces at beginning of line, reported by Fengguang Wu ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 66dab74..747ff50 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -135,6 +135,11 @@ OPTIONS
 --inverted::
         alias for inverted caller based call graph.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --pretty=<key>::
         Pretty printing style.  key: normal, raw
 
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 7fdd190..58d6598 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -155,6 +155,11 @@ Default is to monitor all CPUS.
 
 	Default: fractal,0.5,callee.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --percent-limit::
 	Do not show entries which have an overhead under that percent.
 	(Default: 0).
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ee2ca3e..9a7e54d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -89,7 +89,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain) &&
 	    sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -180,7 +180,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
 	if ((sort__has_parent || symbol_conf.use_callchain)
 	    && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -254,7 +254,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = machine__resolve_callchain(machine, evsel, al->thread,
-						 sample, &parent);
+						 sample, &parent, al);
 		if (err)
 			return err;
 	}
@@ -681,6 +681,38 @@ setup:
 	return 0;
 }
 
+/*
+ * Option callback for --ignore-callees: compile @arg into
+ * ignore_callees_regex and set have_ignore_callees.
+ *
+ * Returns 0 on success (or when @arg is NULL), -1 if @arg is not a valid
+ * extended regular expression.
+ */
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+				const char *arg, int unset __maybe_unused)
+{
+	if (arg) {
+		int err;
+
+		/* The option may be repeated: drop the previous pattern. */
+		if (have_ignore_callees) {
+			regfree(&ignore_callees_regex);
+			have_ignore_callees = 0;
+		}
+		err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+		if (err) {
+			char buf[BUFSIZ];
+			regerror(err, &ignore_callees_regex, buf, sizeof(buf));
+			pr_err("Invalid --ignore-callees regex: %s\n%s\n", arg, buf);
+			return -1;
+		}
+		have_ignore_callees = 1;
+	}
+
+	return 0;
+}
+
 static int
 parse_branch_mode(const struct option *opt __maybe_unused,
 		  const char *str __maybe_unused, int unset)
@@ -771,6 +803,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a237059..bbf4635 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -773,8 +773,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		    sample->callchain) {
 			err = machine__resolve_callchain(machine, evsel,
 							 al.thread, sample,
-							 &parent);
-
+							 &parent, &al);
 			if (err)
 				return;
 		}
@@ -1109,6 +1108,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
 			     "mode[,dump_size]", record_callchain_help,
 			     &parse_callchain_opt, "fp"),
+	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+		   "ignore callees of these functions in call graphs",
+		   report_parse_ignore_callees_opt),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
 	OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5dd5026..f9f9d63 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
 	return ret;
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+	if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
 		return 1;
-
 	return 0;
 }
 
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
-					     struct symbol **parent)
-
+					     struct symbol **parent,
+					     struct addr_location *root_al)
 {
 	u8 cpumode = PERF_RECORD_MISC_USER;
 	unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 					   MAP__FUNCTION, ip, &al, NULL);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
-			    symbol__match_parent_regex(al.sym))
+			    symbol__match_regex(al.sym, &parent_regex))
 				*parent = al.sym;
+			else if (have_ignore_callees && root_al &&
+			  symbol__match_regex(al.sym, &ignore_callees_regex)) {
+				/* Treat this symbol as the root,
+				   forgetting its callees. */
+				*root_al = al;
+				callchain_cursor_reset(&callchain_cursor);
+			}
 			if (!symbol_conf.use_callchain)
 				break;
 		}
@@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent)
-
+			       struct symbol **parent,
+			       struct addr_location *root_al)
 {
 	int ret;
 
 	ret = machine__resolve_callchain_sample(machine, thread,
-						sample->callchain, parent);
+						sample->callchain, parent, root_al);
 	if (ret)
 		return ret;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e49ba01..5bb6244 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -5,6 +5,7 @@
 #include <linux/rbtree.h>
 #include "map.h"
 
+struct addr_location;
 struct branch_stack;
 struct perf_evsel;
 struct perf_sample;
@@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine,
 			       struct perf_evsel *evsel,
 			       struct thread *thread,
 			       struct perf_sample *sample,
-			       struct symbol **parent);
+			       struct symbol **parent,
+			       struct addr_location *root_al);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 951a1cf..1eb58ee 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1406,9 +1406,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
 	if (symbol_conf.use_callchain && sample->callchain) {
 
-
 		if (machine__resolve_callchain(machine, evsel, al.thread,
-					       sample, NULL) != 0) {
+					       sample, NULL, NULL) != 0) {
 			if (verbose)
 				error("Failed to resolve callchain. Skipping\n");
 			return;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 8deee19..cb2b108 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -7,6 +7,8 @@ const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
 const char	default_sort_order[] = "comm,dso,symbol";
 const char	*sort_order = default_sort_order;
+regex_t		ignore_callees_regex;
+int		have_ignore_callees = 0;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
 int		sort__has_sym = 0;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 45ac84c..a4a6d0b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -29,6 +29,8 @@ extern const char *sort_order;
 extern const char default_parent_pattern[];
 extern const char *parent_pattern;
 extern const char default_sort_order[];
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
@@ -183,4 +185,6 @@ int setup_sorting(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
 
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
 #endif	/* __PERF_SORT_H */
-- 
cgit v0.10.2


From 2c5d4b4a087c448d7818b89294c98d4977dfe76c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 31 Jan 2013 23:31:11 +0100
Subject: perf tools: Add struct perf_hpp_fmt into hpp callbacks

Adding 'struct perf_hpp_fmt' into hpp callbacks, so commands can access
their private data.

It'll be handy for the diff command in the future to be able to access file
related data for each column.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-7vy2m18574b1bicoljn8e9lw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 06e892f..2cb3916 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -685,8 +685,10 @@ static u64 __hpp_get_##_field(struct hist_entry *he)			\
 	return he->stat._field;						\
 }									\
 									\
-static int hist_browser__hpp_color_##_type(struct perf_hpp *hpp,	\
-					   struct hist_entry *he)	\
+static int								\
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\
+				struct perf_hpp *hpp,			\
+				struct hist_entry *he)			\
 {									\
 	return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb);	\
 }
@@ -762,9 +764,9 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 			first = false;
 
 			if (fmt->color) {
-				width -= fmt->color(&hpp, entry);
+				width -= fmt->color(fmt, &hpp, entry);
 			} else {
-				width -= fmt->entry(&hpp, entry);
+				width -= fmt->entry(fmt, &hpp, entry);
 				slsmg_printf("%s", s);
 			}
 		}
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 3254903..cb2ed198 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -91,7 +91,8 @@ static u64 he_get_##_field(struct hist_entry *he)				\
 	return he->stat._field;							\
 }										\
 										\
-static int perf_gtk__hpp_color_##_type(struct perf_hpp *hpp,			\
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+				       struct perf_hpp *hpp,			\
 				       struct hist_entry *he)			\
 {										\
 	return __hpp__color_fmt(hpp, he, he_get_##_field);			\
@@ -244,7 +245,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 	col_idx = 0;
 
 	perf_hpp__for_each_format(fmt) {
-		fmt->header(&hpp);
+		fmt->header(fmt, &hpp);
 
 		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
 							    -1, ltrim(s),
@@ -296,9 +297,9 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
 		perf_hpp__for_each_format(fmt) {
 			if (fmt->color)
-				fmt->color(&hpp, h);
+				fmt->color(fmt, &hpp, h);
 			else
-				fmt->entry(&hpp, h);
+				fmt->entry(fmt, &hpp, h);
 
 			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4bf91b0..5440d56 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -1,4 +1,5 @@
 #include <math.h>
+#include <linux/compiler.h>
 
 #include "../util/hist.h"
 #include "../util/util.h"
@@ -79,7 +80,8 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
 }
 
 #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) 		\
-static int hpp__header_##_type(struct perf_hpp *hpp)			\
+static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+			       struct perf_hpp *hpp)			\
 {									\
 	int len = _min_width;						\
 									\
@@ -92,7 +94,8 @@ static int hpp__header_##_type(struct perf_hpp *hpp)			\
 }
 
 #define __HPP_WIDTH_FN(_type, _min_width, _unit_width) 			\
-static int hpp__width_##_type(struct perf_hpp *hpp __maybe_unused)	\
+static int hpp__width_##_type(struct perf_hpp_fmt *fmt __maybe_unused,	\
+			      struct perf_hpp *hpp __maybe_unused)	\
 {									\
 	int len = _min_width;						\
 									\
@@ -110,14 +113,16 @@ static u64 he_get_##_field(struct hist_entry *he)				\
 	return he->stat._field;							\
 }										\
 										\
-static int hpp__color_##_type(struct perf_hpp *hpp, struct hist_entry *he) 	\
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
 {										\
 	return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%",			\
 			  (hpp_snprint_fn)percent_color_snprintf, true);	\
 }
 
 #define __HPP_ENTRY_PERCENT_FN(_type, _field)					\
-static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) 	\
+static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
 {										\
 	const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%";		\
 	return __hpp__fmt(hpp, he, he_get_##_field, fmt,			\
@@ -130,7 +135,8 @@ static u64 he_get_raw_##_field(struct hist_entry *he)				\
 	return he->stat._field;							\
 }										\
 										\
-static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) 	\
+static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused,		\
+			      struct perf_hpp *hpp, struct hist_entry *he) 	\
 {										\
 	const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64;	\
 	return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, scnprintf, false);	\
@@ -158,12 +164,14 @@ HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
 HPP_RAW_FNS(period, "Period", period, 12, 12)
 
 
-static int hpp__header_baseline(struct perf_hpp *hpp)
+static int hpp__header_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
+				struct perf_hpp *hpp)
 {
 	return scnprintf(hpp->buf, hpp->size, "Baseline");
 }
 
-static int hpp__width_baseline(struct perf_hpp *hpp __maybe_unused)
+static int hpp__width_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
+			       struct perf_hpp *hpp __maybe_unused)
 {
 	return 8;
 }
@@ -184,7 +192,8 @@ static double baseline_percent(struct hist_entry *he)
 	return percent;
 }
 
-static int hpp__color_baseline(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__color_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
+			       struct perf_hpp *hpp, struct hist_entry *he)
 {
 	double percent = baseline_percent(he);
 
@@ -194,7 +203,8 @@ static int hpp__color_baseline(struct perf_hpp *hpp, struct hist_entry *he)
 		return scnprintf(hpp->buf, hpp->size, "        ");
 }
 
-static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__entry_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
+			       struct perf_hpp *hpp, struct hist_entry *he)
 {
 	double percent = baseline_percent(he);
 	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
@@ -205,19 +215,22 @@ static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he)
 		return scnprintf(hpp->buf, hpp->size, "            ");
 }
 
-static int hpp__header_period_baseline(struct perf_hpp *hpp)
+static int hpp__header_period_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
+				       struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
 
 	return scnprintf(hpp->buf, hpp->size, fmt, "Period Base");
 }
 
-static int hpp__width_period_baseline(struct perf_hpp *hpp __maybe_unused)
+static int hpp__width_period_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
+				      struct perf_hpp *hpp __maybe_unused)
 {
 	return 12;
 }
 
-static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__entry_period_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
+				      struct perf_hpp *hpp, struct hist_entry *he)
 {
 	struct hist_entry *pair = hist_entry__next_pair(he);
 	u64 period = pair ? pair->stat.period : 0;
@@ -226,19 +239,22 @@ static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *h
 	return scnprintf(hpp->buf, hpp->size, fmt, period);
 }
 
-static int hpp__header_delta(struct perf_hpp *hpp)
+static int hpp__header_delta(struct perf_hpp_fmt *_fmt __maybe_unused,
+			     struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
 
 	return scnprintf(hpp->buf, hpp->size, fmt, "Delta");
 }
 
-static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused)
+static int hpp__width_delta(struct perf_hpp_fmt *fmt __maybe_unused,
+			    struct perf_hpp *hpp __maybe_unused)
 {
 	return 7;
 }
 
-static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__entry_delta(struct perf_hpp_fmt *_fmt __maybe_unused,
+			    struct perf_hpp *hpp, struct hist_entry *he)
 {
 	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
@@ -259,19 +275,22 @@ static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
-static int hpp__header_ratio(struct perf_hpp *hpp)
+static int hpp__header_ratio(struct perf_hpp_fmt *_fmt __maybe_unused,
+			     struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
 
 	return scnprintf(hpp->buf, hpp->size, fmt, "Ratio");
 }
 
-static int hpp__width_ratio(struct perf_hpp *hpp __maybe_unused)
+static int hpp__width_ratio(struct perf_hpp_fmt *fmt __maybe_unused,
+			    struct perf_hpp *hpp __maybe_unused)
 {
 	return 14;
 }
 
-static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__entry_ratio(struct perf_hpp_fmt *_fmt __maybe_unused,
+			    struct perf_hpp *hpp, struct hist_entry *he)
 {
 	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
@@ -291,19 +310,22 @@ static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he)
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
-static int hpp__header_wdiff(struct perf_hpp *hpp)
+static int hpp__header_wdiff(struct perf_hpp_fmt *_fmt __maybe_unused,
+			     struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
 
 	return scnprintf(hpp->buf, hpp->size, fmt, "Weighted diff");
 }
 
-static int hpp__width_wdiff(struct perf_hpp *hpp __maybe_unused)
+static int hpp__width_wdiff(struct perf_hpp_fmt *fmt __maybe_unused,
+			    struct perf_hpp *hpp __maybe_unused)
 {
 	return 14;
 }
 
-static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__entry_wdiff(struct perf_hpp_fmt *_fmt __maybe_unused,
+			    struct perf_hpp *hpp, struct hist_entry *he)
 {
 	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
@@ -323,19 +345,22 @@ static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he)
 	return scnprintf(hpp->buf, hpp->size, fmt, buf);
 }
 
-static int hpp__header_formula(struct perf_hpp *hpp)
+static int hpp__header_formula(struct perf_hpp_fmt *_fmt __maybe_unused,
+			       struct perf_hpp *hpp)
 {
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%70s";
 
 	return scnprintf(hpp->buf, hpp->size, fmt, "Formula");
 }
 
-static int hpp__width_formula(struct perf_hpp *hpp __maybe_unused)
+static int hpp__width_formula(struct perf_hpp_fmt *fmt __maybe_unused,
+			      struct perf_hpp *hpp __maybe_unused)
 {
 	return 70;
 }
 
-static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he)
+static int hpp__entry_formula(struct perf_hpp_fmt *_fmt __maybe_unused,
+			      struct perf_hpp *hpp, struct hist_entry *he)
 {
 	struct hist_entry *pair = hist_entry__next_pair(he);
 	const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s";
@@ -454,9 +479,9 @@ int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
 			first = false;
 
 		if (color && fmt->color)
-			ret = fmt->color(hpp, he);
+			ret = fmt->color(fmt, hpp, he);
 		else
-			ret = fmt->entry(hpp, he);
+			ret = fmt->entry(fmt, hpp, he);
 
 		advance_hpp(hpp, ret);
 	}
@@ -499,7 +524,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
 		if (i)
 			ret += 2;
 
-		ret += fmt->width(&dummy_hpp);
+		ret += fmt->width(fmt, &dummy_hpp);
 	}
 
 	list_for_each_entry(se, &hist_entry__sort_list, list)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index ae7a754..ee70372 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -365,7 +365,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		else
 			first = false;
 
-		fmt->header(&dummy_hpp);
+		fmt->header(fmt, &dummy_hpp);
 		fprintf(fp, "%s", bf);
 	}
 
@@ -410,7 +410,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		else
 			first = false;
 
-		width = fmt->width(&dummy_hpp);
+		width = fmt->width(fmt, &dummy_hpp);
 		for (i = 0; i < width; i++)
 			fprintf(fp, ".");
 	}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2d3790f..0c62116 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -141,10 +141,12 @@ struct perf_hpp {
 };
 
 struct perf_hpp_fmt {
-	int (*header)(struct perf_hpp *hpp);
-	int (*width)(struct perf_hpp *hpp);
-	int (*color)(struct perf_hpp *hpp, struct hist_entry *he);
-	int (*entry)(struct perf_hpp *hpp, struct hist_entry *he);
+	int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp);
+	int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp);
+	int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hist_entry *he);
+	int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+		     struct hist_entry *he);
 
 	struct list_head list;
 };
-- 
cgit v0.10.2


From 2b8bfa6bb8a7d26935207710397386759b42125c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 31 Jan 2013 23:34:25 +0100
Subject: perf tools: Centralize default columns init in perf_hpp__init

Now that the diff command is separated from the other standard outputs,
we can use perf_hpp__init to initialize all standard columns.

Moving PERF_HPP__OVERHEAD column init back to perf_hpp__init,
and removing extra enable calls.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-nj2xk89tj972tbqswfs498ex@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9a7e54d..188c265 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -861,7 +861,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		setup_browser(true);
 	else {
 		use_browser = 0;
-		perf_hpp__column_enable(PERF_HPP__OVERHEAD);
 		perf_hpp__init();
 	}
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 2cb3916..7ef36c3 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -703,8 +703,6 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL)
 
 void hist_browser__init_hpp(void)
 {
-	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
-
 	perf_hpp__init();
 
 	perf_hpp__format[PERF_HPP__OVERHEAD].color =
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 5440d56..f45c97f 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -421,6 +421,8 @@ LIST_HEAD(perf_hpp__list);
 
 void perf_hpp__init(void)
 {
+	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
+
 	if (symbol_conf.show_cpu_utilization) {
 		perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS);
 		perf_hpp__column_enable(PERF_HPP__OVERHEAD_US);
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index ae6a789..47d9a57 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -30,7 +30,6 @@ void setup_browser(bool fallback_to_pager)
 		if (fallback_to_pager)
 			setup_pager();
 
-		perf_hpp__column_enable(PERF_HPP__OVERHEAD);
 		perf_hpp__init();
 		break;
 	}
-- 
cgit v0.10.2


From ec308426ea743469ec6c2b0e06e20b3671546e8f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 25 Mar 2013 00:02:01 +0100
Subject: perf diff: Introducing diff_data object to hold files

Introducing struct data__file (the diff data object) to hold data file
specifics.  It will be handy when dealing with more than 2 data files.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-981q265sf6h05zuu8fnvw842@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 0aac5f3..015ca2d 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -19,10 +19,24 @@
 
 #include <stdlib.h>
 
-static char const *input_old = "perf.data.old",
-		  *input_new = "perf.data";
-static char	  diff__default_sort_order[] = "dso,symbol";
-static bool  force;
+struct data__file {
+	struct perf_session	*session;
+	const char		*file;
+	int			 idx;
+};
+
+static struct data__file *data__files;
+static int data__files_cnt;
+
+#define data__for_each_file_start(i, d, s)	\
+	for (i = s, d = &data__files[s];	\
+	     i < data__files_cnt;		\
+	     i++, d = &data__files[i])
+
+#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
+
+static char diff__default_sort_order[] = "dso,symbol";
+static bool force;
 static bool show_period;
 static bool show_formula;
 static bool show_baseline_only;
@@ -467,56 +481,62 @@ static void hists__process(struct hists *old, struct hists *new)
 	hists__fprintf(new, true, 0, 0, 0, stdout);
 }
 
-static int __cmd_diff(void)
+static void data_process(void)
 {
-	int ret, i;
-#define older (session[0])
-#define newer (session[1])
-	struct perf_session *session[2];
-	struct perf_evlist *evlist_new, *evlist_old;
-	struct perf_evsel *evsel;
+	struct perf_evlist *evlist_old = data__files[0].session->evlist;
+	struct perf_evlist *evlist_new = data__files[1].session->evlist;
+	struct perf_evsel *evsel_old;
 	bool first = true;
 
-	older = perf_session__new(input_old, O_RDONLY, force, false,
-				  &tool);
-	newer = perf_session__new(input_new, O_RDONLY, force, false,
-				  &tool);
-	if (session[0] == NULL || session[1] == NULL)
-		return -ENOMEM;
+	list_for_each_entry(evsel_old, &evlist_old->entries, node) {
+		struct perf_evsel *evsel_new;
 
-	for (i = 0; i < 2; ++i) {
-		ret = perf_session__process_events(session[i], &tool);
-		if (ret)
-			goto out_delete;
-	}
+		evsel_new = evsel_match(evsel_old, evlist_new);
+		if (!evsel_new)
+			continue;
 
-	evlist_old = older->evlist;
-	evlist_new = newer->evlist;
+		fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+			perf_evsel__name(evsel_old));
 
-	perf_evlist__collapse_resort(evlist_old);
-	perf_evlist__collapse_resort(evlist_new);
+		first = false;
 
-	list_for_each_entry(evsel, &evlist_new->entries, node) {
-		struct perf_evsel *evsel_old;
+		hists__process(&evsel_old->hists, &evsel_new->hists);
+	}
+}
 
-		evsel_old = evsel_match(evsel, evlist_old);
-		if (!evsel_old)
-			continue;
+static int __cmd_diff(void)
+{
+	struct data__file *d;
+	int ret = -EINVAL, i;
+
+	data__for_each_file(i, d) {
+		d->session = perf_session__new(d->file, O_RDONLY, force,
+					       false, &tool);
+		if (!d->session) {
+			pr_err("Failed to open %s\n", d->file);
+			ret = -ENOMEM;
+			goto out_delete;
+		}
 
-		fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
-			perf_evsel__name(evsel));
+		ret = perf_session__process_events(d->session, &tool);
+		if (ret) {
+			pr_err("Failed to process %s\n", d->file);
+			goto out_delete;
+		}
 
-		first = false;
+		perf_evlist__collapse_resort(d->session->evlist);
+	}
+
+	data_process();
 
-		hists__process(&evsel_old->hists, &evsel->hists);
+ out_delete:
+	data__for_each_file(i, d) {
+		if (d->session)
+			perf_session__delete(d->session);
 	}
 
-out_delete:
-	for (i = 0; i < 2; ++i)
-		perf_session__delete(session[i]);
+	free(data__files);
 	return ret;
-#undef older
-#undef newer
 }
 
 static const char * const diff_usage[] = {
@@ -589,27 +609,54 @@ static void ui_init(void)
 	}
 }
 
-int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
+static int data_init(int argc, const char **argv)
 {
-	sort_order = diff__default_sort_order;
-	argc = parse_options(argc, argv, options, diff_usage, 0);
+	struct data__file *d;
+	static const char *defaults[] = {
+		"perf.data.old",
+		"perf.data",
+	};
+	int i;
+
+	data__files_cnt = 2;
+
 	if (argc) {
 		if (argc > 2)
 			usage_with_options(diff_usage, options);
 		if (argc == 2) {
-			input_old = argv[0];
-			input_new = argv[1];
+			defaults[0] = argv[0];
+			defaults[1] = argv[1];
 		} else
-			input_new = argv[0];
+			defaults[1] = argv[0];
 	} else if (symbol_conf.default_guest_vmlinux_name ||
 		   symbol_conf.default_guest_kallsyms) {
-		input_old = "perf.data.host";
-		input_new = "perf.data.guest";
+		defaults[0] = "perf.data.host";
+		defaults[1] = "perf.data.guest";
 	}
 
+	data__files = zalloc(sizeof(*data__files) * data__files_cnt);
+	if (!data__files)
+		return -ENOMEM;
+
+	data__for_each_file(i, d) {
+		d->file = defaults[i];
+		d->idx  = i;
+	}
+
+	return 0;
+}
+
+int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	sort_order = diff__default_sort_order;
+	argc = parse_options(argc, argv, options, diff_usage, 0);
+
 	if (symbol__init() < 0)
 		return -1;
 
+	if (data_init(argc, argv) < 0)
+		return -1;
+
 	ui_init();
 
 	if (setup_sorting() < 0)
-- 
cgit v0.10.2


From 9af303e22a317d1cc6f440e08f72428830708b37 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 1 Dec 2012 21:15:40 +0100
Subject: perf diff: Switching the base hists to be pairs head

Making the baseline hists act as the pairs head.

So far we didn't care which hists acts as the pairs head, because we
have only 2 files to deal with and either of them is suitable for the
job.

But if we want to process more files, we need to pick one hists to act
as the pairs head, and the baseline hists is the most suitable.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-cklmt2o4j87i9viz900245ae@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 015ca2d..0cfe99e 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -167,34 +167,34 @@ double perf_diff__period_percent(struct hist_entry *he, u64 period)
 
 double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair)
 {
-	double new_percent = perf_diff__period_percent(he, he->stat.period);
-	double old_percent = perf_diff__period_percent(pair, pair->stat.period);
+	double old_percent = perf_diff__period_percent(he, he->stat.period);
+	double new_percent = perf_diff__period_percent(pair, pair->stat.period);
 
-	he->diff.period_ratio_delta = new_percent - old_percent;
-	he->diff.computed = true;
-	return he->diff.period_ratio_delta;
+	pair->diff.period_ratio_delta = new_percent - old_percent;
+	pair->diff.computed = true;
+	return pair->diff.period_ratio_delta;
 }
 
 double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
 {
-	double new_period = he->stat.period;
-	double old_period = pair->stat.period;
+	double old_period = he->stat.period ?: 1;
+	double new_period = pair->stat.period;
 
-	he->diff.computed = true;
-	he->diff.period_ratio = new_period / old_period;
-	return he->diff.period_ratio;
+	pair->diff.computed = true;
+	pair->diff.period_ratio = new_period / old_period;
+	return pair->diff.period_ratio;
 }
 
 s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
 {
-	u64 new_period = he->stat.period;
-	u64 old_period = pair->stat.period;
+	u64 old_period = he->stat.period;
+	u64 new_period = pair->stat.period;
 
-	he->diff.computed = true;
-	he->diff.wdiff = new_period * compute_wdiff_w2 -
-			 old_period * compute_wdiff_w1;
+	pair->diff.computed = true;
+	pair->diff.wdiff = new_period * compute_wdiff_w2 -
+			   old_period * compute_wdiff_w1;
 
-	return he->diff.wdiff;
+	return pair->diff.wdiff;
 }
 
 static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
@@ -203,15 +203,15 @@ static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
 	return scnprintf(buf, size,
 			 "(%" PRIu64 " * 100 / %" PRIu64 ") - "
 			 "(%" PRIu64 " * 100 / %" PRIu64 ")",
-			  he->stat.period, he->hists->stats.total_period,
-			  pair->stat.period, pair->hists->stats.total_period);
+			  pair->stat.period, pair->hists->stats.total_period,
+			  he->stat.period, he->hists->stats.total_period);
 }
 
 static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
 			 char *buf, size_t size)
 {
-	double new_period = he->stat.period;
-	double old_period = pair->stat.period;
+	double old_period = he->stat.period;
+	double new_period = pair->stat.period;
 
 	return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
 }
@@ -219,8 +219,8 @@ static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
 static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
 			 char *buf, size_t size)
 {
-	u64 new_period = he->stat.period;
-	u64 old_period = pair->stat.period;
+	u64 old_period = he->stat.period;
+	u64 new_period = pair->stat.period;
 
 	return scnprintf(buf, size,
 		  "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
@@ -462,23 +462,23 @@ static void hists__compute_resort(struct hists *hists)
 	}
 }
 
-static void hists__process(struct hists *old, struct hists *new)
+static void hists__process(struct hists *base, struct hists *new)
 {
-	hists__match(new, old);
+	hists__match(base, new);
 
 	if (show_baseline_only)
-		hists__baseline_only(new);
+		hists__baseline_only(base);
 	else
-		hists__link(new, old);
+		hists__link(base, new);
 
 	if (sort_compute) {
-		hists__precompute(new);
-		hists__compute_resort(new);
+		hists__precompute(base);
+		hists__compute_resort(base);
 	} else {
-		hists__output_resort(new);
+		hists__output_resort(base);
 	}
 
-	hists__fprintf(new, true, 0, 0, 0, stdout);
+	hists__fprintf(base, true, 0, 0, 0, stdout);
 }
 
 static void data_process(void)
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f45c97f..02313a9 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -178,18 +178,8 @@ static int hpp__width_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
 
 static double baseline_percent(struct hist_entry *he)
 {
-	struct hist_entry *pair = hist_entry__next_pair(he);
-	struct hists *pair_hists = pair ? pair->hists : NULL;
-	double percent = 0.0;
-
-	if (pair) {
-		u64 total_period = pair_hists->stats.total_period;
-		u64 base_period  = pair->stat.period;
-
-		percent = 100.0 * base_period / total_period;
-	}
-
-	return percent;
+	struct hists *hists = he->hists;
+	return 100.0 * he->stat.period / hists->stats.total_period;
 }
 
 static int hpp__color_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
@@ -197,10 +187,8 @@ static int hpp__color_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
 {
 	double percent = baseline_percent(he);
 
-	if (hist_entry__has_pairs(he) || symbol_conf.field_sep)
-		return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
-	else
-		return scnprintf(hpp->buf, hpp->size, "        ");
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%",
+				      percent);
 }
 
 static int hpp__entry_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
@@ -209,10 +197,7 @@ static int hpp__entry_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
 	double percent = baseline_percent(he);
 	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
 
-	if (hist_entry__has_pairs(he) || symbol_conf.field_sep)
-		return scnprintf(hpp->buf, hpp->size, fmt, percent);
-	else
-		return scnprintf(hpp->buf, hpp->size, "            ");
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
 }
 
 static int hpp__header_period_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
-- 
cgit v0.10.2


From e0af43d2486fc50208076cfd93af55615fd4adfd Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 1 Dec 2012 21:18:20 +0100
Subject: perf hists: Marking dummy hists entries

It does not make sense to do some computations (ratio, wdiff) when the
hist_entry is a 'dummy' one, i.e. added via hists__link.

Adding a dummy field to struct hist_entry which indicates that the entry
was added by hists__link, and avoiding some of the processing for such
entries.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-g8bxml0n0pnqsrpyd98p0ird@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 02313a9..a359b75 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -187,8 +187,11 @@ static int hpp__color_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
 {
 	double percent = baseline_percent(he);
 
-	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%",
-				      percent);
+	if (!he->dummy)
+		return percent_color_snprintf(hpp->buf, hpp->size,
+					      " %6.2f%%", percent);
+	else
+		return scnprintf(hpp->buf, hpp->size, "        ");
 }
 
 static int hpp__entry_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
@@ -197,7 +200,10 @@ static int hpp__entry_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
 	double percent = baseline_percent(he);
 	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
 
-	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+	if (!he->dummy)
+		return scnprintf(hpp->buf, hpp->size, fmt, percent);
+	else
+		return scnprintf(hpp->buf, hpp->size, "            ");
 }
 
 static int hpp__header_period_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
@@ -251,8 +257,7 @@ static int hpp__entry_delta(struct perf_hpp_fmt *_fmt __maybe_unused,
 			diff = he->diff.period_ratio_delta;
 		else
 			diff = perf_diff__compute_delta(he, pair);
-	} else
-		diff = perf_diff__period_percent(he, he->stat.period);
+	}
 
 	if (fabs(diff) >= 0.01)
 		scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
@@ -282,7 +287,8 @@ static int hpp__entry_ratio(struct perf_hpp_fmt *_fmt __maybe_unused,
 	char buf[32] = " ";
 	double ratio = 0.0;
 
-	if (pair) {
+	/* No point for ratio number if we are dummy.. */
+	if (!he->dummy && pair) {
 		if (he->diff.computed)
 			ratio = he->diff.period_ratio;
 		else
@@ -317,7 +323,8 @@ static int hpp__entry_wdiff(struct perf_hpp_fmt *_fmt __maybe_unused,
 	char buf[32] = " ";
 	s64 wdiff = 0;
 
-	if (pair) {
+	/* No point for wdiff number if we are dummy.. */
+	if (!he->dummy && pair) {
 		if (he->diff.computed)
 			wdiff = he->diff.wdiff;
 		else
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b11a6cf..a9dd1b9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -912,6 +912,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 		rb_link_node(&he->rb_node_in, parent, p);
 		rb_insert_color(&he->rb_node_in, root);
 		hists__inc_nr_entries(hists, he);
+		he->dummy = true;
 	}
 out:
 	return he;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a4a6d0b..586022d 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -89,6 +89,9 @@ struct hist_entry {
 
 	struct hist_entry_diff	diff;
 
+	/* We are added by hists__add_dummy_entry. */
+	bool			dummy;
+
 	/* XXX These two should move to some tree widget lib */
 	u16			row_offset;
 	u16			nr_rows;
-- 
cgit v0.10.2


From 1d81c7fc25c0f0559e3306fc73ecfe78b740c9e8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 1 Dec 2012 21:56:03 +0100
Subject: perf diff: Display data file info ahead of the diff output

Data files are referenced through the index of the file on the command
line. Adding a list of the data files for each index to ease navigation
for the user.

It's displayed only in verbose mode.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-dfjxa6n116ughjjxohpkuvi8@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 0cfe99e..9574ba1 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -481,6 +481,21 @@ static void hists__process(struct hists *base, struct hists *new)
 	hists__fprintf(base, true, 0, 0, 0, stdout);
 }
 
+static void data__fprintf(void)
+{
+	struct data__file *d;
+	int i;
+
+	fprintf(stdout, "# Data files:\n");
+
+	data__for_each_file(i, d)
+		fprintf(stdout, "#  [%d] %s %s\n",
+			d->idx, d->file,
+			!d->idx ? "(Baseline)" : "");
+
+	fprintf(stdout, "#\n");
+}
+
 static void data_process(void)
 {
 	struct perf_evlist *evlist_old = data__files[0].session->evlist;
@@ -500,6 +515,9 @@ static void data_process(void)
 
 		first = false;
 
+		if (verbose)
+			data__fprintf();
+
 		hists__process(&evsel_old->hists, &evsel_new->hists);
 	}
 }
-- 
cgit v0.10.2


From 345dc0b45ecc37a239723f2b6392cab04d8b0eff Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sun, 3 Feb 2013 20:08:34 +0100
Subject: perf diff: Move diff related columns into diff command

Moving the diff related columns into the diff command, because they are
not used by any other command.

Also moving the column entry functions under a generic one, with
baseline as an exception.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-v58qfl75xkqojz54h1v5fy6p@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 9574ba1..8734f1c 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -18,6 +18,27 @@
 #include "util/util.h"
 
 #include <stdlib.h>
+#include <math.h>
+
+/* Diff command specific HPP columns. */
+enum {
+	PERF_HPP_DIFF__BASELINE,
+	PERF_HPP_DIFF__PERIOD,
+	PERF_HPP_DIFF__PERIOD_BASELINE,
+	PERF_HPP_DIFF__DELTA,
+	PERF_HPP_DIFF__RATIO,
+	PERF_HPP_DIFF__WEIGHTED_DIFF,
+	PERF_HPP_DIFF__FORMULA,
+
+	PERF_HPP_DIFF__MAX_INDEX
+};
+
+struct diff_hpp_fmt {
+	struct perf_hpp_fmt	 fmt;
+	int			 idx;
+	char			*header;
+	int			 header_width;
+};
 
 struct data__file {
 	struct perf_session	*session;
@@ -60,6 +81,47 @@ const char *compute_names[COMPUTE_MAX] = {
 
 static int compute;
 
+static int compute_2_hpp[COMPUTE_MAX] = {
+	[COMPUTE_DELTA]		= PERF_HPP_DIFF__DELTA,
+	[COMPUTE_RATIO]		= PERF_HPP_DIFF__RATIO,
+	[COMPUTE_WEIGHTED_DIFF]	= PERF_HPP_DIFF__WEIGHTED_DIFF,
+};
+
+#define MAX_COL_WIDTH 70
+
+static struct header_column {
+	const char *name;
+	int width;
+} columns[PERF_HPP_DIFF__MAX_INDEX] = {
+	[PERF_HPP_DIFF__BASELINE] = {
+		.name  = "Baseline",
+	},
+	[PERF_HPP_DIFF__PERIOD] = {
+		.name  = "Period",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__PERIOD_BASELINE] = {
+		.name  = "Base period",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__DELTA] = {
+		.name  = "Delta",
+		.width = 7,
+	},
+	[PERF_HPP_DIFF__RATIO] = {
+		.name  = "Ratio",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__WEIGHTED_DIFF] = {
+		.name  = "Weighted diff",
+		.width = 14,
+	},
+	[PERF_HPP_DIFF__FORMULA] = {
+		.name  = "Formula",
+		.width = MAX_COL_WIDTH,
+	}
+};
+
 static int setup_compute_opt_wdiff(char *opt)
 {
 	char *w1_str = opt;
@@ -596,34 +658,246 @@ static const struct option options[] = {
 	OPT_END()
 };
 
-static void ui_init(void)
+static double baseline_percent(struct hist_entry *he)
 {
-	/*
-	 * Display baseline/delta/ratio
-	 * formula/periods columns.
-	 */
-	perf_hpp__column_enable(PERF_HPP__BASELINE);
+	struct hists *hists = he->hists;
+	return 100.0 * he->stat.period / hists->stats.total_period;
+}
 
-	switch (compute) {
-	case COMPUTE_DELTA:
-		perf_hpp__column_enable(PERF_HPP__DELTA);
+static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
+			       struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct diff_hpp_fmt *dfmt =
+		container_of(fmt, struct diff_hpp_fmt, fmt);
+	double percent = baseline_percent(he);
+	char pfmt[20] = " ";
+
+	if (!he->dummy) {
+		scnprintf(pfmt, 20, "%%%d.2f%%%%", dfmt->header_width - 1);
+		return percent_color_snprintf(hpp->buf, hpp->size,
+					      pfmt, percent);
+	} else
+		return scnprintf(hpp->buf, hpp->size, "%*s",
+				 dfmt->header_width, pfmt);
+}
+
+static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size)
+{
+	double percent = baseline_percent(he);
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+	int ret = 0;
+
+	if (!he->dummy)
+		ret = scnprintf(buf, size, fmt, percent);
+
+	return ret;
+}
+
+static void
+hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
+{
+	switch (idx) {
+	case PERF_HPP_DIFF__PERIOD_BASELINE:
+		scnprintf(buf, size, "%" PRIu64, he->stat.period);
 		break;
-	case COMPUTE_RATIO:
-		perf_hpp__column_enable(PERF_HPP__RATIO);
+
+	default:
 		break;
-	case COMPUTE_WEIGHTED_DIFF:
-		perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF);
+	}
+}
+
+static void
+hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
+		int idx, char *buf, size_t size)
+{
+	double diff;
+	double ratio;
+	s64 wdiff;
+
+	switch (idx) {
+	case PERF_HPP_DIFF__DELTA:
+		if (pair->diff.computed)
+			diff = pair->diff.period_ratio_delta;
+		else
+			diff = perf_diff__compute_delta(he, pair);
+
+		if (fabs(diff) >= 0.01)
+			scnprintf(buf, size, "%+4.2F%%", diff);
+		break;
+
+	case PERF_HPP_DIFF__RATIO:
+		/* No point for ratio number if we are dummy.. */
+		if (he->dummy)
+			break;
+
+		if (pair->diff.computed)
+			ratio = pair->diff.period_ratio;
+		else
+			ratio = perf_diff__compute_ratio(he, pair);
+
+		if (ratio > 0.0)
+			scnprintf(buf, size, "%14.6F", ratio);
+		break;
+
+	case PERF_HPP_DIFF__WEIGHTED_DIFF:
+		/* No point for wdiff number if we are dummy.. */
+		if (he->dummy)
+			break;
+
+		if (pair->diff.computed)
+			wdiff = pair->diff.wdiff;
+		else
+			wdiff = perf_diff__compute_wdiff(he, pair);
+
+		if (wdiff != 0)
+			scnprintf(buf, size, "%14ld", wdiff);
+		break;
+
+	case PERF_HPP_DIFF__FORMULA:
+		perf_diff__formula(he, pair, buf, size);
 		break;
+
+	case PERF_HPP_DIFF__PERIOD:
+		scnprintf(buf, size, "%" PRIu64, pair->stat.period);
+		break;
+
 	default:
 		BUG_ON(1);
 	};
+}
+
+static void
+__hpp__entry_global(struct hist_entry *he, int idx, char *buf, size_t size)
+{
+	struct hist_entry *pair = hist_entry__next_pair(he);
+
+	/* baseline is special */
+	if (idx == PERF_HPP_DIFF__BASELINE)
+		hpp__entry_baseline(he, buf, size);
+	else {
+		if (pair)
+			hpp__entry_pair(he, pair, idx, buf, size);
+		else
+			hpp__entry_unpair(he, idx, buf, size);
+	}
+}
+
+static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp,
+			     struct hist_entry *he)
+{
+	struct diff_hpp_fmt *dfmt =
+		container_of(_fmt, struct diff_hpp_fmt, fmt);
+	char buf[MAX_COL_WIDTH] = " ";
+
+	__hpp__entry_global(he, dfmt->idx, buf, MAX_COL_WIDTH);
+
+	if (symbol_conf.field_sep)
+		return scnprintf(hpp->buf, hpp->size, "%s", buf);
+	else
+		return scnprintf(hpp->buf, hpp->size, "%*s",
+				 dfmt->header_width, buf);
+}
+
+static int hpp__header(struct perf_hpp_fmt *fmt,
+		       struct perf_hpp *hpp)
+{
+	struct diff_hpp_fmt *dfmt =
+		container_of(fmt, struct diff_hpp_fmt, fmt);
+
+	BUG_ON(!dfmt->header);
+	return scnprintf(hpp->buf, hpp->size, dfmt->header);
+}
+
+static int hpp__width(struct perf_hpp_fmt *fmt,
+		      struct perf_hpp *hpp __maybe_unused)
+{
+	struct diff_hpp_fmt *dfmt =
+		container_of(fmt, struct diff_hpp_fmt, fmt);
+
+	BUG_ON(dfmt->header_width <= 0);
+	return dfmt->header_width;
+}
+
+#define hpp__color_global hpp__entry_global
+
+#define FMT(_i, _entry, _color)					\
+	[_i] = {						\
+		.fmt = {					\
+			.header	= hpp__header,			\
+			.width	= hpp__width,			\
+			.entry	= hpp__entry_ ## _entry,	\
+			.color	= hpp__color_ ## _color,	\
+		},						\
+		.idx = _i,					\
+	}
+
+#define FMT_GLOBAL(_i)	 FMT(_i, global, global)
+#define FMT_BASELINE(_i) FMT(_i, global, baseline)
+
+static struct diff_hpp_fmt diff_fmt[] = {
+	FMT_BASELINE(PERF_HPP_DIFF__BASELINE),
+	FMT_GLOBAL(PERF_HPP_DIFF__PERIOD),
+	FMT_GLOBAL(PERF_HPP_DIFF__PERIOD_BASELINE),
+	FMT_GLOBAL(PERF_HPP_DIFF__DELTA),
+	FMT_GLOBAL(PERF_HPP_DIFF__RATIO),
+	FMT_GLOBAL(PERF_HPP_DIFF__WEIGHTED_DIFF),
+	FMT_GLOBAL(PERF_HPP_DIFF__FORMULA),
+};
+
+static void init_header(struct diff_hpp_fmt *dfmt)
+{
+#define MAX_HEADER_NAME 100
+	char buf_indent[MAX_HEADER_NAME];
+	char buf[MAX_HEADER_NAME];
+	const char *header = NULL;
+	int width = 0;
+
+	BUG_ON(dfmt->idx >= PERF_HPP_DIFF__MAX_INDEX);
+	header = columns[dfmt->idx].name;
+	width  = columns[dfmt->idx].width;
+
+	/* Only our defined HPP fmts should appear here. */
+	BUG_ON(!header);
+
+#define NAME (data__files_cnt > 2 ? buf : header)
+	dfmt->header_width = width;
+	width = (int) strlen(NAME);
+	if (dfmt->header_width < width)
+		dfmt->header_width = width;
+
+	scnprintf(buf_indent, MAX_HEADER_NAME, "%*s",
+		  dfmt->header_width, NAME);
+
+	dfmt->header = strdup(buf_indent);
+#undef MAX_HEADER_NAME
+#undef NAME
+}
+
+static void column_enable(unsigned col)
+{
+	struct diff_hpp_fmt *dfmt;
+
+	BUG_ON(col >= PERF_HPP_DIFF__MAX_INDEX);
+	dfmt = &diff_fmt[col];
+	init_header(dfmt);
+	perf_hpp__column_register(&dfmt->fmt);
+}
+
+static void ui_init(void)
+{
+	/*
+	 * Display baseline/delta/ratio/
+	 * formula/periods columns.
+	 */
+	column_enable(PERF_HPP_DIFF__BASELINE);
+	column_enable(compute_2_hpp[compute]);
 
 	if (show_formula)
-		perf_hpp__column_enable(PERF_HPP__FORMULA);
+		column_enable(PERF_HPP_DIFF__FORMULA);
 
 	if (show_period) {
-		perf_hpp__column_enable(PERF_HPP__PERIOD);
-		perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE);
+		column_enable(PERF_HPP_DIFF__PERIOD);
+		column_enable(PERF_HPP_DIFF__PERIOD_BASELINE);
 	}
 }
 
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index a359b75..dc900d7 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -163,207 +163,6 @@ HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8)
 HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12)
 HPP_RAW_FNS(period, "Period", period, 12, 12)
 
-
-static int hpp__header_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
-				struct perf_hpp *hpp)
-{
-	return scnprintf(hpp->buf, hpp->size, "Baseline");
-}
-
-static int hpp__width_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
-			       struct perf_hpp *hpp __maybe_unused)
-{
-	return 8;
-}
-
-static double baseline_percent(struct hist_entry *he)
-{
-	struct hists *hists = he->hists;
-	return 100.0 * he->stat.period / hists->stats.total_period;
-}
-
-static int hpp__color_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
-			       struct perf_hpp *hpp, struct hist_entry *he)
-{
-	double percent = baseline_percent(he);
-
-	if (!he->dummy)
-		return percent_color_snprintf(hpp->buf, hpp->size,
-					      " %6.2f%%", percent);
-	else
-		return scnprintf(hpp->buf, hpp->size, "        ");
-}
-
-static int hpp__entry_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
-			       struct perf_hpp *hpp, struct hist_entry *he)
-{
-	double percent = baseline_percent(he);
-	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
-
-	if (!he->dummy)
-		return scnprintf(hpp->buf, hpp->size, fmt, percent);
-	else
-		return scnprintf(hpp->buf, hpp->size, "            ");
-}
-
-static int hpp__header_period_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
-				       struct perf_hpp *hpp)
-{
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
-
-	return scnprintf(hpp->buf, hpp->size, fmt, "Period Base");
-}
-
-static int hpp__width_period_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
-				      struct perf_hpp *hpp __maybe_unused)
-{
-	return 12;
-}
-
-static int hpp__entry_period_baseline(struct perf_hpp_fmt *_fmt __maybe_unused,
-				      struct perf_hpp *hpp, struct hist_entry *he)
-{
-	struct hist_entry *pair = hist_entry__next_pair(he);
-	u64 period = pair ? pair->stat.period : 0;
-	const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64;
-
-	return scnprintf(hpp->buf, hpp->size, fmt, period);
-}
-
-static int hpp__header_delta(struct perf_hpp_fmt *_fmt __maybe_unused,
-			     struct perf_hpp *hpp)
-{
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
-
-	return scnprintf(hpp->buf, hpp->size, fmt, "Delta");
-}
-
-static int hpp__width_delta(struct perf_hpp_fmt *fmt __maybe_unused,
-			    struct perf_hpp *hpp __maybe_unused)
-{
-	return 7;
-}
-
-static int hpp__entry_delta(struct perf_hpp_fmt *_fmt __maybe_unused,
-			    struct perf_hpp *hpp, struct hist_entry *he)
-{
-	struct hist_entry *pair = hist_entry__next_pair(he);
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
-	char buf[32] = " ";
-	double diff = 0.0;
-
-	if (pair) {
-		if (he->diff.computed)
-			diff = he->diff.period_ratio_delta;
-		else
-			diff = perf_diff__compute_delta(he, pair);
-	}
-
-	if (fabs(diff) >= 0.01)
-		scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
-
-	return scnprintf(hpp->buf, hpp->size, fmt, buf);
-}
-
-static int hpp__header_ratio(struct perf_hpp_fmt *_fmt __maybe_unused,
-			     struct perf_hpp *hpp)
-{
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
-
-	return scnprintf(hpp->buf, hpp->size, fmt, "Ratio");
-}
-
-static int hpp__width_ratio(struct perf_hpp_fmt *fmt __maybe_unused,
-			    struct perf_hpp *hpp __maybe_unused)
-{
-	return 14;
-}
-
-static int hpp__entry_ratio(struct perf_hpp_fmt *_fmt __maybe_unused,
-			    struct perf_hpp *hpp, struct hist_entry *he)
-{
-	struct hist_entry *pair = hist_entry__next_pair(he);
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
-	char buf[32] = " ";
-	double ratio = 0.0;
-
-	/* No point for ratio number if we are dummy.. */
-	if (!he->dummy && pair) {
-		if (he->diff.computed)
-			ratio = he->diff.period_ratio;
-		else
-			ratio = perf_diff__compute_ratio(he, pair);
-	}
-
-	if (ratio > 0.0)
-		scnprintf(buf, sizeof(buf), "%+14.6F", ratio);
-
-	return scnprintf(hpp->buf, hpp->size, fmt, buf);
-}
-
-static int hpp__header_wdiff(struct perf_hpp_fmt *_fmt __maybe_unused,
-			     struct perf_hpp *hpp)
-{
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
-
-	return scnprintf(hpp->buf, hpp->size, fmt, "Weighted diff");
-}
-
-static int hpp__width_wdiff(struct perf_hpp_fmt *fmt __maybe_unused,
-			    struct perf_hpp *hpp __maybe_unused)
-{
-	return 14;
-}
-
-static int hpp__entry_wdiff(struct perf_hpp_fmt *_fmt __maybe_unused,
-			    struct perf_hpp *hpp, struct hist_entry *he)
-{
-	struct hist_entry *pair = hist_entry__next_pair(he);
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%14s";
-	char buf[32] = " ";
-	s64 wdiff = 0;
-
-	/* No point for wdiff number if we are dummy.. */
-	if (!he->dummy && pair) {
-		if (he->diff.computed)
-			wdiff = he->diff.wdiff;
-		else
-			wdiff = perf_diff__compute_wdiff(he, pair);
-	}
-
-	if (wdiff != 0)
-		scnprintf(buf, sizeof(buf), "%14ld", wdiff);
-
-	return scnprintf(hpp->buf, hpp->size, fmt, buf);
-}
-
-static int hpp__header_formula(struct perf_hpp_fmt *_fmt __maybe_unused,
-			       struct perf_hpp *hpp)
-{
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%70s";
-
-	return scnprintf(hpp->buf, hpp->size, fmt, "Formula");
-}
-
-static int hpp__width_formula(struct perf_hpp_fmt *fmt __maybe_unused,
-			      struct perf_hpp *hpp __maybe_unused)
-{
-	return 70;
-}
-
-static int hpp__entry_formula(struct perf_hpp_fmt *_fmt __maybe_unused,
-			      struct perf_hpp *hpp, struct hist_entry *he)
-{
-	struct hist_entry *pair = hist_entry__next_pair(he);
-	const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s";
-	char buf[96] = " ";
-
-	if (pair)
-		perf_diff__formula(he, pair, buf, sizeof(buf));
-
-	return scnprintf(hpp->buf, hpp->size, fmt, buf);
-}
-
 #define HPP__COLOR_PRINT_FNS(_name)			\
 	{						\
 		.header	= hpp__header_ ## _name,	\
@@ -380,19 +179,13 @@ static int hpp__entry_formula(struct perf_hpp_fmt *_fmt __maybe_unused,
 	}
 
 struct perf_hpp_fmt perf_hpp__format[] = {
-	HPP__COLOR_PRINT_FNS(baseline),
 	HPP__COLOR_PRINT_FNS(overhead),
 	HPP__COLOR_PRINT_FNS(overhead_sys),
 	HPP__COLOR_PRINT_FNS(overhead_us),
 	HPP__COLOR_PRINT_FNS(overhead_guest_sys),
 	HPP__COLOR_PRINT_FNS(overhead_guest_us),
 	HPP__PRINT_FNS(samples),
-	HPP__PRINT_FNS(period),
-	HPP__PRINT_FNS(period_baseline),
-	HPP__PRINT_FNS(delta),
-	HPP__PRINT_FNS(ratio),
-	HPP__PRINT_FNS(wdiff),
-	HPP__PRINT_FNS(formula)
+	HPP__PRINT_FNS(period)
 };
 
 LIST_HEAD(perf_hpp__list);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0c62116..79681f6 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -159,7 +159,7 @@ extern struct list_head perf_hpp__list;
 extern struct perf_hpp_fmt perf_hpp__format[];
 
 enum {
-	PERF_HPP__BASELINE,
+	/* Matches perf_hpp__format array. */
 	PERF_HPP__OVERHEAD,
 	PERF_HPP__OVERHEAD_SYS,
 	PERF_HPP__OVERHEAD_US,
@@ -167,11 +167,6 @@ enum {
 	PERF_HPP__OVERHEAD_GUEST_US,
 	PERF_HPP__SAMPLES,
 	PERF_HPP__PERIOD,
-	PERF_HPP__PERIOD_BASELINE,
-	PERF_HPP__DELTA,
-	PERF_HPP__RATIO,
-	PERF_HPP__WEIGHTED_DIFF,
-	PERF_HPP__FORMULA,
 
 	PERF_HPP__MAX_INDEX
 };
-- 
cgit v0.10.2


From c818b49820aea96d6a1b43815bae0ee38b09ca0d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 1 Dec 2012 21:57:04 +0100
Subject: perf diff: Move columns into struct data__file

Another step towards multiple data files support. Having the column
definitions within struct data__file forces each data file to have its
own columns.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-lnfqj7k7fqw8bz07pupi5464@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8734f1c..7787ee2 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -44,6 +44,7 @@ struct data__file {
 	struct perf_session	*session;
 	const char		*file;
 	int			 idx;
+	struct diff_hpp_fmt	 fmt[PERF_HPP_DIFF__MAX_INDEX];
 };
 
 static struct data__file *data__files;
@@ -584,6 +585,17 @@ static void data_process(void)
 	}
 }
 
+static void data__free(struct data__file *d)
+{
+	int col;
+
+	for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) {
+		struct diff_hpp_fmt *fmt = &d->fmt[col];
+
+		free(fmt->header);
+	}
+}
+
 static int __cmd_diff(void)
 {
 	struct data__file *d;
@@ -613,6 +625,8 @@ static int __cmd_diff(void)
 	data__for_each_file(i, d) {
 		if (d->session)
 			perf_session__delete(d->session);
+
+		data__free(d);
 	}
 
 	free(data__files);
@@ -818,32 +832,6 @@ static int hpp__width(struct perf_hpp_fmt *fmt,
 	return dfmt->header_width;
 }
 
-#define hpp__color_global hpp__entry_global
-
-#define FMT(_i, _entry, _color)					\
-	[_i] = {						\
-		.fmt = {					\
-			.header	= hpp__header,			\
-			.width	= hpp__width,			\
-			.entry	= hpp__entry_ ## _entry,	\
-			.color	= hpp__color_ ## _color,	\
-		},						\
-		.idx = _i,					\
-	}
-
-#define FMT_GLOBAL(_i)	 FMT(_i, global, global)
-#define FMT_BASELINE(_i) FMT(_i, global, baseline)
-
-static struct diff_hpp_fmt diff_fmt[] = {
-	FMT_BASELINE(PERF_HPP_DIFF__BASELINE),
-	FMT_GLOBAL(PERF_HPP_DIFF__PERIOD),
-	FMT_GLOBAL(PERF_HPP_DIFF__PERIOD_BASELINE),
-	FMT_GLOBAL(PERF_HPP_DIFF__DELTA),
-	FMT_GLOBAL(PERF_HPP_DIFF__RATIO),
-	FMT_GLOBAL(PERF_HPP_DIFF__WEIGHTED_DIFF),
-	FMT_GLOBAL(PERF_HPP_DIFF__FORMULA),
-};
-
 static void init_header(struct diff_hpp_fmt *dfmt)
 {
 #define MAX_HEADER_NAME 100
@@ -873,31 +861,56 @@ static void init_header(struct diff_hpp_fmt *dfmt)
 #undef NAME
 }
 
-static void column_enable(unsigned col)
+static void data__hpp_register(struct data__file *d, int idx)
 {
-	struct diff_hpp_fmt *dfmt;
+	struct diff_hpp_fmt *dfmt = &d->fmt[idx];
+	struct perf_hpp_fmt *fmt = &dfmt->fmt;
+
+	dfmt->idx = idx;
+
+	fmt->header = hpp__header;
+	fmt->width  = hpp__width;
+	fmt->entry  = hpp__entry_global;
+
+	/* TODO more colors */
+	if (idx == PERF_HPP_DIFF__BASELINE)
+		fmt->color = hpp__color_baseline;
 
-	BUG_ON(col >= PERF_HPP_DIFF__MAX_INDEX);
-	dfmt = &diff_fmt[col];
 	init_header(dfmt);
-	perf_hpp__column_register(&dfmt->fmt);
+	perf_hpp__column_register(fmt);
 }
 
 static void ui_init(void)
 {
-	/*
-	 * Display baseline/delta/ratio/
-	 * formula/periods columns.
-	 */
-	column_enable(PERF_HPP_DIFF__BASELINE);
-	column_enable(compute_2_hpp[compute]);
+	struct data__file *d;
+	int i;
+
+	data__for_each_file(i, d) {
+
+		/*
+		 * Baseline or compute realted columns:
+		 *
+		 *   PERF_HPP_DIFF__BASELINE
+		 *   PERF_HPP_DIFF__DELTA
+		 *   PERF_HPP_DIFF__RATIO
+		 *   PERF_HPP_DIFF__WEIGHTED_DIFF
+		 */
+		data__hpp_register(d, i ? compute_2_hpp[compute] :
+					  PERF_HPP_DIFF__BASELINE);
 
-	if (show_formula)
-		column_enable(PERF_HPP_DIFF__FORMULA);
+		/*
+		 * And the rest:
+		 *
+		 * PERF_HPP_DIFF__FORMULA
+		 * PERF_HPP_DIFF__PERIOD
+		 * PERF_HPP_DIFF__PERIOD_BASELINE
+		 */
+		if (show_formula && i)
+			data__hpp_register(d, PERF_HPP_DIFF__FORMULA);
 
-	if (show_period) {
-		column_enable(PERF_HPP_DIFF__PERIOD);
-		column_enable(PERF_HPP_DIFF__PERIOD_BASELINE);
+		if (show_period)
+			data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD :
+						  PERF_HPP_DIFF__PERIOD_BASELINE);
 	}
 }
 
-- 
cgit v0.10.2


From 22aeb7f597650284591ad0f61b069ded3ecf91db Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 1 Dec 2012 22:00:00 +0100
Subject: perf diff: Change diff command to work over multiple data files

Give the diff command the flexibility to accept multiple data files on
input. If no input file is given the standard behaviour stands and diff
inspects the 'perf.data' and 'perf.data.old' files.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-8j3xer54ltvs76t0fh01gcvu@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 7787ee2..cc7bf4f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -44,6 +44,7 @@ struct data__file {
 	struct perf_session	*session;
 	const char		*file;
 	int			 idx;
+	struct hists		*hists;
 	struct diff_hpp_fmt	 fmt[PERF_HPP_DIFF__MAX_INDEX];
 };
 
@@ -56,6 +57,7 @@ static int data__files_cnt;
 	     i++, d = &data__files[i])
 
 #define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
+#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
 
 static char diff__default_sort_order[] = "dso,symbol";
 static bool force;
@@ -525,23 +527,19 @@ static void hists__compute_resort(struct hists *hists)
 	}
 }
 
-static void hists__process(struct hists *base, struct hists *new)
+static void hists__process(struct hists *hists)
 {
-	hists__match(base, new);
-
 	if (show_baseline_only)
-		hists__baseline_only(base);
-	else
-		hists__link(base, new);
+		hists__baseline_only(hists);
 
 	if (sort_compute) {
-		hists__precompute(base);
-		hists__compute_resort(base);
+		hists__precompute(hists);
+		hists__compute_resort(hists);
 	} else {
-		hists__output_resort(base);
+		hists__output_resort(hists);
 	}
 
-	hists__fprintf(base, true, 0, 0, 0, stdout);
+	hists__fprintf(hists, true, 0, 0, 0, stdout);
 }
 
 static void data__fprintf(void)
@@ -561,27 +559,40 @@ static void data__fprintf(void)
 
 static void data_process(void)
 {
-	struct perf_evlist *evlist_old = data__files[0].session->evlist;
-	struct perf_evlist *evlist_new = data__files[1].session->evlist;
-	struct perf_evsel *evsel_old;
+	struct perf_evlist *evlist_base = data__files[0].session->evlist;
+	struct perf_evsel *evsel_base;
 	bool first = true;
 
-	list_for_each_entry(evsel_old, &evlist_old->entries, node) {
-		struct perf_evsel *evsel_new;
+	list_for_each_entry(evsel_base, &evlist_base->entries, node) {
+		struct data__file *d;
+		int i;
 
-		evsel_new = evsel_match(evsel_old, evlist_new);
-		if (!evsel_new)
-			continue;
+		data__for_each_file_new(i, d) {
+			struct perf_evlist *evlist = d->session->evlist;
+			struct perf_evsel *evsel;
+
+			evsel = evsel_match(evsel_base, evlist);
+			if (!evsel)
+				continue;
+
+			d->hists = &evsel->hists;
+
+			hists__match(&evsel_base->hists, &evsel->hists);
+
+			if (!show_baseline_only)
+				hists__link(&evsel_base->hists,
+					    &evsel->hists);
+		}
 
 		fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
-			perf_evsel__name(evsel_old));
+			perf_evsel__name(evsel_base));
 
 		first = false;
 
-		if (verbose)
+		if (verbose || data__files_cnt > 2)
 			data__fprintf();
 
-		hists__process(&evsel_old->hists, &evsel_new->hists);
+		hists__process(&evsel_base->hists);
 	}
 }
 
@@ -780,10 +791,29 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
 	};
 }
 
+static struct hist_entry *get_pair(struct hist_entry *he,
+				   struct diff_hpp_fmt *dfmt)
+{
+	void *ptr = dfmt - dfmt->idx;
+	struct data__file *d = container_of(ptr, struct data__file, fmt);
+
+	if (hist_entry__has_pairs(he)) {
+		struct hist_entry *pair;
+
+		list_for_each_entry(pair, &he->pairs.head, pairs.node)
+			if (pair->hists == d->hists)
+				return pair;
+	}
+
+	return NULL;
+}
+
 static void
-__hpp__entry_global(struct hist_entry *he, int idx, char *buf, size_t size)
+__hpp__entry_global(struct hist_entry *he, struct diff_hpp_fmt *dfmt,
+		    char *buf, size_t size)
 {
-	struct hist_entry *pair = hist_entry__next_pair(he);
+	struct hist_entry *pair = get_pair(he, dfmt);
+	int idx = dfmt->idx;
 
 	/* baseline is special */
 	if (idx == PERF_HPP_DIFF__BASELINE)
@@ -803,7 +833,7 @@ static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp,
 		container_of(_fmt, struct diff_hpp_fmt, fmt);
 	char buf[MAX_COL_WIDTH] = " ";
 
-	__hpp__entry_global(he, dfmt->idx, buf, MAX_COL_WIDTH);
+	__hpp__entry_global(he, dfmt, buf, MAX_COL_WIDTH);
 
 	if (symbol_conf.field_sep)
 		return scnprintf(hpp->buf, hpp->size, "%s", buf);
@@ -832,7 +862,7 @@ static int hpp__width(struct perf_hpp_fmt *fmt,
 	return dfmt->header_width;
 }
 
-static void init_header(struct diff_hpp_fmt *dfmt)
+static void init_header(struct data__file *d, struct diff_hpp_fmt *dfmt)
 {
 #define MAX_HEADER_NAME 100
 	char buf_indent[MAX_HEADER_NAME];
@@ -847,6 +877,9 @@ static void init_header(struct diff_hpp_fmt *dfmt)
 	/* Only our defined HPP fmts should appear here. */
 	BUG_ON(!header);
 
+	if (data__files_cnt > 2)
+		scnprintf(buf, MAX_HEADER_NAME, "%s/%d", header, d->idx);
+
 #define NAME (data__files_cnt > 2 ? buf : header)
 	dfmt->header_width = width;
 	width = (int) strlen(NAME);
@@ -876,7 +909,7 @@ static void data__hpp_register(struct data__file *d, int idx)
 	if (idx == PERF_HPP_DIFF__BASELINE)
 		fmt->color = hpp__color_baseline;
 
-	init_header(dfmt);
+	init_header(d, dfmt);
 	perf_hpp__column_register(fmt);
 }
 
@@ -921,18 +954,18 @@ static int data_init(int argc, const char **argv)
 		"perf.data.old",
 		"perf.data",
 	};
+	bool use_default = true;
 	int i;
 
 	data__files_cnt = 2;
 
 	if (argc) {
-		if (argc > 2)
-			usage_with_options(diff_usage, options);
-		if (argc == 2) {
-			defaults[0] = argv[0];
-			defaults[1] = argv[1];
-		} else
+		if (argc == 1)
 			defaults[1] = argv[0];
+		else {
+			data__files_cnt = argc;
+			use_default = false;
+		}
 	} else if (symbol_conf.default_guest_vmlinux_name ||
 		   symbol_conf.default_guest_kallsyms) {
 		defaults[0] = "perf.data.host";
@@ -944,7 +977,7 @@ static int data_init(int argc, const char **argv)
 		return -ENOMEM;
 
 	data__for_each_file(i, d) {
-		d->file = defaults[i];
+		d->file = use_default ? defaults[i] : argv[i];
 		d->idx  = i;
 	}
 
-- 
cgit v0.10.2


From 3a3beae81dae4960cac99fb6deeaca371f0790eb Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 24 Oct 2012 14:56:51 +0200
Subject: perf diff: Update perf diff documentation for multiple data
 comparison

Updating perf diff documentation to include multiple perf data files
comparison.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-tr6su3wfm20k2m5npjggyvtw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 5b3123d..2d134f3 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -3,17 +3,17 @@ perf-diff(1)
 
 NAME
 ----
-perf-diff - Read two perf.data files and display the differential profile
+perf-diff - Read perf.data files and display the differential profile
 
 SYNOPSIS
 --------
 [verse]
-'perf diff' [oldfile] [newfile]
+'perf diff' [baseline file] [data file1] [[data file2] ... ]
 
 DESCRIPTION
 -----------
-This command displays the performance difference amongst two perf.data files
-captured via perf record.
+This command displays the performance difference amongst two or more perf.data
+files captured via perf record.
 
 If no parameters are passed it will assume perf.data.old and perf.data.
 
@@ -87,6 +87,59 @@ OPTIONS
 --formula::
         Show formula for given computation.
 
+COMPARISON
+----------
+The comparison is governed by the baseline file. The baseline perf.data
+file is iterated for samples. All other perf.data files specified on
+the command line are searched for the baseline sample pair. If the pair
+is found, specified computation is made and result is displayed.
+
+All samples from non-baseline perf.data files, that do not match any
+baseline entry, are displayed with empty space within baseline column
+and possible computation results (delta) in their related column.
+
+Example files samples:
+- file A with samples f1, f2, f3, f4,    f6
+- file B with samples     f2,     f4, f5
+- file C with samples f1, f2,         f5
+
+Example output:
+  x - computation takes place for pair
+  b - baseline sample percentage
+
+- perf diff A B C
+
+  baseline/A compute/B compute/C  samples
+  ---------------------------------------
+  b                    x          f1
+  b          x         x          f2
+  b                               f3
+  b          x                    f4
+  b                               f6
+             x         x          f5
+
+- perf diff B A C
+
+  baseline/B compute/A compute/C  samples
+  ---------------------------------------
+  b          x         x          f2
+  b          x                    f4
+  b                    x          f5
+             x         x          f1
+             x                    f3
+             x                    f6
+
+- perf diff C B A
+
+  baseline/C compute/B compute/A  samples
+  ---------------------------------------
+  b                    x          f1
+  b          x         x          f2
+  b          x                    f5
+                       x          f3
+             x         x          f4
+                       x          f6
+
 COMPARISON METHODS
 ------------------
 delta
@@ -96,7 +149,7 @@ If specified the 'Delta' column is displayed with value 'd' computed as:
   d = A->period_percent - B->period_percent
 
 with:
-  - A/B being matching hist entry from first/second file specified
+  - A/B being matching hist entry from data/baseline file specified
     (or perf.data/perf.data.old) respectively.
 
   - period_percent being the % of the hist entry period value within
@@ -109,24 +162,26 @@ If specified the 'Ratio' column is displayed with value 'r' computed as:
   r = A->period / B->period
 
 with:
-  - A/B being matching hist entry from first/second file specified
+  - A/B being matching hist entry from data/baseline file specified
     (or perf.data/perf.data.old) respectively.
 
   - period being the hist entry period value
 
-wdiff
-~~~~~
+wdiff:WEIGHT-B,WEIGHT-A
+~~~~~~~~~~~~~~~~~~~~~~~
 If specified the 'Weighted diff' column is displayed with value 'd' computed as:
 
    d = B->period * WEIGHT-A - A->period * WEIGHT-B
 
-  - A/B being matching hist entry from first/second file specified
+  - A/B being matching hist entry from data/baseline file specified
     (or perf.data/perf.data.old) respectively.
 
   - period being the hist entry period value
 
   - WEIGHT-A/WEIGHT-B being user suplied weights in the the '-c' option
     behind ':' separator like '-c wdiff:1,2'.
+    - WIEGHT-A being the weight of the data file
+    - WIEGHT-B being the weight of the baseline data file
 
 SEE ALSO
 --------
-- 
cgit v0.10.2


From ef358e6dcaba76d1c00dba5fc6cd4cde1d1a2f13 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sun, 21 Oct 2012 23:31:51 +0200
Subject: perf diff: Making compute functions static

All compute functions are now local to the diff command, making them
static.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-mpmm8l71mnlp7139voba3aak@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index cc7bf4f..f2fbf69 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -224,23 +224,23 @@ static int setup_compute(const struct option *opt, const char *str,
 	return -EINVAL;
 }
 
-double perf_diff__period_percent(struct hist_entry *he, u64 period)
+static double period_percent(struct hist_entry *he, u64 period)
 {
 	u64 total = he->hists->stats.total_period;
 	return (period * 100.0) / total;
 }
 
-double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair)
+static double compute_delta(struct hist_entry *he, struct hist_entry *pair)
 {
-	double old_percent = perf_diff__period_percent(he, he->stat.period);
-	double new_percent = perf_diff__period_percent(pair, pair->stat.period);
+	double old_percent = period_percent(he, he->stat.period);
+	double new_percent = period_percent(pair, pair->stat.period);
 
 	pair->diff.period_ratio_delta = new_percent - old_percent;
 	pair->diff.computed = true;
 	return pair->diff.period_ratio_delta;
 }
 
-double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
+static double compute_ratio(struct hist_entry *he, struct hist_entry *pair)
 {
 	double old_period = he->stat.period ?: 1;
 	double new_period = pair->stat.period;
@@ -250,7 +250,7 @@ double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair)
 	return pair->diff.period_ratio;
 }
 
-s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
+static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
 {
 	u64 old_period = he->stat.period;
 	u64 new_period = pair->stat.period;
@@ -292,8 +292,8 @@ static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
 		  new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
 }
 
-int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
-		       char *buf, size_t size)
+static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
+			   char *buf, size_t size)
 {
 	switch (compute) {
 	case COMPUTE_DELTA:
@@ -421,13 +421,13 @@ static void hists__precompute(struct hists *hists)
 
 		switch (compute) {
 		case COMPUTE_DELTA:
-			perf_diff__compute_delta(he, pair);
+			compute_delta(he, pair);
 			break;
 		case COMPUTE_RATIO:
-			perf_diff__compute_ratio(he, pair);
+			compute_ratio(he, pair);
 			break;
 		case COMPUTE_WEIGHTED_DIFF:
-			perf_diff__compute_wdiff(he, pair);
+			compute_wdiff(he, pair);
 			break;
 		default:
 			BUG_ON(1);
@@ -744,7 +744,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
 		if (pair->diff.computed)
 			diff = pair->diff.period_ratio_delta;
 		else
-			diff = perf_diff__compute_delta(he, pair);
+			diff = compute_delta(he, pair);
 
 		if (fabs(diff) >= 0.01)
 			scnprintf(buf, size, "%+4.2F%%", diff);
@@ -758,7 +758,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
 		if (pair->diff.computed)
 			ratio = pair->diff.period_ratio;
 		else
-			ratio = perf_diff__compute_ratio(he, pair);
+			ratio = compute_ratio(he, pair);
 
 		if (ratio > 0.0)
 			scnprintf(buf, size, "%14.6F", ratio);
@@ -772,14 +772,14 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
 		if (pair->diff.computed)
 			wdiff = pair->diff.wdiff;
 		else
-			wdiff = perf_diff__compute_wdiff(he, pair);
+			wdiff = compute_wdiff(he, pair);
 
 		if (wdiff != 0)
 			scnprintf(buf, size, "%14ld", wdiff);
 		break;
 
 	case PERF_HPP_DIFF__FORMULA:
-		perf_diff__formula(he, pair, buf, size);
+		formula_fprintf(he, pair, buf, size);
 		break;
 
 	case PERF_HPP_DIFF__PERIOD:
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 79681f6..bfcbb11 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -242,11 +242,4 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
 #endif
 
 unsigned int hists__sort_list_width(struct hists *self);
-
-double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair);
-double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair);
-s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair);
-int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair,
-		       char *buf, size_t size);
-double perf_diff__period_percent(struct hist_entry *he, u64 period);
 #endif	/* __PERF_HIST_H */
-- 
cgit v0.10.2


From 5f3f8d3b1207cba3664d57a33de43f5ee11c8a06 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sun, 25 Nov 2012 23:10:20 +0100
Subject: perf diff: Add generic order option for compute sorting

Adding option 'o' to allow sorting based on the input file number. By
default (without -o option) the output is sorted on baseline.

Also removing '+' sorting support from -c option, because it's not
needed anymore.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-l7dvhgt0azm7yiqg3fbn4dxw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 2d134f3..fdfceee 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -75,8 +75,6 @@ OPTIONS
 -c::
 --compute::
         Differential computation selection - delta,ratio,wdiff (default is delta).
-        If '+' is specified as a first character, the output is sorted based
-        on the computation results.
         See COMPARISON METHODS section for more info.
 
 -p::
@@ -87,6 +85,10 @@ OPTIONS
 --formula::
         Show formula for given computation.
 
+-o::
+--order::
+       Specify compute sorting column number.
+
 COMPARISON
 ----------
 The comparison is governed by the baseline file. The baseline perf.data
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index f2fbf69..93de3ac 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -64,7 +64,7 @@ static bool force;
 static bool show_period;
 static bool show_formula;
 static bool show_baseline_only;
-static bool sort_compute;
+static unsigned int sort_compute;
 
 static s64 compute_wdiff_w1;
 static s64 compute_wdiff_w2;
@@ -188,13 +188,6 @@ static int setup_compute(const struct option *opt, const char *str,
 		return 0;
 	}
 
-	if (*str == '+') {
-		sort_compute = true;
-		cstr = (char *) ++str;
-		if (!*str)
-			return 0;
-	}
-
 	option = strchr(str, ':');
 	if (option) {
 		unsigned len = option++ - str;
@@ -378,6 +371,29 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
 	}
 }
 
+static struct hist_entry*
+get_pair_data(struct hist_entry *he, struct data__file *d)
+{
+	if (hist_entry__has_pairs(he)) {
+		struct hist_entry *pair;
+
+		list_for_each_entry(pair, &he->pairs.head, pairs.node)
+			if (pair->hists == d->hists)
+				return pair;
+	}
+
+	return NULL;
+}
+
+static struct hist_entry*
+get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt)
+{
+	void *ptr = dfmt - dfmt->idx;
+	struct data__file *d = container_of(ptr, struct data__file, fmt);
+
+	return get_pair_data(he, d);
+}
+
 static void hists__baseline_only(struct hists *hists)
 {
 	struct rb_root *root;
@@ -412,10 +428,12 @@ static void hists__precompute(struct hists *hists)
 
 	next = rb_first(root);
 	while (next != NULL) {
-		struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
-		struct hist_entry *pair = hist_entry__next_pair(he);
+		struct hist_entry *he, *pair;
 
+		he   = rb_entry(next, struct hist_entry, rb_node_in);
 		next = rb_next(&he->rb_node_in);
+
+		pair = get_pair_data(he, &data__files[sort_compute]);
 		if (!pair)
 			continue;
 
@@ -446,7 +464,7 @@ static int64_t cmp_doubles(double l, double r)
 }
 
 static int64_t
-hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+__hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
 			int c)
 {
 	switch (c) {
@@ -478,6 +496,36 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
 	return 0;
 }
 
+static int64_t
+hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+			int c)
+{
+	bool pairs_left  = hist_entry__has_pairs(left);
+	bool pairs_right = hist_entry__has_pairs(right);
+	struct hist_entry *p_right, *p_left;
+
+	if (!pairs_left && !pairs_right)
+		return 0;
+
+	if (!pairs_left || !pairs_right)
+		return pairs_left ? -1 : 1;
+
+	p_left  = get_pair_data(left,  &data__files[sort_compute]);
+	p_right = get_pair_data(right, &data__files[sort_compute]);
+
+	if (!p_left && !p_right)
+		return 0;
+
+	if (!p_left || !p_right)
+		return p_left ? -1 : 1;
+
+	/*
+	 * We have 2 entries of same kind, let's
+	 * make the data comparison.
+	 */
+	return __hist_entry__cmp_compute(p_left, p_right, c);
+}
+
 static void insert_hist_entry_by_compute(struct rb_root *root,
 					 struct hist_entry *he,
 					 int c)
@@ -680,6 +728,7 @@ static const struct option options[] = {
 		   "columns '.' is reserved."),
 	OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
 		    "Look for files with symbols relative to this directory"),
+	OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
 	OPT_END()
 };
 
@@ -791,28 +840,11 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
 	};
 }
 
-static struct hist_entry *get_pair(struct hist_entry *he,
-				   struct diff_hpp_fmt *dfmt)
-{
-	void *ptr = dfmt - dfmt->idx;
-	struct data__file *d = container_of(ptr, struct data__file, fmt);
-
-	if (hist_entry__has_pairs(he)) {
-		struct hist_entry *pair;
-
-		list_for_each_entry(pair, &he->pairs.head, pairs.node)
-			if (pair->hists == d->hists)
-				return pair;
-	}
-
-	return NULL;
-}
-
 static void
 __hpp__entry_global(struct hist_entry *he, struct diff_hpp_fmt *dfmt,
 		    char *buf, size_t size)
 {
-	struct hist_entry *pair = get_pair(he, dfmt);
+	struct hist_entry *pair = get_pair_fmt(he, dfmt);
 	int idx = dfmt->idx;
 
 	/* baseline is special */
@@ -972,6 +1004,11 @@ static int data_init(int argc, const char **argv)
 		defaults[1] = "perf.data.guest";
 	}
 
+	if (sort_compute >= (unsigned int) data__files_cnt) {
+		pr_err("Order option out of limit.\n");
+		return -EINVAL;
+	}
+
 	data__files = zalloc(sizeof(*data__files) * data__files_cnt);
 	if (!data__files)
 		return -ENOMEM;
-- 
cgit v0.10.2


From be0e6d105d31a5818608ae243411aef5c976147a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 4 Feb 2013 16:33:19 +0100
Subject: perf tools: Move hist_entry__period_snprintf into stdio code

Moving hist_entry__period_snprintf function into stdio code and making
it static, as it's no longer used anywhere else.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-ah8ms343h8xygt20iqz91kz4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index dc900d7..0a19328 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -236,46 +236,6 @@ void perf_hpp__column_enable(unsigned col)
 	perf_hpp__column_register(&perf_hpp__format[col]);
 }
 
-static inline void advance_hpp(struct perf_hpp *hpp, int inc)
-{
-	hpp->buf  += inc;
-	hpp->size -= inc;
-}
-
-int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
-				bool color)
-{
-	const char *sep = symbol_conf.field_sep;
-	struct perf_hpp_fmt *fmt;
-	char *start = hpp->buf;
-	int ret;
-	bool first = true;
-
-	if (symbol_conf.exclude_other && !he->parent)
-		return 0;
-
-	perf_hpp__for_each_format(fmt) {
-		/*
-		 * If there's no field_sep, we still need
-		 * to display initial '  '.
-		 */
-		if (!sep || !first) {
-			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
-			advance_hpp(hpp, ret);
-		} else
-			first = false;
-
-		if (color && fmt->color)
-			ret = fmt->color(fmt, hpp, he);
-		else
-			ret = fmt->entry(fmt, hpp, he);
-
-		advance_hpp(hpp, ret);
-	}
-
-	return hpp->buf - start;
-}
-
 int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
 			      struct hists *hists)
 {
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index ee70372..5b4fb33 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -308,6 +308,47 @@ static size_t hist_entry__callchain_fprintf(struct hist_entry *he,
 	return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
 }
 
+static inline void advance_hpp(struct perf_hpp *hpp, int inc)
+{
+	hpp->buf  += inc;
+	hpp->size -= inc;
+}
+
+static int hist_entry__period_snprintf(struct perf_hpp *hpp,
+				       struct hist_entry *he,
+				       bool color)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct perf_hpp_fmt *fmt;
+	char *start = hpp->buf;
+	int ret;
+	bool first = true;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	perf_hpp__for_each_format(fmt) {
+		/*
+		 * If there's no field_sep, we still need
+		 * to display initial '  '.
+		 */
+		if (!sep || !first) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		} else
+			first = false;
+
+		if (color && fmt->color)
+			ret = fmt->color(fmt, hpp, he);
+		else
+			ret = fmt->entry(fmt, hpp, he);
+
+		advance_hpp(hpp, ret);
+	}
+
+	return hpp->buf - start;
+}
+
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 			       struct hists *hists, FILE *fp)
 {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index bfcbb11..1329b6b 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -174,8 +174,6 @@ enum {
 void perf_hpp__init(void);
 void perf_hpp__column_register(struct perf_hpp_fmt *format);
 void perf_hpp__column_enable(unsigned col);
-int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
-				bool color);
 
 struct perf_evlist;
 
-- 
cgit v0.10.2


From c85cffa5894fad8ad7c8051ccf7dd73a3a3f92b6 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Jul 2013 17:28:29 +0200
Subject: perf timechart: Use traceevent lib event-parse.h include

Adding traceevent lib event-parse.h include to timechart command and
removing duplicated local 'enum trace_flag_type' definition.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Renninger <trenn@suse.de>
Link: http://lkml.kernel.org/r/1373556513-3000-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 4536a92..a512009 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -12,6 +12,8 @@
  * of the License.
  */
 
+#include <traceevent/event-parse.h>
+
 #include "builtin.h"
 
 #include "util/util.h"
@@ -328,25 +330,6 @@ struct wakeup_entry {
 	int   success;
 };
 
-/*
- * trace_flag_type is an enumeration that holds different
- * states when a trace occurs. These are:
- *  IRQS_OFF            - interrupts were disabled
- *  IRQS_NOSUPPORT      - arch does not support irqs_disabled_flags
- *  NEED_RESCED         - reschedule is requested
- *  HARDIRQ             - inside an interrupt handler
- *  SOFTIRQ             - inside a softirq handler
- */
-enum trace_flag_type {
-	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
-	TRACE_FLAG_NEED_RESCHED		= 0x04,
-	TRACE_FLAG_HARDIRQ		= 0x08,
-	TRACE_FLAG_SOFTIRQ		= 0x10,
-};
-
-
-
 struct sched_switch {
 	struct trace_entry te;
 	char prev_comm[TASK_COMM_LEN];
-- 
cgit v0.10.2


From 5936678e7da5f8d2944a2ad45d66c88b4a7ccb67 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Jul 2013 17:28:30 +0200
Subject: perf timechart: Remove event types framework only user

The only user of the event types data is the 'perf timechart' command,
which uses this info to identify the proper tracepoints by name.

Switching this code to use tracepoint handler callbacks, the same as
other commands like builtin-{kmem,lock,sched}.c, using the
perf_session__set_tracepoints_handlers function.

This way we get rid of the only event types user and can remove them
completely in the next patches.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Renninger <trenn@suse.de>
Link: http://lkml.kernel.org/r/1373556513-3000-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index a512009..c2e0231 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -21,6 +21,7 @@
 #include "util/color.h"
 #include <linux/list.h>
 #include "util/cache.h"
+#include "util/evlist.h"
 #include "util/evsel.h"
 #include <linux/rbtree.h>
 #include "util/symbol.h"
@@ -462,6 +463,8 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
 	}
 }
 
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+				  struct perf_sample *sample);
 
 static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				union perf_event *event __maybe_unused,
@@ -469,8 +472,6 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct perf_evsel *evsel,
 				struct machine *machine __maybe_unused)
 {
-	struct trace_entry *te;
-
 	if (evsel->attr.sample_type & PERF_SAMPLE_TIME) {
 		if (!first_time || first_time > sample->time)
 			first_time = sample->time;
@@ -478,69 +479,90 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 			last_time = sample->time;
 	}
 
-	te = (void *)sample->raw_data;
-	if ((evsel->attr.sample_type & PERF_SAMPLE_RAW) && sample->raw_size > 0) {
-		char *event_str;
-#ifdef SUPPORT_OLD_POWER_EVENTS
-		struct power_entry_old *peo;
-		peo = (void *)te;
-#endif
-		/*
-		 * FIXME: use evsel, its already mapped from id to perf_evsel,
-		 * remove perf_header__find_event infrastructure bits.
-		 * Mapping all these "power:cpu_idle" strings to the tracepoint
-		 * ID and then just comparing against evsel->attr.config.
-		 *
-		 * e.g.:
-		 *
-		 * if (evsel->attr.config == power_cpu_idle_id)
-		 */
-		event_str = perf_header__find_event(te->type);
-
-		if (!event_str)
-			return 0;
-
-		if (sample->cpu > numcpus)
-			numcpus = sample->cpu;
-
-		if (strcmp(event_str, "power:cpu_idle") == 0) {
-			struct power_processor_entry *ppe = (void *)te;
-			if (ppe->state == (u32)PWR_EVENT_EXIT)
-				c_state_end(ppe->cpu_id, sample->time);
-			else
-				c_state_start(ppe->cpu_id, sample->time,
-					      ppe->state);
-		}
-		else if (strcmp(event_str, "power:cpu_frequency") == 0) {
-			struct power_processor_entry *ppe = (void *)te;
-			p_state_change(ppe->cpu_id, sample->time, ppe->state);
-		}
+	if (sample->cpu > numcpus)
+		numcpus = sample->cpu;
+
+	if (evsel->handler.func != NULL) {
+		tracepoint_handler f = evsel->handler.func;
+		return f(evsel, sample);
+	}
+
+	return 0;
+}
+
+static int
+process_sample_cpu_idle(struct perf_evsel *evsel __maybe_unused,
+			struct perf_sample *sample)
+{
+	struct power_processor_entry *ppe = sample->raw_data;
+
+	if (ppe->state == (u32) PWR_EVENT_EXIT)
+		c_state_end(ppe->cpu_id, sample->time);
+	else
+		c_state_start(ppe->cpu_id, sample->time, ppe->state);
+	return 0;
+}
+
+static int
+process_sample_cpu_frequency(struct perf_evsel *evsel __maybe_unused,
+			     struct perf_sample *sample)
+{
+	struct power_processor_entry *ppe = sample->raw_data;
+
+	p_state_change(ppe->cpu_id, sample->time, ppe->state);
+	return 0;
+}
 
-		else if (strcmp(event_str, "sched:sched_wakeup") == 0)
-			sched_wakeup(sample->cpu, sample->time, sample->pid, te);
+static int
+process_sample_sched_wakeup(struct perf_evsel *evsel __maybe_unused,
+			    struct perf_sample *sample)
+{
+	struct trace_entry *te = sample->raw_data;
 
-		else if (strcmp(event_str, "sched:sched_switch") == 0)
-			sched_switch(sample->cpu, sample->time, te);
+	sched_wakeup(sample->cpu, sample->time, sample->pid, te);
+	return 0;
+}
+
+static int
+process_sample_sched_switch(struct perf_evsel *evsel __maybe_unused,
+			    struct perf_sample *sample)
+{
+	struct trace_entry *te = sample->raw_data;
+
+	sched_switch(sample->cpu, sample->time, te);
+	return 0;
+}
 
 #ifdef SUPPORT_OLD_POWER_EVENTS
-		if (use_old_power_events) {
-			if (strcmp(event_str, "power:power_start") == 0)
-				c_state_start(peo->cpu_id, sample->time,
-					      peo->value);
-
-			else if (strcmp(event_str, "power:power_end") == 0)
-				c_state_end(sample->cpu, sample->time);
-
-			else if (strcmp(event_str,
-					"power:power_frequency") == 0)
-				p_state_change(peo->cpu_id, sample->time,
-					       peo->value);
-		}
-#endif
-	}
+static int
+process_sample_power_start(struct perf_evsel *evsel __maybe_unused,
+			   struct perf_sample *sample)
+{
+	struct power_entry_old *peo = sample->raw_data;
+
+	c_state_start(peo->cpu_id, sample->time, peo->value);
 	return 0;
 }
 
+static int
+process_sample_power_end(struct perf_evsel *evsel __maybe_unused,
+			 struct perf_sample *sample)
+{
+	c_state_end(sample->cpu, sample->time);
+	return 0;
+}
+
+static int
+process_sample_power_frequency(struct perf_evsel *evsel __maybe_unused,
+			       struct perf_sample *sample)
+{
+	struct power_entry_old *peo = sample->raw_data;
+
+	p_state_change(peo->cpu_id, sample->time, peo->value);
+	return 0;
+}
+#endif /* SUPPORT_OLD_POWER_EVENTS */
+
 /*
  * After the last sample we need to wrap up the current C/P state
  * and close out each CPU for these.
@@ -957,6 +979,17 @@ static int __cmd_timechart(const char *output_name)
 		.sample		 = process_sample_event,
 		.ordered_samples = true,
 	};
+	const struct perf_evsel_str_handler power_tracepoints[] = {
+		{ "power:cpu_idle",		process_sample_cpu_idle },
+		{ "power:cpu_frequency",	process_sample_cpu_frequency },
+		{ "sched:sched_wakeup",		process_sample_sched_wakeup },
+		{ "sched:sched_switch",		process_sample_sched_switch },
+#ifdef SUPPORT_OLD_POWER_EVENTS
+		{ "power:power_start",		process_sample_power_start },
+		{ "power:power_end",		process_sample_power_end },
+		{ "power:power_frequency",	process_sample_power_frequency },
+#endif
+	};
 	struct perf_session *session = perf_session__new(input_name, O_RDONLY,
 							 0, false, &perf_timechart);
 	int ret = -EINVAL;
@@ -967,6 +1000,12 @@ static int __cmd_timechart(const char *output_name)
 	if (!perf_session__has_traces(session, "timechart record"))
 		goto out_delete;
 
+	if (perf_session__set_tracepoints_handlers(session,
+						   power_tracepoints)) {
+		pr_err("Initializing session tracepoint handlers failed\n");
+		goto out_delete;
+	}
+
 	ret = perf_session__process_events(session, &perf_timechart);
 	if (ret)
 		goto out_delete;
-- 
cgit v0.10.2


From 44b3c57802b7557d0e2d8c3bd39bce942e9324cc Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Jul 2013 17:28:31 +0200
Subject: perf tools: Remove event types from perf data file

Removing the storing/reading of event types data to/from the perf data
file, as it's no longer needed. The only user of this data, 'perf
timechart', was switched to use tracepoint handler callbacks.

The event_types offset and size stay in the perf data file header but
are ignored from now on.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373556513-3000-4-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d12d79c..8862667 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2334,16 +2334,6 @@ int perf_session__write_header(struct perf_session *session,
 		}
 	}
 
-	header->event_offset = lseek(fd, 0, SEEK_CUR);
-	header->event_size = trace_event_count * sizeof(struct perf_trace_event_type);
-	if (trace_events) {
-		err = do_write(fd, trace_events, header->event_size);
-		if (err < 0) {
-			pr_debug("failed to write perf header events\n");
-			return err;
-		}
-	}
-
 	header->data_offset = lseek(fd, 0, SEEK_CUR);
 
 	if (at_exit) {
@@ -2364,10 +2354,7 @@ int perf_session__write_header(struct perf_session *session,
 			.offset = header->data_offset,
 			.size	= header->data_size,
 		},
-		.event_types = {
-			.offset = header->event_offset,
-			.size	= header->event_size,
-		},
+		/* event_types is ignored, store zeros */
 	};
 
 	memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
@@ -2614,8 +2601,6 @@ int perf_file_header__read(struct perf_file_header *header,
 	memcpy(&ph->adds_features, &header->adds_features,
 	       sizeof(ph->adds_features));
 
-	ph->event_offset = header->event_types.offset;
-	ph->event_size   = header->event_types.size;
 	ph->data_offset  = header->data.offset;
 	ph->data_size	 = header->data.size;
 	return 0;
@@ -2839,17 +2824,6 @@ int perf_session__read_header(struct perf_session *session, int fd)
 
 	symbol_conf.nr_events = nr_attrs;
 
-	if (f_header.event_types.size) {
-		lseek(fd, f_header.event_types.offset, SEEK_SET);
-		trace_events = malloc(f_header.event_types.size);
-		if (trace_events == NULL)
-			return -ENOMEM;
-		if (perf_header__getbuffer64(header, fd, trace_events,
-					     f_header.event_types.size))
-			goto out_errno;
-		trace_event_count =  f_header.event_types.size / sizeof(struct perf_trace_event_type);
-	}
-
 	perf_header__process_sections(header, fd, &session->pevent,
 				      perf_file_section__process);
 
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 2d1ca7d..298982f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -45,6 +45,7 @@ struct perf_file_header {
 	u64				attr_size;
 	struct perf_file_section	attrs;
 	struct perf_file_section	data;
+	/* event_types is ignored */
 	struct perf_file_section	event_types;
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 };
@@ -88,8 +89,6 @@ struct perf_header {
 	s64			attr_offset;
 	u64			data_offset;
 	u64			data_size;
-	u64			event_offset;
-	u64			event_size;
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 	struct perf_session_env env;
 };
-- 
cgit v0.10.2


From 30d350795e8e57c17eabaab454932889f1ae8159 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Jul 2013 17:28:32 +0200
Subject: perf record: Remove event types pushing

Removing event types data pushing from record command. It's no longer
needed, because this data is ignored.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373556513-3000-5-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ecca62e..1f5243c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -904,7 +904,6 @@ const struct option record_options[] = {
 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int err = -ENOMEM;
-	struct perf_evsel *pos;
 	struct perf_evlist *evsel_list;
 	struct perf_record *rec = &record;
 	char errbuf[BUFSIZ];
@@ -968,11 +967,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
 
-	list_for_each_entry(pos, &evsel_list->entries, node) {
-		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
-			goto out_free_fd;
-	}
-
 	if (rec->opts.user_interval != ULLONG_MAX)
 		rec->opts.default_interval = rec->opts.user_interval;
 	if (rec->opts.user_freq != UINT_MAX)
-- 
cgit v0.10.2


From 6065210db932fd183cbc8bc77558fee275360e71 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Jul 2013 17:28:33 +0200
Subject: perf tools: Remove event types framework completely

Removing the event types framework completely. The only remainder (apart
from a few comments) is the following enum:

  enum perf_user_event_type {
    ...
    PERF_RECORD_HEADER_EVENT_TYPE           = 65, /* deprecated */
    ...
  }

It's kept as deprecated, resulting in an error when processed in the
perf_session__process_user_event function.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373556513-3000-6-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index ad1296c..1d8de2e 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -67,12 +67,6 @@ static int perf_event__repipe_op2_synth(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
-static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
-					       union perf_event *event)
-{
-	return perf_event__repipe_synth(tool, event);
-}
-
 static int perf_event__repipe_attr(struct perf_tool *tool,
 				   union perf_event *event,
 				   struct perf_evlist **pevlist)
@@ -402,7 +396,6 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.throttle	= perf_event__repipe,
 			.unthrottle	= perf_event__repipe,
 			.attr		= perf_event__repipe_attr,
-			.event_type	= perf_event__repipe_event_type_synth,
 			.tracing_data	= perf_event__repipe_op2_synth,
 			.finished_round	= perf_event__repipe_op2_synth,
 			.build_id	= perf_event__repipe_op2_synth,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1f5243c..a41ac415 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -474,13 +474,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 			goto out_delete_session;
 		}
 
-		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
-							 machine);
-		if (err < 0) {
-			pr_err("Couldn't synthesize event_types.\n");
-			goto out_delete_session;
-		}
-
 		if (have_tracepoints(&evsel_list->entries)) {
 			/*
 			 * FIXME err <= 0 here actually means that
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 188c265..a34c587 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -741,7 +741,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			.lost		 = perf_event__process_lost,
 			.read		 = process_read_event,
 			.attr		 = perf_event__process_attr,
-			.event_type	 = perf_event__process_event_type,
 			.tracing_data	 = perf_event__process_tracing_data,
 			.build_id	 = perf_event__process_build_id,
 			.ordered_samples = true,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3de8979..ecb6979 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -524,7 +524,6 @@ static struct perf_tool perf_script = {
 	.exit		 = perf_event__process_exit,
 	.fork		 = perf_event__process_fork,
 	.attr		 = perf_event__process_attr,
-	.event_type	 = perf_event__process_event_type,
 	.tracing_data	 = perf_event__process_tracing_data,
 	.build_id	 = perf_event__process_build_id,
 	.ordered_samples = true,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1813895..1ebb8fb 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -116,7 +116,7 @@ struct build_id_event {
 enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_USER_TYPE_START		= 64,
 	PERF_RECORD_HEADER_ATTR			= 64,
-	PERF_RECORD_HEADER_EVENT_TYPE		= 65,
+	PERF_RECORD_HEADER_EVENT_TYPE		= 65, /* deprecated */
 	PERF_RECORD_HEADER_TRACING_DATA		= 66,
 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 	PERF_RECORD_FINISHED_ROUND		= 68,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8862667..b28a65e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -25,41 +25,9 @@
 
 static bool no_buildid_cache = false;
 
-static int trace_event_count;
-static struct perf_trace_event_type *trace_events;
-
 static u32 header_argc;
 static const char **header_argv;
 
-int perf_header__push_event(u64 id, const char *name)
-{
-	struct perf_trace_event_type *nevents;
-
-	if (strlen(name) > MAX_EVENT_NAME)
-		pr_warning("Event %s will be truncated\n", name);
-
-	nevents = realloc(trace_events, (trace_event_count + 1) * sizeof(*trace_events));
-	if (nevents == NULL)
-		return -ENOMEM;
-	trace_events = nevents;
-
-	memset(&trace_events[trace_event_count], 0, sizeof(struct perf_trace_event_type));
-	trace_events[trace_event_count].event_id = id;
-	strncpy(trace_events[trace_event_count].name, name, MAX_EVENT_NAME - 1);
-	trace_event_count++;
-	return 0;
-}
-
-char *perf_header__find_event(u64 id)
-{
-	int i;
-	for (i = 0 ; i < trace_event_count; i++) {
-		if (trace_events[i].event_id == id)
-			return trace_events[i].name;
-	}
-	return NULL;
-}
-
 /*
  * magic2 = "PERFILE2"
  * must be a numerical value to let the endianness
@@ -2936,64 +2904,6 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-int perf_event__synthesize_event_type(struct perf_tool *tool,
-				      u64 event_id, char *name,
-				      perf_event__handler_t process,
-				      struct machine *machine)
-{
-	union perf_event ev;
-	size_t size = 0;
-	int err = 0;
-
-	memset(&ev, 0, sizeof(ev));
-
-	ev.event_type.event_type.event_id = event_id;
-	memset(ev.event_type.event_type.name, 0, MAX_EVENT_NAME);
-	strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1);
-
-	ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE;
-	size = strlen(ev.event_type.event_type.name);
-	size = PERF_ALIGN(size, sizeof(u64));
-	ev.event_type.header.size = sizeof(ev.event_type) -
-		(sizeof(ev.event_type.event_type.name) - size);
-
-	err = process(tool, &ev, NULL, machine);
-
-	return err;
-}
-
-int perf_event__synthesize_event_types(struct perf_tool *tool,
-				       perf_event__handler_t process,
-				       struct machine *machine)
-{
-	struct perf_trace_event_type *type;
-	int i, err = 0;
-
-	for (i = 0; i < trace_event_count; i++) {
-		type = &trace_events[i];
-
-		err = perf_event__synthesize_event_type(tool, type->event_id,
-							type->name, process,
-							machine);
-		if (err) {
-			pr_debug("failed to create perf header event type\n");
-			return err;
-		}
-	}
-
-	return err;
-}
-
-int perf_event__process_event_type(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event)
-{
-	if (perf_header__push_event(event->event_type.event_type.event_id,
-				    event->event_type.event_type.name) < 0)
-		return -ENOMEM;
-
-	return 0;
-}
-
 int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
 					struct perf_evlist *evlist,
 					perf_event__handler_t process)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 298982f..669fda5 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -102,9 +102,6 @@ int perf_session__write_header(struct perf_session *session,
 			       int fd, bool at_exit);
 int perf_header__write_pipe(int fd);
 
-int perf_header__push_event(u64 id, const char *name);
-char *perf_header__find_event(u64 id);
-
 void perf_header__set_feat(struct perf_header *header, int feat);
 void perf_header__clear_feat(struct perf_header *header, int feat);
 bool perf_header__has_feat(const struct perf_header *header, int feat);
@@ -132,16 +129,6 @@ int perf_event__synthesize_attrs(struct perf_tool *tool,
 int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
 			     struct perf_evlist **pevlist);
 
-int perf_event__synthesize_event_type(struct perf_tool *tool,
-				      u64 event_id, char *name,
-				      perf_event__handler_t process,
-				      struct machine *machine);
-int perf_event__synthesize_event_types(struct perf_tool *tool,
-				       perf_event__handler_t process,
-				       struct machine *machine);
-int perf_event__process_event_type(struct perf_tool *tool,
-				   union perf_event *event);
-
 int perf_event__synthesize_tracing_data(struct perf_tool *tool,
 					int fd, struct perf_evlist *evlist,
 					perf_event__handler_t process);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1eb58ee..d0d9f94 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -241,13 +241,6 @@ static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-static int process_event_type_stub(struct perf_tool *tool __maybe_unused,
-				   union perf_event *event __maybe_unused)
-{
-	dump_printf(": unhandled!\n");
-	return 0;
-}
-
 static int process_finished_round(struct perf_tool *tool,
 				  union perf_event *event,
 				  struct perf_session *session);
@@ -274,8 +267,6 @@ static void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->unthrottle = process_event_stub;
 	if (tool->attr == NULL)
 		tool->attr = process_event_synth_attr_stub;
-	if (tool->event_type == NULL)
-		tool->event_type = process_event_type_stub;
 	if (tool->tracing_data == NULL)
 		tool->tracing_data = process_event_synth_tracing_data_stub;
 	if (tool->build_id == NULL)
@@ -928,8 +919,6 @@ static int perf_session__process_user_event(struct perf_session *session, union
 		if (err == 0)
 			perf_session__set_id_hdr_size(session);
 		return err;
-	case PERF_RECORD_HEADER_EVENT_TYPE:
-		return tool->event_type(tool, event);
 	case PERF_RECORD_HEADER_TRACING_DATA:
 		/* setup for reading amidst mmap */
 		lseek(session->fd, file_offset, SEEK_SET);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 88f8cbd..62b16b6 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -22,8 +22,6 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_evlist **pevlist);
 
-typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event);
-
 typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
 			 struct perf_session *session);
 
@@ -39,7 +37,6 @@ struct perf_tool {
 			unthrottle;
 	event_attr_op	attr;
 	event_op2	tracing_data;
-	event_simple_op	event_type;
 	event_op2	finished_round,
 			build_id;
 	bool		ordered_samples;
-- 
cgit v0.10.2


From fd4363fff3d96795d3feb1b3fb48ce590f186bdd Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 12 Jul 2013 11:21:48 +0200
Subject: x86: Introduce int3 (breakpoint)-based instruction patching

Introduce a method for run-time instruction patching on a live SMP kernel
based on int3 breakpoint, completely avoiding the need for stop_machine().

The way this is achieved:

	- add an int3 trap to the address that will be patched
	- sync cores
	- update all but the first byte of the patched range
	- sync cores
	- replace the first byte (int3) by the first byte of
	  replacing opcode
	- sync cores

According to

	http://lkml.indiana.edu/hypermail/linux/kernel/1001.1/01530.html

synchronization after replacing "all but first" instructions should not
be necessary (on Intel hardware), as the syncing after the subsequent
patching of the first byte provides enough safety.
But there's not only Intel HW out there, and we'd rather be on the safe
side.

If any CPU instruction execution would collide with the patching,
it'd be trapped by the int3 breakpoint and redirected to the provided
"handler" (which would typically mean just skipping over the patched
region, acting as if a "nop" had been there, in case we are doing
nop -> jump and jump -> nop transitions).

Ftrace has been using this very technique since 08d636b ("ftrace/x86:
Have arch x86_64 use breakpoints instead of stop machine") for ages
already, and jump labels are another obvious potential user of this.

Based on activities of Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
a few years ago.

Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1307121102440.29788@pobox.suse.cz
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 58ed6d9..3abf8dd 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -233,6 +233,7 @@ struct text_poke_param {
 };
 
 extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c15cf9a..0ab4936 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
 #include <linux/memory.h>
 #include <linux/stop_machine.h>
 #include <linux/slab.h>
+#include <linux/kdebug.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
@@ -596,6 +597,111 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
 	return addr;
 }
 
+static void do_sync_core(void *info)
+{
+	sync_core();
+}
+
+static bool bp_patching_in_progress;
+static void *bp_int3_handler, *bp_int3_addr;
+
+static int int3_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct die_args *args = data;
+
+	/* pairs with smp_wmb() in text_poke_bp(): bp_patching_in_progress */
+	smp_rmb();
+
+	if (likely(!bp_patching_in_progress))
+		return NOTIFY_DONE;
+
+	/* we are not interested in non-int3 faults and ring > 0 faults */
+	if (val != DIE_INT3 || !args->regs || user_mode_vm(args->regs)
+			    || args->regs->ip != (unsigned long)bp_int3_addr)
+		return NOTIFY_DONE;
+
+	/* set up the specified breakpoint handler */
+	args->regs->ip = (unsigned long) bp_int3_handler;
+
+	return NOTIFY_STOP;
+}
+/**
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr:	address to patch
+ * @opcode:	opcode of new instruction
+ * @len:	length to copy, in bytes (the first byte is always patched)
+ * @handler:	address to jump to when the temporary breakpoint is hit
+ *
+ * Modify multi-byte instruction by using int3 breakpoint on SMP.
+ * In contrast to text_poke_smp(), we completely avoid stop_machine() here,
+ * and achieve the synchronization using int3 breakpoint.
+ *
+ * The way it is done:
+ *	- add an int3 trap to the address that will be patched
+ *	- sync cores
+ *	- update all but the first byte of the patched range
+ *	- sync cores
+ *	- replace the first byte (int3) by the first byte of
+ *	  replacing opcode
+ *	- sync cores
+ *
+ * Note: must be called under text_mutex. Returns @addr.
+ */
+void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+{
+	unsigned char int3 = 0xcc;
+
+	bp_int3_handler = handler;
+	bp_int3_addr = (u8 *)addr + sizeof(int3);
+	bp_patching_in_progress = true;
+	/*
+	 * Corresponding read barrier in int3 notifier for
+	 * making sure the in_progress flag is correctly ordered wrt.
+	 * patching
+	 */
+	smp_wmb();
+
+	text_poke(addr, &int3, sizeof(int3));
+
+	on_each_cpu(do_sync_core, NULL, 1);
+
+	if (len > sizeof(int3)) {
+		/* patch all but the first byte (size_t: compare, don't subtract) */
+		text_poke((char *)addr + sizeof(int3),
+			  (const char *) opcode + sizeof(int3),
+			  len - sizeof(int3));
+		/*
+		 * According to Intel, this core syncing is very likely
+		 * not necessary and we'd be safe even without it. But
+		 * better safe than sorry (plus there's not only Intel).
+		 */
+		on_each_cpu(do_sync_core, NULL, 1);
+	}
+
+	/* patch the first byte */
+	text_poke(addr, opcode, sizeof(int3));
+
+	on_each_cpu(do_sync_core, NULL, 1);
+
+	bp_patching_in_progress = false;
+	smp_wmb();
+
+	return addr;
+}
+
+/* this one needs to run before anything else handles it as a
+ * regular exception */
+static struct notifier_block int3_nb = {
+	.priority = 0x7fffffff,
+	.notifier_call = int3_notify
+};
+
+static int __init int3_init(void)
+{
+	return register_die_notifier(&int3_nb);
+}
+
+arch_initcall(int3_init);
 /*
  * Cross-modifying kernel text with stop_machine().
  * This code originally comes from immediate value.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6e33498..b58b490 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1709,7 +1709,7 @@ EXPORT_SYMBOL_GPL(unregister_kprobes);
 
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
-	.priority = 0x7fffffff /* we need to be notified first */
+	.priority = 0x7ffffff0 /* High priority, but not first.  */
 };
 
 unsigned long __weak arch_deref_entry_point(void *entry)
-- 
cgit v0.10.2


From 51b2c07b22261f19188d9a9071943d60a067481c Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 12 Jul 2013 11:22:09 +0200
Subject: x86: Make jump_label use int3-based patching

Make jump labels use text_poke_bp() for text patching instead of
text_poke_smp(), avoiding the need for stop_machine().

Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1307121120250.29788@pobox.suse.cz
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 2889b3d..460f5d9 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -37,7 +37,19 @@ static void __jump_label_transform(struct jump_entry *entry,
 	} else
 		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
 
-	(*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+	/*
+	 * Make text_poke_bp() a default fallback poker.
+	 *
+	 * At the time the change is being made, ignore whether we are
+	 * doing a nop -> jump or a jump -> nop transition, and always
+	 * assume that nop is the 'currently valid' instruction.
+	 *
+	 */
+	if (poker)
+		(*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+	else
+		text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
+			     (void *)entry->code + JUMP_LABEL_NOP_SIZE);
 }
 
 void arch_jump_label_transform(struct jump_entry *entry,
@@ -45,7 +57,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 {
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	__jump_label_transform(entry, type, text_poke_smp);
+	__jump_label_transform(entry, type, NULL);
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
 }
-- 
cgit v0.10.2


From ab4ecda5205b56cb3b8b44f2c18ffdefb24313a2 Mon Sep 17 00:00:00 2001
From: Robert Richter <rric@kernel.org>
Date: Tue, 16 Jul 2013 16:50:36 +0200
Subject: perf tools: Fix 'make tools/perf'

Boris just raised another variant of building perf tools which is
broken:

 $ make tools/perf
 ...
     LINK /home/robert/cx/linux/tools/perf/perf
 gcc: error: ../linux/tools/lib/lk/liblk.a: No such file or directory

The variant wasn't considered by:

 107de37 perf tools: Fix build errors with O and DESTDIR make vars set

There are other variants of building perf too:

 $ make -C tools perf
 $ make -C tools/perf

Plus variants with O= and DESTDIR set.

This patch fixes the above and was tested with the following:

 $ make O=... DESTDIR=... tools/perf
 $ make O=... DESTDIR=... -C tools/ perf
 $ make O=... DESTDIR=... -C tools/perf
 $ make tools/perf
 $ make -C tools/ perf
 $ make -C tools/perf

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Robert Richter <robert.richter@linaro.org>
Signed-off-by: Robert Richter <rric@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-tip-commits@vger.kernel.org
Link: http://lkml.kernel.org/r/20130716145036.GH8731@rric.localhost
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 5b7c6db..2a69026 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -124,7 +124,7 @@ strip-libs = $(filter-out -l%,$(1))
 ifneq ($(OUTPUT),)
   TE_PATH=$(OUTPUT)
 ifneq ($(subdir),)
-  LK_PATH=$(objtree)/lib/lk/
+  LK_PATH=$(OUTPUT)/../lib/lk/
 else
   LK_PATH=$(OUTPUT)
 endif
-- 
cgit v0.10.2


From 5698d2c99ec11006b4c241ebad5252289902b358 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Wed, 17 Jul 2013 17:08:15 +0900
Subject: perf symbols: Do not apply symfs for an absolute vmlinux path

If a user gives both the --symfs and --vmlinux options, the vmlinux will
be searched under the symfs directory.  This is somewhat confusing since
vmlinux often lives in the kernel build directory or somewhere other than
user space binaries.

So it'd be better not to add the symfs prefix to a vmlinux that has an
absolute pathname.

Reported-by: Kwanghyun Yoo <ykh815.yoo@lge.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374048495-3643-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index d5528e1..02718e7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -888,8 +888,11 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
 	char symfs_vmlinux[PATH_MAX];
 	enum dso_binary_type symtab_type;
 
-	snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s",
-		 symbol_conf.symfs, vmlinux);
+	if (vmlinux[0] == '/')
+		snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s", vmlinux);
+	else
+		snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s",
+			 symbol_conf.symfs, vmlinux);
 
 	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
 		symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
-- 
cgit v0.10.2


From 8d76e1852794fac6ff0b538120d9856b0b07ef8a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 12 Jul 2013 16:29:06 +0200
Subject: perf tests: Check proper prev_state size for sched_switch tp

The prev_state field is defined as long, which is 4 bytes on 32-bit x86.
Change the check to compare against sizeof(long).

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373639346-4547-1-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index f79e7d0..9b98c15 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -49,7 +49,7 @@ int test__perf_evsel__tp_sched_test(void)
 	if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "prev_state", 8, true))
+	if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true))
 		ret = -1;
 
 	if (perf_evsel__test_field(evsel, "next_comm", 16, true))
-- 
cgit v0.10.2


From d4339569a6f8bb1e703337175dcd4fd4bcd5d891 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 17 Jul 2013 19:49:41 +0200
Subject: perf session: Use session->fd instead of passing fd as argument

Using session->fd instead of passing fd as argument because it's always
session->fd that's passed as fd argument.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374083403-14591-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b28a65e..b3f253f 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2619,19 +2619,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 	return 0;
 }
 
-static int perf_header__read_pipe(struct perf_session *session, int fd)
+static int perf_header__read_pipe(struct perf_session *session)
 {
 	struct perf_header *header = &session->header;
 	struct perf_pipe_file_header f_header;
 
-	if (perf_file_header__read_pipe(&f_header, header, fd,
+	if (perf_file_header__read_pipe(&f_header, header, session->fd,
 					session->repipe) < 0) {
 		pr_debug("incompatible file format\n");
 		return -EINVAL;
 	}
 
-	session->fd = fd;
-
 	return 0;
 }
 
@@ -2725,20 +2723,21 @@ static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
 	return 0;
 }
 
-int perf_session__read_header(struct perf_session *session, int fd)
+int perf_session__read_header(struct perf_session *session)
 {
 	struct perf_header *header = &session->header;
 	struct perf_file_header	f_header;
 	struct perf_file_attr	f_attr;
 	u64			f_id;
 	int nr_attrs, nr_ids, i, j;
+	int fd = session->fd;
 
 	session->evlist = perf_evlist__new();
 	if (session->evlist == NULL)
 		return -ENOMEM;
 
 	if (session->fd_pipe)
-		return perf_header__read_pipe(session, fd);
+		return perf_header__read_pipe(session);
 
 	if (perf_file_header__read(&f_header, header, fd) < 0)
 		return -EINVAL;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 669fda5..0bacb2d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -96,7 +96,7 @@ struct perf_header {
 struct perf_evlist;
 struct perf_session;
 
-int perf_session__read_header(struct perf_session *session, int fd);
+int perf_session__read_header(struct perf_session *session);
 int perf_session__write_header(struct perf_session *session,
 			       struct perf_evlist *evlist,
 			       int fd, bool at_exit);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0d9f94..272c9cf 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force)
 		self->fd_pipe = true;
 		self->fd = STDIN_FILENO;
 
-		if (perf_session__read_header(self, self->fd) < 0)
+		if (perf_session__read_header(self) < 0)
 			pr_err("incompatible file format (rerun with -v to learn more)");
 
 		return 0;
@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force)
 		goto out_close;
 	}
 
-	if (perf_session__read_header(self, self->fd) < 0) {
+	if (perf_session__read_header(self) < 0) {
 		pr_err("incompatible file format (rerun with -v to learn more)");
 		goto out_close;
 	}
-- 
cgit v0.10.2


From 75211dbfabf1de3a42703f4d3cd6f6e047289d40 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 17 Jul 2013 19:49:42 +0200
Subject: perf header: Remove data_offset seek as it's not needed

Removing data_offset seek as it's not needed, because data are not read
by syscall but mmaped instead.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374083403-14591-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b3f253f..035efe7 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2794,8 +2794,6 @@ int perf_session__read_header(struct perf_session *session)
 	perf_header__process_sections(header, fd, &session->pevent,
 				      perf_file_section__process);
 
-	lseek(fd, header->data_offset, SEEK_SET);
-
 	if (perf_evlist__prepare_tracepoint_events(session->evlist,
 						   session->pevent))
 		goto out_delete_evlist;
-- 
cgit v0.10.2


From 944d62ba4c8e1c9bb2962a70e3d495106fae8cd9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 17 Jul 2013 19:49:43 +0200
Subject: perf header: Remove attr_offset from perf_header

Removing attr_offset from perf_header as it's possible to use it as a
local variable.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374083403-14591-4-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 035efe7..dcba56a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2272,6 +2272,7 @@ int perf_session__write_header(struct perf_session *session,
 	struct perf_file_attr   f_attr;
 	struct perf_header *header = &session->header;
 	struct perf_evsel *evsel;
+	u64 attr_offset;
 	int err;
 
 	lseek(fd, sizeof(f_header), SEEK_SET);
@@ -2285,7 +2286,7 @@ int perf_session__write_header(struct perf_session *session,
 		}
 	}
 
-	header->attr_offset = lseek(fd, 0, SEEK_CUR);
+	attr_offset = lseek(fd, 0, SEEK_CUR);
 
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		f_attr = (struct perf_file_attr){
@@ -2315,7 +2316,7 @@ int perf_session__write_header(struct perf_session *session,
 		.size	   = sizeof(f_header),
 		.attr_size = sizeof(f_attr),
 		.attrs = {
-			.offset = header->attr_offset,
+			.offset = attr_offset,
 			.size   = evlist->nr_entries * sizeof(f_attr),
 		},
 		.data = {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0bacb2d..6fa80f9 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -86,7 +86,6 @@ struct perf_session_env {
 
 struct perf_header {
 	bool			needs_swap;
-	s64			attr_offset;
 	u64			data_offset;
 	u64			data_size;
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
-- 
cgit v0.10.2


From 8d541e974f4b3ed5db7e278b4d64481af34910a9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 17 Jul 2013 19:49:44 +0200
Subject: perf header: Introduce feat_offset into perf_header

Introducing feat_offset into perf_header to make the location of the
features section clear.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374083403-14591-5-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index dcba56a..eefb052 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2225,7 +2225,7 @@ static int perf_header__adds_write(struct perf_header *header,
 
 	sec_size = sizeof(*feat_sec) * nr_sections;
 
-	sec_start = header->data_offset + header->data_size;
+	sec_start = header->feat_offset;
 	lseek(fd, sec_start + sec_size, SEEK_SET);
 
 	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
@@ -2304,6 +2304,7 @@ int perf_session__write_header(struct perf_session *session,
 	}
 
 	header->data_offset = lseek(fd, 0, SEEK_CUR);
+	header->feat_offset = header->data_offset + header->data_size;
 
 	if (at_exit) {
 		err = perf_header__adds_write(header, evlist, fd);
@@ -2373,7 +2374,7 @@ int perf_header__process_sections(struct perf_header *header, int fd,
 
 	sec_size = sizeof(*feat_sec) * nr_sections;
 
-	lseek(fd, header->data_offset + header->data_size, SEEK_SET);
+	lseek(fd, header->feat_offset, SEEK_SET);
 
 	err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
 	if (err < 0)
@@ -2572,6 +2573,7 @@ int perf_file_header__read(struct perf_file_header *header,
 
 	ph->data_offset  = header->data.offset;
 	ph->data_size	 = header->data.size;
+	ph->feat_offset  = header->data.offset + header->data.size;
 	return 0;
 }
 
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 6fa80f9..e6387dc 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -88,6 +88,7 @@ struct perf_header {
 	bool			needs_swap;
 	u64			data_offset;
 	u64			data_size;
+	u64			feat_offset;
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 	struct perf_session_env env;
 };
-- 
cgit v0.10.2


From 2a08c3ec4f7d6058a450d2d4bc6e366955872707 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 17 Jul 2013 19:49:47 +0200
Subject: perf header: Recognize version number for perf data file

Keep the recognized data file version within 'struct perf_header'.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374083403-14591-8-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index eefb052..f558f83 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2480,6 +2480,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz,
 	/* check for legacy format */
 	ret = memcmp(&magic, __perf_magic1, sizeof(magic));
 	if (ret == 0) {
+		ph->version = PERF_HEADER_VERSION_1;
 		pr_debug("legacy perf.data format\n");
 		if (is_pipe)
 			return try_all_pipe_abis(hdr_sz, ph);
@@ -2501,6 +2502,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz,
 		return -1;
 
 	ph->needs_swap = true;
+	ph->version = PERF_HEADER_VERSION_2;
 
 	return 0;
 }
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e6387dc..307c9ae 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -34,6 +34,11 @@ enum {
 	HEADER_FEAT_BITS	= 256,
 };
 
+enum perf_header_version {
+	PERF_HEADER_VERSION_1,
+	PERF_HEADER_VERSION_2,
+};
+
 struct perf_file_section {
 	u64 offset;
 	u64 size;
@@ -85,12 +90,13 @@ struct perf_session_env {
 };
 
 struct perf_header {
-	bool			needs_swap;
-	u64			data_offset;
-	u64			data_size;
-	u64			feat_offset;
+	enum perf_header_version	version;
+	bool				needs_swap;
+	u64				data_offset;
+	u64				data_size;
+	u64				feat_offset;
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
-	struct perf_session_env env;
+	struct perf_session_env 	env;
 };
 
 struct perf_evlist;
-- 
cgit v0.10.2


From c7e85c42be68fca743df58a306edd29aa295e155 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 18 Jul 2013 20:47:47 +0900
Subject: kprobes/x86: Remove an incorrect comment about int3 in NMI/MCE

Remove a comment about an int3 issue in NMI/MCE, since
commit:

  3f3c8b8c4b2a ("x86: Add workaround to NMI iret woes")

already fixed that. Keeping this incorrect comment can mislead developers.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Reviewed-by: Jiri Kosina <jkosina@suse.cz>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Borislav Petkov <bpetkov@suse.de>
Link: http://lkml.kernel.org/r/20130718114747.26675.84110.stgit@mhiramat-M0-7522
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 76dc6f0..d7d8a8c 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -415,11 +415,6 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist)
 			break;
 	}
 
-	/*
-	 * text_poke_smp doesn't support NMI/MCE code modifying.
-	 * However, since kprobes itself also doesn't support NMI/MCE
-	 * code probing, it's not a problem.
-	 */
 	text_poke_smp_batch(jump_poke_params, c);
 }
 
@@ -455,11 +450,6 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
 			break;
 	}
 
-	/*
-	 * text_poke_smp doesn't support NMI/MCE code modifying.
-	 * However, since kprobes itself also doesn't support NMI/MCE
-	 * code probing, it's not a problem.
-	 */
 	text_poke_smp_batch(jump_poke_params, c);
 }
 
-- 
cgit v0.10.2


From a7b0133ea94e4421a81702d5c0e6dcdbbbab8f6b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 18 Jul 2013 20:47:50 +0900
Subject: kprobes/x86: Use text_poke_bp() instead of text_poke_smp*()

Use text_poke_bp() for optimizing kprobes instead of
text_poke_smp*(). Since the number of kprobes is usually not so
large (<100) and text_poke_bp() is much lighter than
text_poke_smp() [which uses stop_machine()], this just stops
using batch processing.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Reviewed-by: Jiri Kosina <jkosina@suse.cz>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Borislav Petkov <bpetkov@suse.de>
Link: http://lkml.kernel.org/r/20130718114750.26675.9174.stgit@mhiramat-M0-7522
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2e9d4b5..c6ee63f 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -82,14 +82,9 @@ extern void synthesize_reljump(void *from, void *to);
 extern void synthesize_relcall(void *from, void *to);
 
 #ifdef	CONFIG_OPTPROBES
-extern int arch_init_optprobes(void);
 extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
 extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
 #else	/* !CONFIG_OPTPROBES */
-static inline int arch_init_optprobes(void)
-{
-	return 0;
-}
 static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
 {
 	return 0;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 211bce4..cd49b2c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1068,7 +1068,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 
 int __init arch_init_kprobes(void)
 {
-	return arch_init_optprobes();
+	return 0;
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index d7d8a8c..d71e994 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -371,31 +371,6 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
 	return 0;
 }
 
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
-	u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
-					    u8 *insn_buf,
-					    struct optimized_kprobe *op)
-{
-	s32 rel = (s32)((long)op->optinsn.insn -
-			((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
-	/* Backup instructions which will be replaced by jump address */
-	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
-	       RELATIVE_ADDR_SIZE);
-
-	insn_buf[0] = RELATIVEJUMP_OPCODE;
-	*(s32 *)(&insn_buf[1]) = rel;
-
-	tprm->addr = op->kp.addr;
-	tprm->opcode = insn_buf;
-	tprm->len = RELATIVEJUMP_SIZE;
-}
-
 /*
  * Replace breakpoints (int3) with relative jumps.
  * Caller must call with locking kprobe_mutex and text_mutex.
@@ -403,32 +378,38 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
 void __kprobes arch_optimize_kprobes(struct list_head *oplist)
 {
 	struct optimized_kprobe *op, *tmp;
-	int c = 0;
+	u8 insn_buf[RELATIVEJUMP_SIZE];
 
 	list_for_each_entry_safe(op, tmp, oplist, list) {
+		s32 rel = (s32)((long)op->optinsn.insn -
+			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
 		WARN_ON(kprobe_disabled(&op->kp));
-		/* Setup param */
-		setup_optimize_kprobe(&jump_poke_params[c],
-				      jump_poke_bufs[c].buf, op);
+
+		/* Backup instructions which will be replaced by jump address */
+		memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+		       RELATIVE_ADDR_SIZE);
+
+		insn_buf[0] = RELATIVEJUMP_OPCODE;
+		*(s32 *)(&insn_buf[1]) = rel;
+
+		text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+			     op->optinsn.insn);
+
 		list_del_init(&op->list);
-		if (++c >= MAX_OPTIMIZE_PROBES)
-			break;
 	}
-
-	text_poke_smp_batch(jump_poke_params, c);
 }
 
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
-					      u8 *insn_buf,
-					      struct optimized_kprobe *op)
+/* Replace a relative jump with a breakpoint (int3).  */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
+	u8 insn_buf[RELATIVEJUMP_SIZE];
+
 	/* Set int3 to first byte for kprobes */
 	insn_buf[0] = BREAKPOINT_INSTRUCTION;
 	memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
-	tprm->addr = op->kp.addr;
-	tprm->opcode = insn_buf;
-	tprm->len = RELATIVEJUMP_SIZE;
+	text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+		     op->optinsn.insn);
 }
 
 /*
@@ -439,29 +420,11 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
 				    struct list_head *done_list)
 {
 	struct optimized_kprobe *op, *tmp;
-	int c = 0;
 
 	list_for_each_entry_safe(op, tmp, oplist, list) {
-		/* Setup param */
-		setup_unoptimize_kprobe(&jump_poke_params[c],
-					jump_poke_bufs[c].buf, op);
+		arch_unoptimize_kprobe(op);
 		list_move(&op->list, done_list);
-		if (++c >= MAX_OPTIMIZE_PROBES)
-			break;
 	}
-
-	text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3).  */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
-	u8 buf[RELATIVEJUMP_SIZE];
-
-	/* Set int3 to first byte for kprobes */
-	buf[0] = BREAKPOINT_INSTRUCTION;
-	memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-	text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
 }
 
 int  __kprobes
@@ -481,22 +444,3 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
 	}
 	return 0;
 }
-
-int __kprobes arch_init_optprobes(void)
-{
-	/* Allocate code buffer and parameter array */
-	jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
-				 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
-	if (!jump_poke_bufs)
-		return -ENOMEM;
-
-	jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
-				   MAX_OPTIMIZE_PROBES, GFP_KERNEL);
-	if (!jump_poke_params) {
-		kfree(jump_poke_bufs);
-		jump_poke_bufs = NULL;
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-- 
cgit v0.10.2


From ea8596bb2d8d37957f3e92db9511c50801689180 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 18 Jul 2013 20:47:53 +0900
Subject: kprobes/x86: Remove unused text_poke_smp() and text_poke_smp_batch()
 functions

Since introducing the text_poke_bp() for all text_poke_smp*()
callers, text_poke_smp*() are now unused. This patch basically
reverts:

  3d55cc8a058e ("x86: Add text_poke_smp for SMP cross modifying code")
  7deb18dcf047 ("x86: Introduce text_poke_smp_batch() for batch-code modifying")

and related commits.

This patch also fixes a Kconfig dependency issue on STOP_MACHINE
in the case of CONFIG_SMP && !CONFIG_MODULE_UNLOAD.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Reviewed-by: Jiri Kosina <jkosina@suse.cz>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@akamai.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Borislav Petkov <bpetkov@suse.de>
Link: http://lkml.kernel.org/r/20130718114753.26675.18714.stgit@mhiramat-M0-7522
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b32ebf9..3106e0e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,7 +81,6 @@ config X86
 	select HAVE_USER_RETURN_NOTIFIER
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select SPARSE_IRQ
@@ -2332,10 +2331,6 @@ config HAVE_ATOMIC_IOMAP
 	def_bool y
 	depends on X86_32
 
-config HAVE_TEXT_POKE_SMP
-	bool
-	select STOP_MACHINE if SMP
-
 config X86_DEV_DMA_OPS
 	bool
 	depends on X86_64 || STA2X11
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 3abf8dd..4daf8c5 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -220,21 +220,10 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * no thread can be preempted in the instructions being modified (no iret to an
  * invalid instruction possible) or if the instructions are changed from a
  * consistent state to another consistent state atomically.
- * More care must be taken when modifying code in the SMP case because of
- * Intel's errata. text_poke_smp() takes care that errata, but still
- * doesn't support NMI/MCE handler code modifying.
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
-struct text_poke_param {
-	void *addr;
-	const void *opcode;
-	size_t len;
-};
-
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
-extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
-extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 0ab4936..5d8782e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -633,8 +633,8 @@ static int int3_notify(struct notifier_block *self, unsigned long val, void *dat
  * @handler:	address to jump to when the temporary breakpoint is hit
  *
  * Modify multi-byte instruction by using int3 breakpoint on SMP.
- * In contrary to text_poke_smp(), we completely avoid stop_machine() here,
- * and achieve the synchronization using int3 breakpoint.
+ * We completely avoid stop_machine() here, and achieve the
+ * synchronization using int3 breakpoint.
  *
  * The way it is done:
  *	- add a int3 trap to the address that will be patched
@@ -702,97 +702,3 @@ static int __init int3_init(void)
 }
 
 arch_initcall(int3_init);
-/*
- * Cross-modifying kernel text with stop_machine().
- * This code originally comes from immediate value.
- */
-static atomic_t stop_machine_first;
-static int wrote_text;
-
-struct text_poke_params {
-	struct text_poke_param *params;
-	int nparams;
-};
-
-static int __kprobes stop_machine_text_poke(void *data)
-{
-	struct text_poke_params *tpp = data;
-	struct text_poke_param *p;
-	int i;
-
-	if (atomic_xchg(&stop_machine_first, 0)) {
-		for (i = 0; i < tpp->nparams; i++) {
-			p = &tpp->params[i];
-			text_poke(p->addr, p->opcode, p->len);
-		}
-		smp_wmb();	/* Make sure other cpus see that this has run */
-		wrote_text = 1;
-	} else {
-		while (!wrote_text)
-			cpu_relax();
-		smp_mb();	/* Load wrote_text before following execution */
-	}
-
-	for (i = 0; i < tpp->nparams; i++) {
-		p = &tpp->params[i];
-		flush_icache_range((unsigned long)p->addr,
-				   (unsigned long)p->addr + p->len);
-	}
-	/*
-	 * Intel Archiecture Software Developer's Manual section 7.1.3 specifies
-	 * that a core serializing instruction such as "cpuid" should be
-	 * executed on _each_ core before the new instruction is made visible.
-	 */
-	sync_core();
-	return 0;
-}
-
-/**
- * text_poke_smp - Update instructions on a live kernel on SMP
- * @addr: address to modify
- * @opcode: source of the copy
- * @len: length to copy
- *
- * Modify multi-byte instruction by using stop_machine() on SMP. This allows
- * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
- * should be allowed, since stop_machine() does _not_ protect code against
- * NMI and MCE.
- *
- * Note: Must be called under get_online_cpus() and text_mutex.
- */
-void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
-{
-	struct text_poke_params tpp;
-	struct text_poke_param p;
-
-	p.addr = addr;
-	p.opcode = opcode;
-	p.len = len;
-	tpp.params = &p;
-	tpp.nparams = 1;
-	atomic_set(&stop_machine_first, 1);
-	wrote_text = 0;
-	/* Use __stop_machine() because the caller already got online_cpus. */
-	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
-	return addr;
-}
-
-/**
- * text_poke_smp_batch - Update instructions on a live kernel on SMP
- * @params: an array of text_poke parameters
- * @n: the number of elements in params.
- *
- * Modify multi-byte instruction by using stop_machine() on SMP. Since the
- * stop_machine() is heavy task, it is better to aggregate text_poke requests
- * and do it once if possible.
- *
- * Note: Must be called under get_online_cpus() and text_mutex.
- */
-void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
-{
-	struct text_poke_params tpp = {.params = params, .nparams = n};
-
-	atomic_set(&stop_machine_first, 1);
-	wrote_text = 0;
-	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
-}
-- 
cgit v0.10.2


From 2eaa1b407aa6592a884f1be061ef61de7012c97a Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 18 Jul 2013 16:06:15 -0600
Subject: perf script: Fix named threads support

Commit 73994dc broke named thread support in perf-script. The thread
struct in al is the main thread for a multithreaded process. The thread
struct used for analysis (e.g., dumping events) should be the specific
thread for the sample.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Feng Tang <feng.tang@intel.com>
Link: http://lkml.kernel.org/r/1374185175-28272-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ecb6979..1cad370 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -397,10 +397,10 @@ static void print_sample_bts(union perf_event *event,
 
 static void process_event(union perf_event *event, struct perf_sample *sample,
 			  struct perf_evsel *evsel, struct machine *machine,
-			  struct addr_location *al)
+			  struct thread *thread,
+			  struct addr_location *al __maybe_unused)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	struct thread *thread = al->thread;
 
 	if (output[attr->type].fields == 0)
 		return;
@@ -511,7 +511,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
 		return 0;
 
-	scripting_ops->process_event(event, sample, evsel, machine, &al);
+	scripting_ops->process_event(event, sample, evsel, machine, thread, &al);
 
 	evsel->hists.stats.total_period += sample->period;
 	return 0;
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index eacec85..a85e4ae 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -261,7 +261,8 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
 				    struct perf_sample *sample,
 				    struct perf_evsel *evsel,
 				    struct machine *machine __maybe_unused,
-				    struct addr_location *al)
+				    struct thread *thread,
+					struct addr_location *al)
 {
 	struct format_field *field;
 	static char handler[256];
@@ -272,7 +273,6 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	struct thread *thread = al->thread;
 	char *comm = thread->comm;
 
 	dSP;
@@ -351,7 +351,8 @@ static void perl_process_event_generic(union perf_event *event,
 				       struct perf_sample *sample,
 				       struct perf_evsel *evsel,
 				       struct machine *machine __maybe_unused,
-				       struct addr_location *al __maybe_unused)
+				       struct thread *thread __maybe_unused,
+					   struct addr_location *al __maybe_unused)
 {
 	dSP;
 
@@ -377,10 +378,11 @@ static void perl_process_event(union perf_event *event,
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct machine *machine,
-			       struct addr_location *al)
+			       struct thread *thread,
+				   struct addr_location *al)
 {
-	perl_process_tracepoint(event, sample, evsel, machine, al);
-	perl_process_event_generic(event, sample, evsel, machine, al);
+	perl_process_tracepoint(event, sample, evsel, machine, thread, al);
+	perl_process_event_generic(event, sample, evsel, machine, thread, al);
 }
 
 static void run_start_sub(void)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e87aa5d..cc75a3c 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -225,6 +225,7 @@ static void python_process_tracepoint(union perf_event *perf_event
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel,
 				 struct machine *machine __maybe_unused,
+				 struct thread *thread,
 				 struct addr_location *al)
 {
 	PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
@@ -238,7 +239,6 @@ static void python_process_tracepoint(union perf_event *perf_event
 	int cpu = sample->cpu;
 	void *data = sample->raw_data;
 	unsigned long long nsecs = sample->time;
-	struct thread *thread = al->thread;
 	char *comm = thread->comm;
 
 	t = PyTuple_New(MAX_FIELDS);
@@ -345,12 +345,12 @@ static void python_process_general_event(union perf_event *perf_event
 					 struct perf_sample *sample,
 					 struct perf_evsel *evsel,
 					 struct machine *machine __maybe_unused,
+					 struct thread *thread,
 					 struct addr_location *al)
 {
 	PyObject *handler, *retval, *t, *dict;
 	static char handler_name[64];
 	unsigned n = 0;
-	struct thread *thread = al->thread;
 
 	/*
 	 * Use the MAX_FIELDS to make the function expandable, though
@@ -404,17 +404,18 @@ static void python_process_event(union perf_event *perf_event,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel,
 				 struct machine *machine,
+				 struct thread *thread,
 				 struct addr_location *al)
 {
 	switch (evsel->attr.type) {
 	case PERF_TYPE_TRACEPOINT:
 		python_process_tracepoint(perf_event, sample, evsel,
-					  machine, al);
+					  machine, thread, al);
 		break;
 	/* Reserve for future process_hw/sw/raw APIs */
 	default:
 		python_process_general_event(perf_event, sample, evsel,
-					     machine, al);
+					     machine, thread, al);
 	}
 }
 
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 8715a10..95199e4 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -39,7 +39,8 @@ static void process_event_unsupported(union perf_event *event __maybe_unused,
 				      struct perf_sample *sample __maybe_unused,
 				      struct perf_evsel *evsel __maybe_unused,
 				      struct machine *machine __maybe_unused,
-				      struct addr_location *al __maybe_unused)
+				      struct thread *thread __maybe_unused,
+					  struct addr_location *al __maybe_unused)
 {
 }
 
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 669a64a..fafe1a4 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -9,6 +9,7 @@ struct machine;
 struct perf_sample;
 union perf_event;
 struct perf_tool;
+struct thread;
 
 extern struct pevent *perf_pevent;
 
@@ -68,7 +69,8 @@ struct scripting_ops {
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct machine *machine,
-			       struct addr_location *al);
+			       struct thread *thread,
+				   struct addr_location *al);
 	int (*generate_script) (struct pevent *pevent, const char *outfile);
 };
 
-- 
cgit v0.10.2


From 2b821cce1c950cead081d779db0dda680192c4bd Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 18 Jul 2013 17:27:59 -0600
Subject: perf evsel: Handle ENODEV on default cycles event

Some systems (e.g., VMs on qemu-0.13 with the default vcpu model) report
an unsupported CPU model:

Performance Events: unsupported p6 CPU model 2 no PMU driver, software events only.

Subsequent invocations of perf fail with:

The sys_perf_event_open() syscall returned with 19 (No such device) for event (cycles).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?

Add ENODEV to the list of errno's to fallback to cpu-clock.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374190079-28507-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a635461..8bed0c1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1482,7 +1482,7 @@ out:
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 			  char *msg, size_t msgsize)
 {
-	if ((err == ENOENT || err == ENXIO) &&
+	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
 	    evsel->attr.type   == PERF_TYPE_HARDWARE &&
 	    evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
 		/*
-- 
cgit v0.10.2


From a198996c7afae0097c67a61851f19863e59697b2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 18 Jul 2013 15:33:46 -0700
Subject: perf bench: Fix memcpy benchmark for large sizes

The glibc calloc() function has an optimization to not explicitly
memset() very large calloc allocations that just came from mmap(),
because they are known to be zero.

This could result in the perf memcpy benchmark reading only from
the zero page, which gives unrealistic results.

Always call memset explicitly on the source area to avoid this problem.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-pzz2qrdq9eymxda0y8yxdn33@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 25fd3f1..8cdca43 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -117,6 +117,8 @@ static void alloc_mem(void **dst, void **src, size_t length)
 	*src = zalloc(length);
 	if (!*src)
 		die("memory allocation failed - maybe length is too large?\n");
+	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
+	memset(*src, 0, length);
 }
 
 static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
-- 
cgit v0.10.2


From 99571ab3d9b342a717295a9c7e2b4495ee19e32a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 18 Jul 2013 15:33:57 -0700
Subject: perf tools: Support callchain sorting based on addresses

For programs with very large functions it can be useful to distinguish
the callgraph nodes on more than just function names. For example, if
you have multiple calls to the same function, they end up as separate
nodes in the chain.

This patch adds a new key field to the callgraph options, that allows
comparing nodes on functions (as today, default) and addresses.

Longer term it would be nice to also handle src lines, but that would
need more changes and address is a reasonable proxy for it today.

For now I reference the global params, as there was no simple way to
register a params pointer.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-0uskktybf0e7wrnoi5e9b9it@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 747ff50..2b8097e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -115,7 +115,7 @@ OPTIONS
 --dump-raw-trace::
         Dump raw trace in ASCII.
 
--g [type,min[,limit],order]::
+-g [type,min[,limit],order[,key]]::
 --call-graph::
         Display call chains using type, min percent threshold, optional print
 	limit and order.
@@ -129,7 +129,11 @@ OPTIONS
 	- callee: callee based call graph.
 	- caller: inverted caller based call graph.
 
-	Default: fractal,0.5,callee.
+	key can be:
+	- function: compare on functions
+	- address: compare on individual code addresses
+
+	Default: fractal,0.5,callee,function.
 
 -G::
 --inverted::
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index a34c587..d785d89 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -667,12 +667,23 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	}
 
 	/* get the call chain order */
-	if (!strcmp(tok2, "caller"))
+	if (!strncmp(tok2, "caller", strlen("caller")))
 		callchain_param.order = ORDER_CALLER;
-	else if (!strcmp(tok2, "callee"))
+	else if (!strncmp(tok2, "callee", strlen("callee")))
 		callchain_param.order = ORDER_CALLEE;
 	else
 		return -1;
+
+	/* Get the sort key */
+	tok2 = strtok(NULL, ",");
+	if (!tok2)
+		goto setup;
+	if (!strncmp(tok2, "function", strlen("function")))
+		callchain_param.key = CCKEY_FUNCTION;
+	else if (!strncmp(tok2, "address", strlen("address")))
+		callchain_param.key = CCKEY_ADDRESS;
+	else
+		return -1;
 setup:
 	if (callchain_register_param(&callchain_param) < 0) {
 		fprintf(stderr, "Can't register callchain params\n");
@@ -784,8 +795,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
 		    "Only display entries with parent-match"),
 	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
-		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. "
-		     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
+		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+		     "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 42b6a63..4fee33b 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -15,6 +15,7 @@
 #include <errno.h>
 #include <math.h>
 
+#include "hist.h"
 #include "util.h"
 #include "callchain.h"
 
@@ -327,7 +328,8 @@ append_chain(struct callchain_node *root,
 	/*
 	 * Lookup in the current node
 	 * If we have a symbol, then compare the start to match
-	 * anywhere inside a function.
+	 * anywhere inside a function, unless function
+	 * mode is disabled.
 	 */
 	list_for_each_entry(cnode, &root->val, list) {
 		struct callchain_cursor_node *node;
@@ -339,7 +341,8 @@ append_chain(struct callchain_node *root,
 
 		sym = node->sym;
 
-		if (cnode->ms.sym && sym) {
+		if (cnode->ms.sym && sym &&
+		    callchain_param.key == CCKEY_FUNCTION) {
 			if (cnode->ms.sym->start != sym->start)
 				break;
 		} else if (cnode->ip != node->ip)
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 3ee9f67..812d5a0 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -41,12 +41,18 @@ struct callchain_param;
 typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
 				 u64, struct callchain_param *);
 
+enum chain_key {
+	CCKEY_FUNCTION,
+	CCKEY_ADDRESS
+};
+
 struct callchain_param {
 	enum chain_mode 	mode;
 	u32			print_limit;
 	double			min_percent;
 	sort_chain_func_t	sort;
 	enum chain_order	order;
+	enum chain_key		key;
 };
 
 struct callchain_list {
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index a9dd1b9..46a0d35 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -24,7 +24,8 @@ enum hist_filter {
 struct callchain_param	callchain_param = {
 	.mode	= CHAIN_GRAPH_REL,
 	.min_percent = 0.5,
-	.order  = ORDER_CALLEE
+	.order  = ORDER_CALLEE,
+	.key	= CCKEY_FUNCTION
 };
 
 u16 hists__col_len(struct hists *hists, enum hist_column col)
-- 
cgit v0.10.2


From 575bf1d04e908469d26da424b52fc1b12a1db9d8 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Mon, 24 Jun 2013 11:43:14 +0300
Subject: perf tools: Fix build with perl 5.18
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

perl.h from new Perl release doesn't like -Wundef and -Wswitch-default:

/usr/lib/perl5/core_perl/CORE/perl.h:548:5: error: "SILENT_NO_TAINT_SUPPORT" is not defined [-Werror=undef]
 #if SILENT_NO_TAINT_SUPPORT && !defined(NO_TAINT_SUPPORT)
     ^
/usr/lib/perl5/core_perl/CORE/perl.h:556:5: error: "NO_TAINT_SUPPORT" is not defined [-Werror=undef]
 #if NO_TAINT_SUPPORT
     ^
In file included from /usr/lib/perl5/core_perl/CORE/perl.h:3471:0,
                 from util/scripting-engines/trace-event-perl.c:30:
/usr/lib/perl5/core_perl/CORE/sv.h:1455:5: error: "NO_TAINT_SUPPORT" is not defined [-Werror=undef]
 #if NO_TAINT_SUPPORT
     ^
In file included from /usr/lib/perl5/core_perl/CORE/perl.h:3472:0,
                 from util/scripting-engines/trace-event-perl.c:30:
/usr/lib/perl5/core_perl/CORE/regexp.h:436:5: error: "NO_TAINT_SUPPORT" is not defined [-Werror=undef]
 #if NO_TAINT_SUPPORT
     ^
In file included from /usr/lib/perl5/core_perl/CORE/hv.h:592:0,
                 from /usr/lib/perl5/core_perl/CORE/perl.h:3480,
                 from util/scripting-engines/trace-event-perl.c:30:
/usr/lib/perl5/core_perl/CORE/hv_func.h: In function ‘S_perl_hash_siphash_2_4’:
/usr/lib/perl5/core_perl/CORE/hv_func.h:222:3: error: switch missing default case [-Werror=switch-default]
   switch( left )
   ^
/usr/lib/perl5/core_perl/CORE/hv_func.h: In function ‘S_perl_hash_superfast’:
/usr/lib/perl5/core_perl/CORE/hv_func.h:274:5: error: switch missing default case [-Werror=switch-default]
     switch (rem) { \
     ^
/usr/lib/perl5/core_perl/CORE/hv_func.h: In function ‘S_perl_hash_murmur3’:
/usr/lib/perl5/core_perl/CORE/hv_func.h:398:5: error: switch missing default case [-Werror=switch-default]
     switch(bytes_in_carry) { /* how many bytes in carry */
     ^

Let's disable the warnings for code which uses perl.h.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1372063394-20126-1-git-send-email-kirill@shutemov.name
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 2a69026..024680b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -631,10 +631,10 @@ $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
 
 $(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
 
 $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
-	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
 
 $(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-- 
cgit v0.10.2


From 0659e66981ab137c11a746899672c886e1973a7b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 22 Jul 2013 14:43:30 +0200
Subject: perf tests: Run ctags/cscope make tests only with needed binaries

Run the tags and cscope make tests only if the 'ctags' and 'cscope'
binaries are installed, so we don't get false-alarm test failures.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374497014-2817-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index c441a28..d1efef9 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -1,6 +1,8 @@
 PERF := .
 MK   := Makefile
 
+has = $(shell which $1 2>/dev/null)
+
 # standard single make variable specified
 make_clean_all      := clean all
 make_python_perf_so := python/perf.so
@@ -50,14 +52,19 @@ run += make_no_backtrace
 run += make_no_libnuma
 run += make_no_libaudit
 run += make_no_libbionic
-run += make_tags
-run += make_cscope
 run += make_help
 run += make_doc
 run += make_perf_o
 run += make_util_map_o
 run += make_minimal
 
+ifneq ($(call has,ctags),)
+run += make_tags
+endif
+ifneq ($(call has,cscope),)
+run += make_cscope
+endif
+
 # $(run_O) contains same portion of $(run) tests with '_O' attached
 # to distinguish O=... tests
 run_O := $(addsuffix _O,$(run))
-- 
cgit v0.10.2


From 8ba7cdea321d4d5bb53ac210f4e0a4bca5b0dba5 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 22 Jul 2013 14:43:31 +0200
Subject: perf tests: Rename TMP to TMP_O tests/make variable

Rename the TMP tests/make variable to TMP_O to make name space for other
temp variables.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374497014-2817-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index d1efef9..dbbb62c 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -102,7 +102,7 @@ test_make_util_map_o_O := true
 test_default = test -x $(PERF)/perf
 test = $(if $(test_$1),$(test_$1),$(test_default))
 
-test_default_O = test -x $$TMP/perf
+test_default_O = test -x $$TMP_O/perf
 test_O = $(if $(test_$1),$(test_$1),$(test_default_O))
 
 all:
@@ -127,14 +127,14 @@ $(run):
 
 $(run_O):
 	$(call clean)
-	@TMP=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP"; \
+	@TMP_O=$$(mktemp -d); \
+	cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP_O"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1 && \
 	echo "  test: $(call test_O,$@)"; \
 	$(call test_O,$@) && \
 	rm -f $@ && \
-	rm -rf $$TMP
+	rm -rf $$TMP_O
 
 all: $(run) $(run_O)
 	@echo OK
-- 
cgit v0.10.2


From c9311674e73345d4d02595c80d43f4bcf9ff3a3f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 22 Jul 2013 14:43:32 +0200
Subject: perf tests: Add DESTDIR=TMP_DEST tests/make variable

Add the TMP_DEST tests/make variable to provide the DESTDIR directory for
installation tests.

Add it to the existing test targets, since the DESTDIR variable 'should
not' affect anything other than the install* targets. We can always
separate this if there's a need for a DESTDIR-free build test.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374497014-2817-4-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index dbbb62c..7646a00 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -118,23 +118,27 @@ clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null)
 
 $(run):
 	$(call clean)
-	@cmd="cd $(PERF) && make -f $(MK) $($@)"; \
+	@TMP_DEST=$$(mktemp -d); \
+	cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1; \
 	echo "  test: $(call test,$@)"; \
 	$(call test,$@) && \
-	rm -f $@
+	rm -f $@ && \
+	rm -rf $$TMP_DEST
 
 $(run_O):
 	$(call clean)
 	@TMP_O=$$(mktemp -d); \
-	cmd="cd $(PERF) && make -f $(MK) $($(patsubst %_O,%,$@)) O=$$TMP_O"; \
+	TMP_DEST=$$(mktemp -d); \
+	cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1 && \
 	echo "  test: $(call test_O,$@)"; \
 	$(call test_O,$@) && \
 	rm -f $@ && \
-	rm -rf $$TMP_O
+	rm -rf $$TMP_O && \
+	rm -rf $$TMP_DEST
 
 all: $(run) $(run_O)
 	@echo OK
-- 
cgit v0.10.2


From c0ec11088800e1a5efbccc291a42580727798123 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 22 Jul 2013 14:43:33 +0200
Subject: perf tests: Add 'make install/install-bin' tests into tests/make

Adding 'make install' and 'make install-bin' tests into tests/make. It's
run as part of the suite, but could be run separately like:

  $ make -f tests/make make_install
  - make_install: cd . && make -f Makefile DESTDIR=/tmp/tmp.LpkYbk5pfs install
    test: test -x /tmp/tmp.LpkYbk5pfs/bin/perf
  $ make -f tests/make make_install_bin
  - make_install_bin: cd . && make -f Makefile DESTDIR=/tmp/tmp.dMxePBMcFT
    install-bin
    test: test -x /tmp/tmp.dMxePBMcFT/bin/perf

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374497014-2817-5-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 7646a00..d3819f2 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -27,6 +27,8 @@ make_help           := help
 make_doc            := doc
 make_perf_o         := perf.o
 make_util_map_o     := util/map.o
+make_install        := install
+make_install_bin    := install-bin
 
 # all the NO_* variable combined
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
@@ -56,6 +58,8 @@ run += make_help
 run += make_doc
 run += make_perf_o
 run += make_util_map_o
+run += make_install
+run += make_install_bin
 run += make_minimal
 
 ifneq ($(call has,ctags),)
@@ -91,6 +95,11 @@ test_make_python_perf_so := test -f $(PERF)/python/perf.so
 test_make_perf_o     := test -f $(PERF)/perf.o
 test_make_util_map_o := test -f $(PERF)/util/map.o
 
+test_make_install       := test -x $$TMP_DEST/bin/perf
+test_make_install_O     := $(test_make_install)
+test_make_install_bin   := $(test_make_install)
+test_make_install_bin_O := $(test_make_install)
+
 # Kbuild tests only
 #test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so
 #test_make_perf_o_O         := test -f $$TMP/tools/perf/perf.o
-- 
cgit v0.10.2


From dbad41893c7941f3c53805f8206993719f971bfa Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 22 Jul 2013 14:43:34 +0200
Subject: perf tests: Add broken install-* tests into tests/make

Adding install-* tests into tests/make. Those tests are
broken, so commenting them out right away.

* Nothing gets installed for the install-man, install-doc and
  install-html targets; they just rebuild the documentation.

* I got the following error for 'install-info':

  $ make -f tests/make make_install_info
  - make_install_info: cd . && make -f Makefile DESTDIR=/tmp/tmp.Xi4mb9J1a0 install-info

  $ tail -f make_install_info
  ...
  PERF_VERSION = 3.11.rc1.g9b3c2d
  make[2]: *** No rule to make target `user-manual.xml', needed by `user-manual.texi'.  Stop.
  make[1]: *** [install-info] Error 2

* I got the following error for 'install-pdf':

  $ make -f tests/make make_install_pdf
  - make_install_pdf: cd . && make -f Makefile DESTDIR=/tmp/tmp.fXseECBbt1 install-pdf

  $ tail -f make_install_pdf
  ...
  PERF_VERSION = 3.11.rc1.g9b3c2d
  make[2]: *** No rule to make target `user-manual.xml', needed by `user-manual.pdf'.  Stop.
  make[1]: *** [install-pdf] Error 2

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1374497014-2817-6-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index d3819f2..2ca0abf 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -29,6 +29,11 @@ make_perf_o         := perf.o
 make_util_map_o     := util/map.o
 make_install        := install
 make_install_bin    := install-bin
+make_install_doc    := install-doc
+make_install_man    := install-man
+make_install_html   := install-html
+make_install_info   := install-info
+make_install_pdf    := install-pdf
 
 # all the NO_* variable combined
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
@@ -60,6 +65,12 @@ run += make_perf_o
 run += make_util_map_o
 run += make_install
 run += make_install_bin
+# FIXME 'install-*' commented out till they're fixed
+# run += make_install_doc
+# run += make_install_man
+# run += make_install_html
+# run += make_install_info
+# run += make_install_pdf
 run += make_minimal
 
 ifneq ($(call has,ctags),)
@@ -100,6 +111,26 @@ test_make_install_O     := $(test_make_install)
 test_make_install_bin   := $(test_make_install)
 test_make_install_bin_O := $(test_make_install)
 
+# FIXME nothing gets installed
+test_make_install_man    := test -f $$TMP_DEST/share/man/man1/perf.1
+test_make_install_man_O  := $(test_make_install_man)
+
+# FIXME nothing gets installed
+test_make_install_doc    := $(test_ok)
+test_make_install_doc_O  := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_html   := $(test_ok)
+test_make_install_html_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_info   := $(test_ok)
+test_make_install_info_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_pdf    := $(test_ok)
+test_make_install_pdf_O  := $(test_ok)
+
 # Kbuild tests only
 #test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so
 #test_make_perf_o_O         := test -f $$TMP/tools/perf/perf.o
-- 
cgit v0.10.2


From f9ea55d0ddf66ed030b2a478625cd5792d30df16 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 18 Jul 2013 15:58:53 -0700
Subject: perf tools: Move weight back to common sort keys

This is a partial revert of Namhyung's patch

 afab87b91f3f331d55664172dad8e476e6ffca9d
 perf sort: Separate out memory-specific sort keys

He wrote

 For global/local weights, I'm not entirely sure to place them into the
 memory dimension.  But it's the only user at this time.

Well TSX is another (in fact the original) user of the flags, and it
needs them to be common. So move local/global weight back to the common
sort keys.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Link: http://lkml.kernel.org/r/1374188333-17899-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index cb2b108..5f118a0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -874,6 +874,8 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_PARENT, "parent", sort_parent),
 	DIM(SORT_CPU, "cpu", sort_cpu),
 	DIM(SORT_SRCLINE, "srcline", sort_srcline),
+	DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
+	DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
 };
 
 #undef DIM
@@ -893,8 +895,6 @@ static struct sort_dimension bstack_sort_dimensions[] = {
 #define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
 
 static struct sort_dimension memory_sort_dimensions[] = {
-	DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
-	DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
 	DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
 	DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
 	DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 586022d..4e80dbd 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -143,6 +143,8 @@ enum sort_type {
 	SORT_PARENT,
 	SORT_CPU,
 	SORT_SRCLINE,
+	SORT_LOCAL_WEIGHT,
+	SORT_GLOBAL_WEIGHT,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
@@ -154,9 +156,7 @@ enum sort_type {
 
 	/* memory mode specific sort keys */
 	__SORT_MEMORY_MODE,
-	SORT_LOCAL_WEIGHT = __SORT_MEMORY_MODE,
-	SORT_GLOBAL_WEIGHT,
-	SORT_MEM_DADDR_SYMBOL,
+	SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE,
 	SORT_MEM_DADDR_DSO,
 	SORT_MEM_LOCKED,
 	SORT_MEM_TLB,
-- 
cgit v0.10.2


From 17f41571bb2c4a398785452ac2718a6c5d77180e Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 23 Jul 2013 10:09:28 +0200
Subject: kprobes/x86: Call out into INT3 handler directly instead of using
 notifier

In fd4363fff3d96 ("x86: Introduce int3 (breakpoint)-based
instruction patching"), the mechanism that was introduced for
notifying the alternatives code from the int3 exception handler
that an exception occurred was die_notifier.

This is however problematic, as early code might be using jump
labels even before the notifier registration has been performed,
which will then lead to an oops due to an unhandled exception. One
such occurrence was encountered by Fengguang:

 int3: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
 Modules linked in:
 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.11.0-rc1-01429-g04bf576 #8
 task: ffff88000da1b040 ti: ffff88000da1c000 task.ti: ffff88000da1c000
 RIP: 0010:[<ffffffff811098cc>]  [<ffffffff811098cc>] ttwu_do_wakeup+0x28/0x225
 RSP: 0000:ffff88000dd03f10  EFLAGS: 00000006
 RAX: 0000000000000000 RBX: ffff88000dd12940 RCX: ffffffff81769c40
 RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000001
 RBP: ffff88000dd03f28 R08: ffffffff8176a8c0 R09: 0000000000000002
 R10: ffffffff810ff484 R11: ffff88000dd129e8 R12: ffff88000dbc90c0
 R13: ffff88000dbc90c0 R14: ffff88000da1dfd8 R15: ffff88000da1dfd8
 FS:  0000000000000000(0000) GS:ffff88000dd00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: 00000000ffffffff CR3: 0000000001c88000 CR4: 00000000000006e0
 Stack:
  ffff88000dd12940 ffff88000dbc90c0 ffff88000da1dfd8 ffff88000dd03f48
  ffffffff81109e2b ffff88000dd12940 0000000000000000 ffff88000dd03f68
  ffffffff81109e9e 0000000000000000 0000000000012940 ffff88000dd03f98
 Call Trace:
  <IRQ>
  [<ffffffff81109e2b>] ttwu_do_activate.constprop.56+0x6d/0x79
  [<ffffffff81109e9e>] sched_ttwu_pending+0x67/0x84
  [<ffffffff8110c845>] scheduler_ipi+0x15a/0x2b0
  [<ffffffff8104dfb4>] smp_reschedule_interrupt+0x38/0x41
  [<ffffffff8173bf5d>] reschedule_interrupt+0x6d/0x80
  <EOI>
  [<ffffffff810ff484>] ? __atomic_notifier_call_chain+0x5/0xc1
  [<ffffffff8105cc30>] ? native_safe_halt+0xd/0x16
  [<ffffffff81015f10>] default_idle+0x147/0x282
  [<ffffffff81017026>] arch_cpu_idle+0x3d/0x5d
  [<ffffffff81127d6a>] cpu_idle_loop+0x46d/0x5db
  [<ffffffff81127f5c>] cpu_startup_entry+0x84/0x84
  [<ffffffff8104f4f8>] start_secondary+0x3c8/0x3d5
  [...]

Fix this by directly calling poke_int3_handler() from the int3
exception handler (analogously to what ftrace has been doing
already), instead of relying on a notifier whose registration
might not yet have been finalized by the time of the first trap.

Reported-and-tested-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1307231007490.14024@pobox.suse.cz
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4daf8c5..0a3f9c9 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
+#include <asm/ptrace.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -224,6 +225,7 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * inconsistent instruction while you patch.
  */
 extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern int poke_int3_handler(struct pt_regs *regs);
 extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5d8782e..15e8563 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -605,26 +605,24 @@ static void do_sync_core(void *info)
 static bool bp_patching_in_progress;
 static void *bp_int3_handler, *bp_int3_addr;
 
-static int int3_notify(struct notifier_block *self, unsigned long val, void *data)
+int poke_int3_handler(struct pt_regs *regs)
 {
-	struct die_args *args = data;
-
 	/* bp_patching_in_progress */
 	smp_rmb();
 
 	if (likely(!bp_patching_in_progress))
-		return NOTIFY_DONE;
+		return 0;
 
-	/* we are not interested in non-int3 faults and ring > 0 faults */
-	if (val != DIE_INT3 || !args->regs || user_mode_vm(args->regs)
-			    || args->regs->ip != (unsigned long)bp_int3_addr)
-		return NOTIFY_DONE;
+	if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+		return 0;
 
 	/* set up the specified breakpoint handler */
-	args->regs->ip = (unsigned long) bp_int3_handler;
+	regs->ip = (unsigned long) bp_int3_handler;
+
+	return 1;
 
-	return NOTIFY_STOP;
 }
+
 /**
  * text_poke_bp() -- update instructions on live kernel on SMP
  * @addr:	address to patch
@@ -689,16 +687,3 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 	return addr;
 }
 
-/* this one needs to run before anything else handles it as a
- * regular exception */
-static struct notifier_block int3_nb = {
-	.priority = 0x7fffffff,
-	.notifier_call = int3_notify
-};
-
-static int __init int3_init(void)
-{
-	return register_die_notifier(&int3_nb);
-}
-
-arch_initcall(int3_init);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1b23a1c..8c8093b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,6 +58,7 @@
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
+#include <asm/alternative.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/x86_init.h>
@@ -327,6 +328,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 	    ftrace_int3_handler(regs))
 		return;
 #endif
+	if (poke_int3_handler(regs))
+		return;
+
 	prev_state = exception_enter();
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b58b490..6e33498 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1709,7 +1709,7 @@ EXPORT_SYMBOL_GPL(unregister_kprobes);
 
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
-	.priority = 0x7ffffff0 /* High priority, but not first.  */
+	.priority = 0x7fffffff /* we need to be notified first */
 };
 
 unsigned long __weak arch_deref_entry_point(void *entry)
-- 
cgit v0.10.2


From a5cdd40c9877e9aba704c020fd65d26b5cfecf18 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 16 Jul 2013 17:09:07 +0200
Subject: perf: Update perf_event_type documentation

Due to a discussion with Adrian I had a good look at the perf_event_type record
layout and found the documentation to be somewhat unclear.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130716150907.GL23818@dyad.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0b1df41..00d8274 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -478,6 +478,16 @@ enum perf_event_type {
 	 * file will be supported by older perf tools, with these new optional
 	 * fields being ignored.
 	 *
+	 * struct sample_id {
+	 * 	{ u32			pid, tid; } && PERF_SAMPLE_TID
+	 * 	{ u64			time;     } && PERF_SAMPLE_TIME
+	 * 	{ u64			id;       } && PERF_SAMPLE_ID
+	 * 	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
+	 * 	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 * } && perf_event_attr::sample_id_all
+	 */
+
+	/*
 	 * The MMAP events record the PROT_EXEC mappings so that we can
 	 * correlate userspace IPs to code. They have the following structure:
 	 *
@@ -498,6 +508,7 @@ enum perf_event_type {
 	 *	struct perf_event_header	header;
 	 *	u64				id;
 	 *	u64				lost;
+	 * 	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_LOST			= 2,
@@ -508,6 +519,7 @@ enum perf_event_type {
 	 *
 	 *	u32				pid, tid;
 	 *	char				comm[];
+	 * 	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_COMM			= 3,
@@ -518,6 +530,7 @@ enum perf_event_type {
 	 *	u32				pid, ppid;
 	 *	u32				tid, ptid;
 	 *	u64				time;
+	 * 	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_EXIT			= 4,
@@ -528,6 +541,7 @@ enum perf_event_type {
 	 *	u64				time;
 	 *	u64				id;
 	 *	u64				stream_id;
+	 * 	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_THROTTLE			= 5,
@@ -539,6 +553,7 @@ enum perf_event_type {
 	 *	u32				pid, ppid;
 	 *	u32				tid, ptid;
 	 *	u64				time;
+	 * 	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_FORK			= 7,
@@ -549,6 +564,7 @@ enum perf_event_type {
 	 *	u32				pid, tid;
 	 *
 	 *	struct read_format		values;
+	 * 	struct sample_id		sample_id;
 	 * };
 	 */
 	PERF_RECORD_READ			= 8,
@@ -596,7 +612,7 @@ enum perf_event_type {
 	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
 	 *
 	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
-	 *	{ u64			data_src;     } && PERF_SAMPLE_DATA_SRC
+	 *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5e2bce9..1274114 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4462,20 +4462,6 @@ void perf_output_sample(struct perf_output_handle *handle,
 		}
 	}
 
-	if (!event->attr.watermark) {
-		int wakeup_events = event->attr.wakeup_events;
-
-		if (wakeup_events) {
-			struct ring_buffer *rb = handle->rb;
-			int events = local_inc_return(&rb->events);
-
-			if (events >= wakeup_events) {
-				local_sub(wakeup_events, &rb->events);
-				local_inc(&rb->wakeup);
-			}
-		}
-	}
-
 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
 		if (data->br_stack) {
 			size_t size;
@@ -4511,16 +4497,31 @@ void perf_output_sample(struct perf_output_handle *handle,
 		}
 	}
 
-	if (sample_type & PERF_SAMPLE_STACK_USER)
+	if (sample_type & PERF_SAMPLE_STACK_USER) {
 		perf_output_sample_ustack(handle,
 					  data->stack_user_size,
 					  data->regs_user.regs);
+	}
 
 	if (sample_type & PERF_SAMPLE_WEIGHT)
 		perf_output_put(handle, data->weight);
 
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		perf_output_put(handle, data->data_src.val);
+
+	if (!event->attr.watermark) {
+		int wakeup_events = event->attr.wakeup_events;
+
+		if (wakeup_events) {
+			struct ring_buffer *rb = handle->rb;
+			int events = local_inc_return(&rb->events);
+
+			if (events >= wakeup_events) {
+				local_sub(wakeup_events, &rb->events);
+				local_inc(&rb->wakeup);
+			}
+		}
+	}
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
-- 
cgit v0.10.2


From 860f085b74e9f0075de8140ed3a1e5b5e3e39aa8 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 28 Jun 2013 16:22:17 +0300
Subject: perf: Fix broken union in 'struct perf_event_mmap_page'

The capabilities bits must not be "union'ed" together.
Put them in a separate struct.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1372425741-1676-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 00d8274..0041aed 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -375,9 +375,11 @@ struct perf_event_mmap_page {
 	__u64	time_running;		/* time event on cpu */
 	union {
 		__u64	capabilities;
-		__u64	cap_usr_time  : 1,
-			cap_usr_rdpmc : 1,
-			cap_____res   : 62;
+		struct {
+			__u64	cap_usr_time		: 1,
+				cap_usr_rdpmc		: 1,
+				cap_____res		: 62;
+		};
 	};
 
 	/*
-- 
cgit v0.10.2


From c73deb6aecda2955716f31572516f09d930ef450 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 28 Jun 2013 16:22:18 +0300
Subject: perf/x86: Add ability to calculate TSC from perf sample timestamps

For modern CPUs, perf clock is directly related to TSC.  TSC
can be calculated from perf clock and vice versa using a simple
calculation.  Two of the three components of that calculation
are already exported in struct perf_event_mmap_page.  This patch
exports the third.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/1372425741-1676-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index c91e8b9..235be70 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -49,6 +49,7 @@ extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
+extern int check_tsc_disabled(void);
 extern unsigned long native_calibrate_tsc(void);
 
 extern int tsc_clocksource_reliable;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a7c7305..8355c84 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1884,6 +1884,7 @@ static struct pmu pmu = {
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
 	userpg->cap_usr_time = 0;
+	userpg->cap_usr_time_zero = 0;
 	userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
@@ -1897,6 +1898,11 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 	userpg->time_mult = this_cpu_read(cyc2ns);
 	userpg->time_shift = CYC2NS_SCALE_FACTOR;
 	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+
+	if (sched_clock_stable && !check_tsc_disabled()) {
+		userpg->cap_usr_time_zero = 1;
+		userpg->time_zero = this_cpu_read(cyc2ns_offset);
+	}
 }
 
 /*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6ff4924..930e5d4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -89,6 +89,12 @@ int check_tsc_unstable(void)
 }
 EXPORT_SYMBOL_GPL(check_tsc_unstable);
 
+int check_tsc_disabled(void)
+{
+	return tsc_disabled;
+}
+EXPORT_SYMBOL_GPL(check_tsc_disabled);
+
 #ifdef CONFIG_X86_TSC
 int __init notsc_setup(char *str)
 {
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0041aed..efef1d3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -378,7 +378,8 @@ struct perf_event_mmap_page {
 		struct {
 			__u64	cap_usr_time		: 1,
 				cap_usr_rdpmc		: 1,
-				cap_____res		: 62;
+				cap_usr_time_zero	: 1,
+				cap_____res		: 61;
 		};
 	};
 
@@ -420,12 +421,29 @@ struct perf_event_mmap_page {
 	__u16	time_shift;
 	__u32	time_mult;
 	__u64	time_offset;
+	/*
+	 * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
+	 * from sample timestamps.
+	 *
+	 *   time = timestamp - time_zero;
+	 *   quot = time / time_mult;
+	 *   rem  = time % time_mult;
+	 *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+	 *
+	 * And vice versa:
+	 *
+	 *   quot = cyc >> time_shift;
+	 *   rem  = cyc & ((1 << time_shift) - 1);
+	 *   timestamp = time_zero + quot * time_mult +
+	 *               ((rem * time_mult) >> time_shift);
+	 */
+	__u64	time_zero;
 
 		/*
 		 * Hole for extension of the self monitor capabilities
 		 */
 
-	__u64	__reserved[120];	/* align to 1k */
+	__u64	__reserved[119];	/* align to 1k */
 
 	/*
 	 * Control data for the mmap() data buffer.
-- 
cgit v0.10.2


From 3bd5a5fc8c6b9fe769777abf74b0ab5fbd7930b4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 28 Jun 2013 16:22:19 +0300
Subject: perf tools: Add test for converting perf time to/from TSC

The test uses the newly added cap_usr_time_zero and time_zero of
perf_event_mmap_page.  TSC from rdtsc is compared with the time
from 2 perf events.  The test passes if the calculated times are
all in the correct order.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1372425741-1676-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 024680b..bfd12d0 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -389,6 +389,9 @@ LIB_OBJS += $(OUTPUT)tests/bp_signal.o
 LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
 LIB_OBJS += $(OUTPUT)tests/task-exit.o
 LIB_OBJS += $(OUTPUT)tests/sw-clock.o
+ifeq ($(ARCH),x86)
+LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
+endif
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 815841c..8801fe0 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -6,3 +6,5 @@ ifndef NO_LIBUNWIND
 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o
 endif
 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
+LIB_H += arch/$(ARCH)/util/tsc.h
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
new file mode 100644
index 0000000..f111744
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -0,0 +1,78 @@
+#include <stdbool.h>
+#include <errno.h>
+
+#include <linux/perf_event.h>
+
+#include "../../perf.h"
+#include "../../util/types.h"
+#include "../../util/debug.h"
+#include "tsc.h"
+
+/*
+ * Convert the perf timestamp @ns to hardware clock cycles using @tc:
+ * (ns - time_zero) is scaled by 2^time_shift / time_mult, with the quotient
+ * and remainder of the division by time_mult scaled separately.
+ */
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+	u64 time, quot, rem;
+
+	time = ns - tc->time_zero;
+	quot = time / tc->time_mult;
+	rem  = time % tc->time_mult;
+	return (quot << tc->time_shift) +
+	       (rem << tc->time_shift) / tc->time_mult;
+}
+
+/*
+ * Convert the cycle count @cyc to a perf timestamp using @tc; the inverse of
+ * perf_time_to_tsc(): cyc is scaled by time_mult / 2^time_shift and time_zero
+ * is added.
+ */
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+	u64 quot, rem;
+
+	quot = cyc >> tc->time_shift;
+	/* Build the mask in 64 bits: a plain int 1 overflows for shifts >= 31. */
+	rem  = cyc & (((u64)1 << tc->time_shift) - 1);
+	return tc->time_zero + quot * tc->time_mult +
+	       ((rem * tc->time_mult) >> tc->time_shift);
+}
+
+/*
+ * Copy the TSC conversion parameters from the mmap'ed control page @pc into
+ * @tc.  pc->lock is sampled before and after the copy (with rmb() in between)
+ * and the copy is retried until both samples match and the value is even.
+ *
+ * Returns 0 on success, -EINVAL if no consistent copy was obtained after more
+ * than 10000 attempts, or -EOPNOTSUPP if cap_usr_time_zero is not set.
+ */
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc)
+{
+	bool cap_usr_time_zero;
+	u32 seq;
+	int i = 0;
+
+	while (1) {
+		seq = pc->lock;
+		rmb();
+		tc->time_mult = pc->time_mult;
+		tc->time_shift = pc->time_shift;
+		tc->time_zero = pc->time_zero;
+		cap_usr_time_zero = pc->cap_usr_time_zero;
+		rmb();
+		if (pc->lock == seq && !(seq & 1))
+			break;
+		if (++i > 10000) {
+			pr_debug("failed to get perf_event_mmap_page lock\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!cap_usr_time_zero)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
new file mode 100644
index 0000000..a24dec8
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -0,0 +1,20 @@
+#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+
+#include "../../util/types.h"
+
+struct perf_tsc_conversion {
+	u16 time_shift;
+	u32 time_mult;
+	u64 time_zero;
+};
+
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+
+#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 35b45f1466..b7b4049 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -93,6 +93,12 @@ static struct test {
 		.desc = "Test software clock events have valid period values",
 		.func = test__sw_clock_freq,
 	},
+#if defined(__x86_64__) || defined(__i386__)
+	{
+		.desc = "Test converting perf time to TSC",
+		.func = test__perf_time_to_tsc,
+	},
+#endif
 	{
 		.func = NULL,
 	},
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
new file mode 100644
index 0000000..0ab61b1
--- /dev/null
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/prctl.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+#include "../arch/x86/util/tsc.h"
+
+#define CHECK__(x) {				\
+	while ((x) < 0) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+}
+
+#define CHECK_NOT_NULL__(x) {			\
+	while ((x) == NULL) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+}
+
+static u64 rdtsc(void)
+{
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((u64)high) << 32;
+}
+
+/**
+ * test__perf_time_to_tsc - test converting perf time to TSC.
+ *
+ * This function implements a test that checks that the conversion of perf time
+ * to and from TSC is consistent with the order of events.  If the test passes
+ * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
+ * supported then the test passes but " (not supported)" is printed.
+ */
+int test__perf_time_to_tsc(void)
+{
+	struct perf_record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 4000,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+		.sample_time	     = true,
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int err = -1, ret, i;
+	const char *comm1, *comm2;
+	struct perf_tsc_conversion tc;
+	struct perf_event_mmap_page *pc;
+	union perf_event *event;
+	u64 test_tsc, comm1_tsc, comm2_tsc;
+	u64 test_time, comm1_time = 0, comm2_time = 0;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	CHECK_NOT_NULL__(threads);
+
+	cpus = cpu_map__new(NULL);
+	CHECK_NOT_NULL__(cpus);
+
+	evlist = perf_evlist__new();
+	CHECK_NOT_NULL__(evlist);
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	CHECK__(parse_events(evlist, "cycles:u"));
+
+	perf_evlist__config(evlist, &opts);
+
+	evsel = perf_evlist__first(evlist);
+
+	evsel->attr.comm = 1;
+	evsel->attr.disabled = 1;
+	evsel->attr.enable_on_exec = 0;
+
+	CHECK__(perf_evlist__open(evlist));
+
+	CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false));
+
+	pc = evlist->mmap[0].base;
+	ret = perf_read_tsc_conversion(pc, &tc);
+	if (ret) {
+		if (ret == -EOPNOTSUPP) {
+			fprintf(stderr, " (not supported)");
+			err = 0; /* passes; still release resources via out_err */
+		}
+		goto out_err;
+	}
+
+	perf_evlist__enable(evlist);
+
+	comm1 = "Test COMM 1";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0));
+
+	test_tsc = rdtsc();
+
+	comm2 = "Test COMM 2";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0));
+
+	perf_evlist__disable(evlist);
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			struct perf_sample sample;
+
+			if (event->header.type != PERF_RECORD_COMM ||
+			    (pid_t)event->comm.pid != getpid() ||
+			    (pid_t)event->comm.tid != getpid())
+				continue;
+
+			if (strcmp(event->comm.comm, comm1) == 0) {
+				CHECK__(perf_evsel__parse_sample(evsel, event,
+								 &sample));
+				comm1_time = sample.time;
+			}
+			if (strcmp(event->comm.comm, comm2) == 0) {
+				CHECK__(perf_evsel__parse_sample(evsel, event,
+								 &sample));
+				comm2_time = sample.time;
+			}
+		}
+	}
+
+	if (!comm1_time || !comm2_time)
+		goto out_err;
+
+	test_time = tsc_to_perf_time(test_tsc, &tc);
+	comm1_tsc = perf_time_to_tsc(comm1_time, &tc);
+	comm2_tsc = perf_time_to_tsc(comm2_time, &tc);
+
+	pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n",
+		 comm1_time, comm1_tsc);
+	pr_debug("rdtsc          time %"PRIu64" tsc %"PRIu64"\n",
+		 test_time, test_tsc);
+	pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n",
+		 comm2_time, comm2_tsc);
+
+	if (test_time <= comm1_time ||
+	    test_time >= comm2_time)
+		goto out_err;
+
+	if (test_tsc <= comm1_tsc ||
+	    test_tsc >= comm2_tsc)
+		goto out_err;
+
+	err = 0;
+
+out_err:
+	if (evlist) {
+		perf_evlist__disable(evlist);
+		perf_evlist__munmap(evlist);
+		perf_evlist__close(evlist);
+		perf_evlist__delete(evlist);
+	}
+	if (cpus)
+		cpu_map__delete(cpus);
+	if (threads)
+		thread_map__delete(threads);
+
+	return err;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 07a92f9..d22202a 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -35,5 +35,6 @@ int test__bp_signal(void);
 int test__bp_signal_overflow(void);
 int test__task_exit(void);
 int test__sw_clock_freq(void);
+int test__perf_time_to_tsc(void);
 
 #endif /* TESTS_H */
-- 
cgit v0.10.2


From 685207963be973fbb73550db6edaf920a283e1a7 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Mon, 15 Jul 2013 17:49:19 +0400
Subject: sched: Move h_load calculation to task_h_load()

The bad thing about update_h_load(), which computes hierarchical load
factor for task groups, is that it is called for each task group in the
system before every load balancer run, and since rebalance can be
triggered very often, this function can eat really a lot of cpu time if
there are many cpu cgroups in the system.

Although the situation was improved significantly by commit a35b646
('sched, cgroup: Reduce rq->lock hold times for large cgroup
hierarchies'), the problem still can arise under some kinds of loads,
e.g. when cpus are switching from idle to busy and back very frequently.

For instance, when I start 1000 processes that wake up every
millisecond on my 8-cpu host, 'top' and 'perf top' show:

Cpu(s): 17.8%us, 24.3%sy,  0.0%ni, 57.9%id,  0.0%wa,  0.0%hi,  0.0%si
Events: 243K cycles
  7.57%  [kernel]               [k] __schedule
  7.08%  [kernel]               [k] timerqueue_add
  6.13%  libc-2.12.so           [.] usleep

Then if I create 10000 *idle* cpu cgroups (no processes in them), cpu
usage increases significantly although the 'wakers' are still executing
in the root cpu cgroup:

Cpu(s): 19.1%us, 48.7%sy,  0.0%ni, 31.6%id,  0.0%wa,  0.0%hi,  0.7%si
Events: 230K cycles
 24.56%  [kernel]            [k] tg_load_down
  5.76%  [kernel]            [k] __schedule

This happens because this particular kind of load triggers 'new idle'
rebalance very frequently, which requires calling update_h_load(),
which, in turn, calls tg_load_down() for every *idle* cpu cgroup even
though it is absolutely useless, because idle cpu cgroups have no tasks
to pull.

This patch tries to improve the situation by making h_load calculation
proceed only when h_load is really necessary. To achieve this, it
substitutes update_h_load() with update_cfs_rq_h_load(), which computes
h_load only for a given cfs_rq and all its ascendants, and makes the
load balancer call this function whenever it considers if a task should
be pulled, i.e. it moves h_load calculations directly to task_h_load().
For h_load of the same cfs_rq not to be updated multiple times (in case
several tasks in the same cgroup are considered during the same balance
run), the patch keeps the time of the last h_load update for each cfs_rq
and stops the calculation when it finds h_load to be up to date.

The benefit of it is that h_load is computed only for those cfs_rq's,
which really need it, in particular all idle task groups are skipped.
Although this, in fact, moves h_load calculation under rq lock, it
should not affect latency much, because the amount of work done under rq
lock while trying to pull tasks is limited by sched_nr_migrate.

With the patch applied and the setup described above (1000 wakers in
the root cgroup and 10000 idle cgroups), I get:

Cpu(s): 16.9%us, 24.8%sy,  0.0%ni, 58.4%id,  0.0%wa,  0.0%hi,  0.0%si
Events: 242K cycles
  7.57%  [kernel]                  [k] __schedule
  6.70%  [kernel]                  [k] timerqueue_add
  5.93%  libc-2.12.so              [.] usleep

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373896159-1278-1-git-send-email-vdavydov@parallels.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bb456f4..765d87a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4171,47 +4171,48 @@ static void update_blocked_averages(int cpu)
 }
 
 /*
- * Compute the cpu's hierarchical load factor for each task group.
+ * Compute the hierarchical load factor for cfs_rq and all its ascendants.
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parents load.
  */
-static int tg_load_down(struct task_group *tg, void *data)
+static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
 {
-	unsigned long load;
-	long cpu = (long)data;
-
-	if (!tg->parent) {
-		load = cpu_rq(cpu)->avg.load_avg_contrib;
-	} else {
-		load = tg->parent->cfs_rq[cpu]->h_load;
-		load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
-				tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
-	}
-
-	tg->cfs_rq[cpu]->h_load = load;
-
-	return 0;
-}
-
-static void update_h_load(long cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
+	struct rq *rq = rq_of(cfs_rq);
+	struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
 	unsigned long now = jiffies;
+	unsigned long load;
 
-	if (rq->h_load_throttle == now)
+	if (cfs_rq->last_h_load_update == now)
 		return;
 
-	rq->h_load_throttle = now;
+	cfs_rq->h_load_next = NULL;
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+		cfs_rq->h_load_next = se;
+		if (cfs_rq->last_h_load_update == now)
+			break;
+	}
 
-	rcu_read_lock();
-	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-	rcu_read_unlock();
+	if (!se) {
+		cfs_rq->h_load = rq->avg.load_avg_contrib;
+		cfs_rq->last_h_load_update = now;
+	}
+
+	while ((se = cfs_rq->h_load_next) != NULL) {
+		load = cfs_rq->h_load;
+		load = div64_ul(load * se->avg.load_avg_contrib,
+				cfs_rq->runnable_load_avg + 1);
+		cfs_rq = group_cfs_rq(se);
+		cfs_rq->h_load = load;
+		cfs_rq->last_h_load_update = now;
+	}
 }
 
 static unsigned long task_h_load(struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
+	update_cfs_rq_h_load(cfs_rq);
 	return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
 			cfs_rq->runnable_load_avg + 1);
 }
@@ -4220,10 +4221,6 @@ static inline void update_blocked_averages(int cpu)
 {
 }
 
-static inline void update_h_load(long cpu)
-{
-}
-
 static unsigned long task_h_load(struct task_struct *p)
 {
 	return p->se.avg.load_avg_contrib;
@@ -5108,7 +5105,6 @@ redo:
 		env.src_rq    = busiest;
 		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
-		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
 		double_rq_lock(env.dst_rq, busiest);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef0a7b2..5e129ef 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -285,7 +285,6 @@ struct cfs_rq {
 	/* Required to track per-cpu representation of a task_group */
 	u32 tg_runnable_contrib;
 	unsigned long tg_load_contrib;
-#endif /* CONFIG_FAIR_GROUP_SCHED */
 
 	/*
 	 *   h_load = weight * f(tg)
@@ -294,6 +293,9 @@ struct cfs_rq {
 	 * this group.
 	 */
 	unsigned long h_load;
+	u64 last_h_load_update;
+	struct sched_entity *h_load_next;
+#endif /* CONFIG_FAIR_GROUP_SCHED */
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -429,9 +431,6 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#ifdef CONFIG_SMP
-	unsigned long h_load_throttle;
-#endif /* CONFIG_SMP */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
-- 
cgit v0.10.2


From 62470419e993f8d9d93db0effd3af4296ecb79a5 Mon Sep 17 00:00:00 2001
From: Michael Wang <wangyun@linux.vnet.ibm.com>
Date: Thu, 4 Jul 2013 12:55:51 +0800
Subject: sched: Implement smarter wake-affine logic

The wake-affine scheduler feature is currently always trying to pull
the wakee close to the waker. In theory this should be beneficial if
the waker's CPU caches hot data for the wakee, and it's also beneficial
in the extreme ping-pong high context switch rate case.

Testing shows it can benefit hackbench up to 15%.

However, the feature is somewhat blind, from which some workloads
such as pgbench suffer. It's also time-consuming algorithmically.

Testing shows it can damage pgbench up to 50% - far more than the
benefit it brings in the best case.

So wake-affine should be smarter and it should realize when to
stop its thankless effort at trying to find a suitable CPU to wake on.

This patch introduces 'wakee_flips', which will be increased each
time the task flips (switches) its wakee target.

So a high 'wakee_flips' value means the task has more than one
wakee, and the bigger the number, the higher the wakeup frequency.

Now when making the decision on whether to pull or not, pay attention to
a wakee with a high 'wakee_flips': pulling such a task may benefit the
wakee, but it also implies that the waker will face cruel competition
later. How cruel it is, or how quickly it passes, depends on the story
behind 'wakee_flips'; either way, the waker suffers.

Furthermore, if waker also has a high 'wakee_flips', that implies that
multiple tasks rely on it, then waker's higher latency will damage all
of them, so pulling wakee seems to be a bad deal.

Thus, when 'waker->wakee_flips / wakee->wakee_flips' becomes
higher and higher, the cost of pulling seems to be worse and worse.

The patch therefore helps the wake-affine feature to stop its pulling
work when:

	wakee->wakee_flips > factor &&
	waker->wakee_flips > (factor * wakee->wakee_flips)

The 'factor' here is the number of CPUs in the current CPU's NUMA node,
so a bigger node will lead to more pulling since the condition for
stopping becomes harder to meet.

After applying the patch, pgbench shows up to 40% improvements and no regressions.

Tested with 12 cpu x86 server and tip 3.10.0-rc7.

The percentages in the final column highlight the areas with the biggest wins,
all other areas improved as well:

	pgbench		    base	smart

	| db_size | clients |  tps  |	|  tps  |
	+---------+---------+-------+   +-------+
	| 22 MB   |       1 | 10598 |   | 10796 |
	| 22 MB   |       2 | 21257 |   | 21336 |
	| 22 MB   |       4 | 41386 |   | 41622 |
	| 22 MB   |       8 | 51253 |   | 57932 |
	| 22 MB   |      12 | 48570 |   | 54000 |
	| 22 MB   |      16 | 46748 |   | 55982 | +19.75%
	| 22 MB   |      24 | 44346 |   | 55847 | +25.93%
	| 22 MB   |      32 | 43460 |   | 54614 | +25.66%
	| 7484 MB |       1 |  8951 |   |  9193 |
	| 7484 MB |       2 | 19233 |   | 19240 |
	| 7484 MB |       4 | 37239 |   | 37302 |
	| 7484 MB |       8 | 46087 |   | 50018 |
	| 7484 MB |      12 | 42054 |   | 48763 |
	| 7484 MB |      16 | 40765 |   | 51633 | +26.66%
	| 7484 MB |      24 | 37651 |   | 52377 | +39.11%
	| 7484 MB |      32 | 37056 |   | 51108 | +37.92%
	| 15 GB   |       1 |  8845 |   |  9104 |
	| 15 GB   |       2 | 19094 |   | 19162 |
	| 15 GB   |       4 | 36979 |   | 36983 |
	| 15 GB   |       8 | 46087 |   | 49977 |
	| 15 GB   |      12 | 41901 |   | 48591 |
	| 15 GB   |      16 | 40147 |   | 50651 | +26.16%
	| 15 GB   |      24 | 37250 |   | 52365 | +40.58%
	| 15 GB   |      32 | 36470 |   | 50015 | +37.14%

Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/51D50057.9000809@linux.vnet.ibm.com
[ Improved the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b9..4f163a8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1034,6 +1034,9 @@ struct task_struct {
 #ifdef CONFIG_SMP
 	struct llist_node wake_entry;
 	int on_cpu;
+	struct task_struct *last_wakee;
+	unsigned long wakee_flips;
+	unsigned long wakee_flip_decay_ts;
 #endif
 	int on_rq;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 765d87a..860063a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3017,6 +3017,23 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 	return 0;
 }
 
+static void record_wakee(struct task_struct *p)
+{
+	/*
+	 * Count wakee switches of current. Wipe the count about once
+	 * per HZ jiffies as a cheap decay (busy tasks rebuild it fast);
+	 * time_after() keeps the test correct when jiffies wraps.
+	 */
+	if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
+		current->wakee_flips = 0;
+		current->wakee_flip_decay_ts = jiffies;
+	}
+
+	if (current->last_wakee != p) {
+		current->last_wakee = p;
+		current->wakee_flips++;
+	}
+}
 
 static void task_waking_fair(struct task_struct *p)
 {
@@ -3037,6 +3054,7 @@ static void task_waking_fair(struct task_struct *p)
 #endif
 
 	se->vruntime -= min_vruntime;
+	record_wakee(p);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3155,6 +3173,28 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
+static int wake_wide(struct task_struct *p)
+{
+	int factor = nr_cpus_node(cpu_to_node(smp_processor_id()));
+
+	/*
+	 * Yeah, it's the switching-frequency, could means many wakee or
+	 * rapidly switch, use factor here will just help to automatically
+	 * adjust the loose-degree, so bigger node will lead to more pull.
+	 */
+	if (p->wakee_flips > factor) {
+		/*
+		 * wakee is somewhat hot, it needs certain amount of cpu
+		 * resource, so if waker is far more hot, prefer to leave
+		 * it alone.
+		 */
+		if (current->wakee_flips > (factor * p->wakee_flips))
+			return 1;
+	}
+
+	return 0;
+}
+
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
 	s64 this_load, load;
@@ -3164,6 +3204,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	unsigned long weight;
 	int balanced;
 
+	/*
+	 * If we wake multiple tasks be careful to not bounce
+	 * ourselves around too much.
+	 */
+	if (wake_wide(p))
+		return 0;
+
 	idx	  = sd->wake_idx;
 	this_cpu  = smp_processor_id();
 	prev_cpu  = task_cpu(p);
-- 
cgit v0.10.2


From 7d9ffa8961482232d964173cccba6e14d2d543b2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 4 Jul 2013 12:56:46 +0800
Subject: sched: Micro-optimize the smart wake-affine logic

Smart wake-affine is using node-size as the factor currently, but the overhead
of the mask operation is high.

Thus, this patch introduces the 'sd_llc_size' percpu variable, which records
the size of the highest cache-sharing domain, and makes it the new factor, in
order to reduce the overhead and make the factor more reasonable.

Tested-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Tested-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Link: http://lkml.kernel.org/r/51D5008E.6030102@linux.vnet.ibm.com
[ Tidied up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb..6df0fbe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5083,18 +5083,23 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * two cpus are in the same cache domain, see cpus_share_cache().
  */
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
+DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 
 static void update_top_cache_domain(int cpu)
 {
 	struct sched_domain *sd;
 	int id = cpu;
+	int size = 1;
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-	if (sd)
+	if (sd) {
 		id = cpumask_first(sched_domain_span(sd));
+		size = cpumask_weight(sched_domain_span(sd));
+	}
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
+	per_cpu(sd_llc_size, cpu) = size;
 	per_cpu(sd_llc_id, cpu) = id;
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 860063a..f237437 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3175,7 +3175,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 static int wake_wide(struct task_struct *p)
 {
-	int factor = nr_cpus_node(cpu_to_node(smp_processor_id()));
+	int factor = this_cpu_read(sd_llc_size);
 
 	/*
 	 * Yeah, it's the switching-frequency, could means many wakee or
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5e129ef..4c1cb80 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -594,6 +594,7 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 }
 
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
+DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
 
 struct sched_group_power {
-- 
cgit v0.10.2


From 6050cb0b0b366092d1383bc23d7b16cd26db00f0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:30:59 +0200
Subject: perf: Fix branch stack refcount leak on callchain init failure

On callchain buffers allocation failure, free_event() is
called and all the accounting performed in perf_event_alloc()
for that event is cancelled.

But if the event has branch stack sampling, it is unaccounted
as well from the branch stack sampling events refcounts.

This is a bug because this accounting is performed after the
callchain buffer allocation. As a result, the branch stack sampling
events refcount can become negative.

To fix this, move the branch stack event accounting before the
callchain buffer allocation.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1274114..f35aa7e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6567,6 +6567,12 @@ done:
 			atomic_inc(&nr_comm_events);
 		if (event->attr.task)
 			atomic_inc(&nr_task_events);
+		if (has_branch_stack(event)) {
+			static_key_slow_inc(&perf_sched_events.key);
+			if (!(event->attach_state & PERF_ATTACH_TASK))
+				atomic_inc(&per_cpu(perf_branch_stack_events,
+						    event->cpu));
+		}
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 			err = get_callchain_buffers();
 			if (err) {
@@ -6574,12 +6580,6 @@ done:
 				return ERR_PTR(err);
 			}
 		}
-		if (has_branch_stack(event)) {
-			static_key_slow_inc(&perf_sched_events.key);
-			if (!(event->attach_state & PERF_ATTACH_TASK))
-				atomic_inc(&per_cpu(perf_branch_stack_events,
-						    event->cpu));
-		}
 	}
 
 	return event;
-- 
cgit v0.10.2


From 90983b16078ab0fdc58f0dab3e8e3da79c9579a2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:00 +0200
Subject: perf: Sanitize get_callchain_buffer()

In case of allocation failure, get_callchain_buffers() keeps the
refcount incremented for the current event.

As a result, when get_callchain_buffers() returns an error,
we must cleanup what it did by cancelling its last refcount
with a call to put_callchain_buffers().

This is a hack in order to be able to call free_event()
after that failure.

The original purpose of that was to simplify the failure
path. But this error handling is actually counterintuitive,
ugly and not very easy to follow, because one expects
the resources used to perform a service to be cleaned up
by the callee in case of failure, not by the caller.

So let's clean this up by cancelling the refcount from
get_callchain_buffers() in case of failure. And correctly free
the event accordingly in perf_event_alloc().

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index c772061..76a8bc5 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -117,6 +117,8 @@ int get_callchain_buffers(void)
 	err = alloc_callchain_buffers();
 exit:
 	mutex_unlock(&callchain_mutex);
+	if (err)
+		atomic_dec(&nr_callchain_events);
 
 	return err;
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f35aa7e..3b99862 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6457,7 +6457,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	struct pmu *pmu;
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	long err;
+	long err = -EINVAL;
 
 	if ((unsigned)cpu >= nr_cpu_ids) {
 		if (!task || cpu != -1)
@@ -6540,25 +6540,23 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	 * we currently do not support PERF_FORMAT_GROUP on inherited events
 	 */
 	if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
-		goto done;
+		goto err_ns;
 
 	pmu = perf_init_event(event);
-
-done:
-	err = 0;
 	if (!pmu)
-		err = -EINVAL;
-	else if (IS_ERR(pmu))
+		goto err_ns;
+	else if (IS_ERR(pmu)) {
 		err = PTR_ERR(pmu);
-
-	if (err) {
-		if (event->ns)
-			put_pid_ns(event->ns);
-		kfree(event);
-		return ERR_PTR(err);
+		goto err_ns;
 	}
 
 	if (!event->parent) {
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+			err = get_callchain_buffers();
+			if (err)
+				goto err_pmu;
+		}
+
 		if (event->attach_state & PERF_ATTACH_TASK)
 			static_key_slow_inc(&perf_sched_events.key);
 		if (event->attr.mmap || event->attr.mmap_data)
@@ -6573,16 +6571,19 @@ done:
 				atomic_inc(&per_cpu(perf_branch_stack_events,
 						    event->cpu));
 		}
-		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
-			err = get_callchain_buffers();
-			if (err) {
-				free_event(event);
-				return ERR_PTR(err);
-			}
-		}
 	}
 
 	return event;
+
+err_pmu:
+	if (event->destroy)
+		event->destroy(event);
+err_ns:
+	if (event->ns)
+		put_pid_ns(event->ns);
+	kfree(event);
+
+	return ERR_PTR(err);
 }
 
 static int perf_copy_attr(struct perf_event_attr __user *uattr,
-- 
cgit v0.10.2


From 766d6c076928191d75ad5b0d0f58f52b1e7682d8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:01 +0200
Subject: perf: Factor out event accounting code to
 account_event()/__free_event()

Gather all the event accounting code to a single place,
once all the prerequisites are completed. This simplifies
the refcounting.

Original-patch-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-4-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3b99862..158fd57 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3128,6 +3128,21 @@ static void free_event_rcu(struct rcu_head *head)
 static void ring_buffer_put(struct ring_buffer *rb);
 static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
 
+static void __free_event(struct perf_event *event)
+{
+	if (!event->parent) {
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			put_callchain_buffers();
+	}
+
+	if (event->destroy)
+		event->destroy(event);
+
+	if (event->ctx)
+		put_ctx(event->ctx);
+
+	call_rcu(&event->rcu_head, free_event_rcu);
+}
 static void free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
@@ -3141,8 +3156,6 @@ static void free_event(struct perf_event *event)
 			atomic_dec(&nr_comm_events);
 		if (event->attr.task)
 			atomic_dec(&nr_task_events);
-		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-			put_callchain_buffers();
 		if (is_cgroup_event(event)) {
 			atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
 			static_key_slow_dec_deferred(&perf_sched_events);
@@ -3180,13 +3193,8 @@ static void free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
-	if (event->destroy)
-		event->destroy(event);
-
-	if (event->ctx)
-		put_ctx(event->ctx);
 
-	call_rcu(&event->rcu_head, free_event_rcu);
+	__free_event(event);
 }
 
 int perf_event_release_kernel(struct perf_event *event)
@@ -6443,6 +6451,29 @@ unlock:
 	return pmu;
 }
 
+static void account_event(struct perf_event *event)
+{
+	if (event->attach_state & PERF_ATTACH_TASK)
+		static_key_slow_inc(&perf_sched_events.key);
+	if (event->attr.mmap || event->attr.mmap_data)
+		atomic_inc(&nr_mmap_events);
+	if (event->attr.comm)
+		atomic_inc(&nr_comm_events);
+	if (event->attr.task)
+		atomic_inc(&nr_task_events);
+	if (has_branch_stack(event)) {
+		static_key_slow_inc(&perf_sched_events.key);
+		if (!(event->attach_state & PERF_ATTACH_TASK))
+			atomic_inc(&per_cpu(perf_branch_stack_events,
+					    event->cpu));
+	}
+
+	if (is_cgroup_event(event)) {
+		atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
+		static_key_slow_inc(&perf_sched_events.key);
+	}
+}
+
 /*
  * Allocate and initialize a event structure
  */
@@ -6556,21 +6587,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 			if (err)
 				goto err_pmu;
 		}
-
-		if (event->attach_state & PERF_ATTACH_TASK)
-			static_key_slow_inc(&perf_sched_events.key);
-		if (event->attr.mmap || event->attr.mmap_data)
-			atomic_inc(&nr_mmap_events);
-		if (event->attr.comm)
-			atomic_inc(&nr_comm_events);
-		if (event->attr.task)
-			atomic_inc(&nr_task_events);
-		if (has_branch_stack(event)) {
-			static_key_slow_inc(&perf_sched_events.key);
-			if (!(event->attach_state & PERF_ATTACH_TASK))
-				atomic_inc(&per_cpu(perf_branch_stack_events,
-						    event->cpu));
-		}
 	}
 
 	return event;
@@ -6865,17 +6881,14 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	if (flags & PERF_FLAG_PID_CGROUP) {
 		err = perf_cgroup_connect(pid, event, &attr, group_leader);
-		if (err)
-			goto err_alloc;
-		/*
-		 * one more event:
-		 * - that has cgroup constraint on event->cpu
-		 * - that may need work on context switch
-		 */
-		atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
-		static_key_slow_inc(&perf_sched_events.key);
+		if (err) {
+			__free_event(event);
+			goto err_task;
+		}
 	}
 
+	account_event(event);
+
 	/*
 	 * Special case software events and allow them to be part of
 	 * any hardware group.
@@ -7071,6 +7084,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err;
 	}
 
+	account_event(event);
+
 	ctx = find_get_context(event->pmu, task, cpu);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
-- 
cgit v0.10.2


From 4beb31f3657348a8b702dd014d01c520e522012f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:02 +0200
Subject: perf: Split the per-cpu accounting part of the event accounting code

This way we can use the per-cpu handling separately.
This is going to be used to fix the event migration
code accounting.

Original-patch-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-5-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 158fd57..3a4b73a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3128,6 +3128,40 @@ static void free_event_rcu(struct rcu_head *head)
 static void ring_buffer_put(struct ring_buffer *rb);
 static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
 
+static void unaccount_event_cpu(struct perf_event *event, int cpu)
+{
+	if (event->parent)
+		return;
+
+	if (has_branch_stack(event)) {
+		if (!(event->attach_state & PERF_ATTACH_TASK))
+			atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
+	}
+	if (is_cgroup_event(event))
+		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
+}
+
+static void unaccount_event(struct perf_event *event)
+{
+	if (event->parent)
+		return;
+
+	if (event->attach_state & PERF_ATTACH_TASK)
+		static_key_slow_dec_deferred(&perf_sched_events);
+	if (event->attr.mmap || event->attr.mmap_data)
+		atomic_dec(&nr_mmap_events);
+	if (event->attr.comm)
+		atomic_dec(&nr_comm_events);
+	if (event->attr.task)
+		atomic_dec(&nr_task_events);
+	if (is_cgroup_event(event))
+		static_key_slow_dec_deferred(&perf_sched_events);
+	if (has_branch_stack(event))
+		static_key_slow_dec_deferred(&perf_sched_events);
+
+	unaccount_event_cpu(event, event->cpu);
+}
+
 static void __free_event(struct perf_event *event)
 {
 	if (!event->parent) {
@@ -3147,29 +3181,7 @@ static void free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
 
-	if (!event->parent) {
-		if (event->attach_state & PERF_ATTACH_TASK)
-			static_key_slow_dec_deferred(&perf_sched_events);
-		if (event->attr.mmap || event->attr.mmap_data)
-			atomic_dec(&nr_mmap_events);
-		if (event->attr.comm)
-			atomic_dec(&nr_comm_events);
-		if (event->attr.task)
-			atomic_dec(&nr_task_events);
-		if (is_cgroup_event(event)) {
-			atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
-			static_key_slow_dec_deferred(&perf_sched_events);
-		}
-
-		if (has_branch_stack(event)) {
-			static_key_slow_dec_deferred(&perf_sched_events);
-			/* is system-wide event */
-			if (!(event->attach_state & PERF_ATTACH_TASK)) {
-				atomic_dec(&per_cpu(perf_branch_stack_events,
-						    event->cpu));
-			}
-		}
-	}
+	unaccount_event(event);
 
 	if (event->rb) {
 		struct ring_buffer *rb;
@@ -6451,8 +6463,24 @@ unlock:
 	return pmu;
 }
 
+static void account_event_cpu(struct perf_event *event, int cpu)
+{
+	if (event->parent)
+		return;
+
+	if (has_branch_stack(event)) {
+		if (!(event->attach_state & PERF_ATTACH_TASK))
+			atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
+	}
+	if (is_cgroup_event(event))
+		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
+}
+
 static void account_event(struct perf_event *event)
 {
+	if (event->parent)
+		return;
+
 	if (event->attach_state & PERF_ATTACH_TASK)
 		static_key_slow_inc(&perf_sched_events.key);
 	if (event->attr.mmap || event->attr.mmap_data)
@@ -6461,17 +6489,12 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (has_branch_stack(event)) {
+	if (has_branch_stack(event))
 		static_key_slow_inc(&perf_sched_events.key);
-		if (!(event->attach_state & PERF_ATTACH_TASK))
-			atomic_inc(&per_cpu(perf_branch_stack_events,
-					    event->cpu));
-	}
-
-	if (is_cgroup_event(event)) {
-		atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
+	if (is_cgroup_event(event))
 		static_key_slow_inc(&perf_sched_events.key);
-	}
+
+	account_event_cpu(event, event->cpu);
 }
 
 /*
-- 
cgit v0.10.2


From 9a545de019b536771feefb76f85e5038b65c2190 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:03 +0200
Subject: perf: Migrate per cpu event accounting

When an event is migrated, move the event per-cpu
accounting accordingly so that branch stack and cgroup
events work correctly on the new CPU.

Original-patch-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-6-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3a4b73a..63bdec9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7145,6 +7145,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
 				 event_entry) {
 		perf_remove_from_context(event);
+		unaccount_event_cpu(event, src_cpu);
 		put_ctx(src_ctx);
 		list_add(&event->event_entry, &events);
 	}
@@ -7157,6 +7158,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 		list_del(&event->event_entry);
 		if (event->state >= PERF_EVENT_STATE_OFF)
 			event->state = PERF_EVENT_STATE_INACTIVE;
+		account_event_cpu(event, dst_cpu);
 		perf_install_in_context(dst_ctx, event, dst_cpu);
 		get_ctx(dst_ctx);
 	}
-- 
cgit v0.10.2


From ba8a75c16e292c0a3a87406a77508cbbc6cf4ee2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:04 +0200
Subject: perf: Account freq events per cpu

This is going to be used by the full dynticks subsystem
as a finer-grained information to know when to keep and
when to stop the tick.

Original-patch-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-7-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 63bdec9..3fe385a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -141,6 +141,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(atomic_t, perf_freq_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -3139,6 +3140,9 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 	}
 	if (is_cgroup_event(event))
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
+
+	if (event->attr.freq)
+		atomic_dec(&per_cpu(perf_freq_events, cpu));
 }
 
 static void unaccount_event(struct perf_event *event)
@@ -6474,6 +6478,9 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 	}
 	if (is_cgroup_event(event))
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
+
+	if (event->attr.freq)
+		atomic_inc(&per_cpu(perf_freq_events, cpu));
 }
 
 static void account_event(struct perf_event *event)
-- 
cgit v0.10.2


From d84153d6c96f61aa06429586284639f32debf03e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:05 +0200
Subject: perf: Implement finer grained full dynticks kick

Currently the full dynticks subsystem keeps the
tick alive as long as there are perf events running.

This prevents the tick from being stopped as long as features
such as the lockup detectors are running. As a temporary fix,
the lockup detector is disabled by default when full dynticks
is built but this is not a long term viable solution.

To fix this, only keep the tick alive when an event configured
with a frequency rather than a period is running on the CPU,
or when an event throttles on the CPU.

These are the only purposes of the perf tick, especially now that
the rotation of flexible events is handled from a separate hrtimer.
The tick can be shut down the rest of the time.

Original-patch-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-8-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3fe385a..916cf1f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -870,12 +870,8 @@ static void perf_pmu_rotate_start(struct pmu *pmu)
 
 	WARN_ON(!irqs_disabled());
 
-	if (list_empty(&cpuctx->rotation_list)) {
-		int was_empty = list_empty(head);
+	if (list_empty(&cpuctx->rotation_list))
 		list_add(&cpuctx->rotation_list, head);
-		if (was_empty)
-			tick_nohz_full_kick();
-	}
 }
 
 static void get_ctx(struct perf_event_context *ctx)
@@ -1875,6 +1871,9 @@ static int  __perf_install_in_context(void *info)
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, task_ctx);
 
+	if (atomic_read(&__get_cpu_var(perf_freq_events)))
+		tick_nohz_full_kick();
+
 	return 0;
 }
 
@@ -2812,10 +2811,11 @@ done:
 #ifdef CONFIG_NO_HZ_FULL
 bool perf_event_can_stop_tick(void)
 {
-	if (list_empty(&__get_cpu_var(rotation_list)))
-		return true;
-	else
+	if (atomic_read(&__get_cpu_var(perf_freq_events)) ||
+	    __this_cpu_read(perf_throttled_count))
 		return false;
+	else
+		return true;
 }
 #endif
 
@@ -5202,6 +5202,7 @@ static int __perf_event_overflow(struct perf_event *event,
 			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
+			tick_nohz_full_kick();
 			ret = 1;
 		}
 	}
-- 
cgit v0.10.2


From 93786a5f6aeb9c032c1c240246c5aabcf457b38f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 23 Jul 2013 02:31:06 +0200
Subject: watchdog: Make it work under full dynticks

A perf event can be used without forcing the tick to
stay alive if it doesn't use a frequency but a sample
period and if it doesn't throttle (raise a storm of events).

Since the lockup detector neither uses a perf event frequency
nor should ever throttle due to its high period, it can now
run concurrently with the full dynticks feature.

So remove the hack that disabled the watchdog.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Anish Singh <anish198519851985@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374539466-4799-9-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1241d8c..51c4f34 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -553,14 +553,6 @@ void __init lockup_detector_init(void)
 {
 	set_sample_period();
 
-#ifdef CONFIG_NO_HZ_FULL
-	if (watchdog_user_enabled) {
-		watchdog_user_enabled = 0;
-		pr_warning("Disabled lockup detectors by default for full dynticks\n");
-		pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n");
-	}
-#endif
-
 	if (watchdog_user_enabled)
 		watchdog_enable_all_cpus();
 }
-- 
cgit v0.10.2


From cf4957f17f2a89984915ea808876d9c82225b862 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 24 Oct 2012 13:37:58 +0200
Subject: perf: Add PERF_EVENT_IOC_ID ioctl to return event ID

The only way to get the event ID is by reading the event fd,
followed by parsing the ID value out of the returned data.

While this is ok for current read format used by perf tool,
it is not ok when we use PERF_FORMAT_GROUP format.

With this format the data are returned for the whole group
and there's no way to find out what ID belongs to our fd
(if we are not group leader event).

Adding a simple ioctl that returns event primary ID for given fd.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-v1bn5cto707jn0bon34afqr1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index efef1d3..62c25a2 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -321,6 +321,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID		_IOR('$', 7, u64 *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 916cf1f..5200b60 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3568,6 +3568,15 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_EVENT_IOC_PERIOD:
 		return perf_event_period(event, (u64 __user *)arg);
 
+	case PERF_EVENT_IOC_ID:
+	{
+		u64 id = primary_event_id(event);
+
+		if (copy_to_user((void __user *)arg, &id, sizeof(id)))
+			return -EFAULT;
+		return 0;
+	}
+
 	case PERF_EVENT_IOC_SET_OUTPUT:
 	{
 		int ret;
-- 
cgit v0.10.2


From 6f5ab0019fd328b50a8488c9e5193fc1dbd8d6ed Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 15 Oct 2012 20:13:45 +0200
Subject: perf: Do not get values from disabled counters in group format read

It's possible some of the counters in the group could be
disabled when a sampling member of the event group is reading
the rest via PERF_SAMPLE_READ sample type processing. Disabled
counters could then produce wrong numbers.

Fixing that by reading only enabled counters for PERF_SAMPLE_READ
sample type processing.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-wwkjb0bbcuslnz0klrmqi26r@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5200b60..e82e700 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4388,7 +4388,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
 
-		if (sub != event)
+		if ((sub != event) &&
+		    (sub->state == PERF_EVENT_STATE_ACTIVE))
 			sub->pmu->read(sub);
 
 		values[n++] = perf_event_count(sub);
-- 
cgit v0.10.2


From e2b5abe0c82b45980b95ead22678861a2013c0df Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 4 Apr 2012 19:32:27 +0200
Subject: perf evlist: Use PERF_EVENT_IOC_ID perf ioctl to read event id

Changing the way we retrieve the event ID. Instead of parsing
the ID out of the read data, use the PERF_EVENT_IOC_ID ioctl.

Keeping the old way in place to support kernels without
PERF_EVENT_IOC_ID ioctl support.

This will be useful for retrieving the event ID for events
with PERF_FORMAT_GROUP read format set, where it's impossible
to get correct event id out of the read call data.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-psgb4n7kte8e6tfenbe7nj2h@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 42ea4e9..0d3b739 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -302,6 +302,17 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 {
 	u64 read_data[4] = { 0, };
 	int id_idx = 1; /* The first entry is the counter value */
+	u64 id;
+	int ret;
+
+	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
+	if (!ret)
+		goto add;
+
+	if (errno != ENOTTY)
+		return -1;
+
+	/* Legacy way to get event id.. All hail to old kernels! */
 
 	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
 	    read(fd, &read_data, sizeof(read_data)) == -1)
@@ -312,7 +323,10 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		++id_idx;
 
-	perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
+	id = read_data[id_idx];
+
+ add:
+	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 9ede473cc9f655f8a10cfc8ebbf04c48d84db7ee Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 10 Oct 2012 17:38:13 +0200
Subject: perf tools: Add support for parsing PERF_SAMPLE_READ sample type

Adding support to parse out the PERF_SAMPLE_READ sample bits.  The code
contains both single and group format specification.

This code parses out and prepares PERF_SAMPLE_READ data into the
perf_sample struct. It will be used for group leader sampling feature
coming in shortly.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-0tgdoln5rwk3wocshb442cl3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1ebb8fb..6119a64 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -80,6 +80,23 @@ struct stack_dump {
 	char *data;
 };
 
+struct sample_read_value {
+	u64 value;
+	u64 id;
+};
+
+struct sample_read {
+	u64 time_enabled;
+	u64 time_running;
+	union {
+		struct {
+			u64 nr;
+			struct sample_read_value *values;
+		} group;
+		struct sample_read_value one;
+	};
+};
+
 struct perf_sample {
 	u64 ip;
 	u32 pid, tid;
@@ -97,6 +114,7 @@ struct perf_sample {
 	struct branch_stack *branch_stack;
 	struct regs_dump  user_regs;
 	struct stack_dump user_stack;
+	struct sample_read read;
 };
 
 #define PERF_MEM_DATA_SRC_NONE \
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 0d3b739..df77a44 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -676,6 +676,32 @@ u64 perf_evlist__sample_type(struct perf_evlist *evlist)
 	return first->attr.sample_type;
 }
 
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
+	u64 read_format = first->attr.read_format;
+	u64 sample_type = first->attr.sample_type;
+
+	list_for_each_entry_continue(pos, &evlist->entries, node) {
+		if (read_format != pos->attr.read_format)
+			return false;
+	}
+
+	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
+	if ((sample_type & PERF_SAMPLE_READ) &&
+	    !(read_format & PERF_FORMAT_ID)) {
+		return false;
+	}
+
+	return true;
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+	return first->attr.read_format;
+}
+
 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
 {
 	struct perf_evsel *first = perf_evlist__first(evlist);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 0583d36..c7178b7 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -118,6 +118,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist);
 void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct perf_evlist *evlist);
 
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
 u64 perf_evlist__sample_type(struct perf_evlist *evlist);
 bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
@@ -127,6 +128,7 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even
 
 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);
 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
 
 void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
 				   struct list_head *list,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8bed0c1..9ab8fff 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1096,8 +1096,34 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	}
 
 	if (type & PERF_SAMPLE_READ) {
-		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
-		return -1;
+		u64 read_format = evsel->attr.read_format;
+
+		if (read_format & PERF_FORMAT_GROUP)
+			data->read.group.nr = *array;
+		else
+			data->read.one.value = *array;
+
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			data->read.time_enabled = *array;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			data->read.time_running = *array;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			data->read.group.values = (struct sample_read_value *) array;
+			array = (void *) array + data->read.group.nr *
+				sizeof(struct sample_read_value);
+		} else {
+			data->read.one.id = *array;
+			array++;
+		}
 	}
 
 	if (type & PERF_SAMPLE_CALLCHAIN) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 272c9cf..f082921 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -71,6 +71,11 @@ static int perf_session__open(struct perf_session *self, bool force)
 		goto out_close;
 	}
 
+	if (!perf_evlist__valid_read_format(self->evlist)) {
+		pr_err("non matching read_format");
+		goto out_close;
+	}
+
 	self->size = input_stat.st_size;
 	return 0;
 
@@ -749,6 +754,36 @@ static void perf_session__print_tstamp(struct perf_session *session,
 		printf("%" PRIu64 " ", sample->time);
 }
 
+static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+{
+	printf("... sample_read:\n");
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		printf("...... time enabled %016" PRIx64 "\n",
+		       sample->read.time_enabled);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		printf("...... time running %016" PRIx64 "\n",
+		       sample->read.time_running);
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		u64 i;
+
+		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
+
+		for (i = 0; i < sample->read.group.nr; i++) {
+			struct sample_read_value *value;
+
+			value = &sample->read.group.values[i];
+			printf("..... id %016" PRIx64
+			       ", value %016" PRIx64 "\n",
+			       value->id, value->value);
+		}
+	} else
+		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
+			sample->read.one.id, sample->read.one.value);
+}
+
 static void dump_event(struct perf_session *session, union perf_event *event,
 		       u64 file_offset, struct perf_sample *sample)
 {
@@ -798,6 +833,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		sample_read__printf(sample, evsel->attr.read_format);
 }
 
 static struct machine *
-- 
cgit v0.10.2


From c4861afe30aa3b00c95f9389f24cf6ede88416e4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 12 Oct 2012 13:02:21 +0200
Subject: perf evlist: Fix event ID retrieval for group format read case

We need to fail the event ID retrieval in case both following conditions
are true:

  - we are on kernel with no PERF_EVENT_IOC_ID support
  - PERF_FORMAT_GROUP read format is set

The PERF_FORMAT_GROUP read format bit is the killer for retrieving event
ID out of the read syscall, because we have no guarantee of the event
placement within leader kernel sibling list.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-e93pgyj20rqx48qzw10vj4r4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index df77a44..10fcc03 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -314,6 +314,13 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 
 	/* Legacy way to get event id.. All hail to old kernels! */
 
+	/*
+	 * This way does not work with group format read, so bail
+	 * out in that case.
+	 */
+	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+		return -1;
+
 	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
 	    read(fd, &read_data, sizeof(read_data)) == -1)
 		return -1;
-- 
cgit v0.10.2


From 932a35940a3f03613796ab4855ecbb214dbdc0c2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Oct 2012 14:10:35 +0200
Subject: perf evlist: Add perf_evlist__id2sid method to get event ID related
 data

This will be helpful for PERF_FORMAT_GROUP samples where we need to
store ID related period value for each event.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-twmlgsbyim97p7cyohjwb1df@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 10fcc03..da2dd92 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -337,21 +337,32 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 	return 0;
 }
 
-struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
 {
 	struct hlist_head *head;
 	struct perf_sample_id *sid;
 	int hash;
 
-	if (evlist->nr_entries == 1)
-		return perf_evlist__first(evlist);
-
 	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
 	head = &evlist->heads[hash];
 
 	hlist_for_each_entry(sid, head, node)
 		if (sid->id == id)
-			return sid->evsel;
+			return sid;
+
+	return NULL;
+}
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+	struct perf_sample_id *sid;
+
+	if (evlist->nr_entries == 1)
+		return perf_evlist__first(evlist);
+
+	sid = perf_evlist__id2sid(evlist, id);
+	if (sid)
+		return sid->evsel;
 
 	if (!perf_evlist__sample_id_all(evlist))
 		return perf_evlist__first(evlist);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c7178b7..327abab 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -78,6 +78,8 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
 
 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
 
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
+
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
-- 
cgit v0.10.2


From e4caec0d1af3d608d52e6b92d09fb862d7691d4b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 10 Oct 2012 18:52:24 +0200
Subject: perf evsel: Add PERF_SAMPLE_READ sample related processing

For sample with sample type PERF_SAMPLE_READ the period value is stored
in the 'struct sample_read'.

Moreover if the read format has PERF_FORMAT_GROUP, the 'struct
sample_read' contains period values for all events in the group (for
which the sample's event is a leader).

We deliver separated samples for all the values contained within the
'struct sample_read'.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-6mdm5xkrm6kypouh1c33cyys@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3f156cc..6a2cf26 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -38,6 +38,9 @@ struct perf_sample_id {
 	struct hlist_node 	node;
 	u64		 	id;
 	struct perf_evsel	*evsel;
+
+	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
+	u64			period;
 };
 
 /** struct perf_evsel - event selector
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f082921..9247d9c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -860,6 +860,75 @@ static struct machine *
 	return &session->machines.host;
 }
 
+static int deliver_sample_value(struct perf_session *session,
+				struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct sample_read_value *v,
+				struct machine *machine)
+{
+	struct perf_sample_id *sid;
+
+	sid = perf_evlist__id2sid(session->evlist, v->id);
+	if (sid) {
+		sample->id     = v->id;
+		sample->period = v->value - sid->period;
+		sid->period    = v->value;
+	}
+
+	if (!sid || sid->evsel == NULL) {
+		++session->stats.nr_unknown_id;
+		return 0;
+	}
+
+	return tool->sample(tool, event, sample, sid->evsel, machine);
+}
+
+static int deliver_sample_group(struct perf_session *session,
+				struct perf_tool *tool,
+				union  perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	int ret = -EINVAL;
+	u64 i;
+
+	for (i = 0; i < sample->read.group.nr; i++) {
+		ret = deliver_sample_value(session, tool, event, sample,
+					   &sample->read.group.values[i],
+					   machine);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int
+perf_session__deliver_sample(struct perf_session *session,
+			     struct perf_tool *tool,
+			     union  perf_event *event,
+			     struct perf_sample *sample,
+			     struct perf_evsel *evsel,
+			     struct machine *machine)
+{
+	/* We know evsel != NULL. */
+	u64 sample_type = evsel->attr.sample_type;
+	u64 read_format = evsel->attr.read_format;
+
+	/* Standard sample delivery. */
+	if (!(sample_type & PERF_SAMPLE_READ))
+		return tool->sample(tool, event, sample, evsel, machine);
+
+	/* For PERF_SAMPLE_READ we have either single or group mode. */
+	if (read_format & PERF_FORMAT_GROUP)
+		return deliver_sample_group(session, tool, event, sample,
+					    machine);
+	else
+		return deliver_sample_value(session, tool, event, sample,
+					    &sample->read.one, machine);
+}
+
 static int perf_session_deliver_event(struct perf_session *session,
 				      union perf_event *event,
 				      struct perf_sample *sample,
@@ -902,7 +971,8 @@ static int perf_session_deliver_event(struct perf_session *session,
 			++session->stats.nr_unprocessable_samples;
 			return 0;
 		}
-		return tool->sample(tool, event, sample, evsel, machine);
+		return perf_session__deliver_sample(session, tool, event,
+						    sample, evsel, machine);
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_COMM:
-- 
cgit v0.10.2


From 3c1763115b492afb743daa4e1c8099eca6a70634 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 10 Oct 2012 17:39:03 +0200
Subject: perf tools: Add 'S' event/group modifier to read sample value

Adding 'S' event/group modifier to specify that the event value/s are
read by PERF_SAMPLE_READ sample type processing, instead of the period
value offered by lower layers.

There's additional behaviour change for 'S' modifier being specified on
event group:

Currently all the events within a group make samples. If the user now
specifies 'S' within the group modifier, only the leader will trigger
samples. The rest of events in the group will have sampling disabled.

And same as for single events, values of all events within the group
(including leader) are read by PERF_SAMPLE_READ sample type processing.

Following example will create event group with cycles and cache-misses
events, setting the cycles as group leader and the only event to
actually sample. Both cycles and cache-misses event period values are
read by PERF_SAMPLE_READ sample type processing with PERF_FORMAT_GROUP
read format.

Example:

  $ perf record -e '{cycles,cache-misses}:S' ls
  ...
  $ perf report --group --show-total-period --stdio
  ...
  # Samples: 36  of event 'anon group { cycles, cache-misses }'
  # Event count (approx.): 12585593
  #
  #       Overhead          Period  Command      Shared Object                      Symbol
  # ..............  ..............  .......  .................  ..........................
  #
    19.92%   1.20%  2505936     31       ls  [kernel.kallsyms]  [k] mark_held_locks
    13.74%   0.47%  1729327     12       ls  [kernel.kallsyms]  [k] sched_clock_local
    13.64%  23.72%  1716147    612       ls  ld-2.14.90.so      [.] check_match.10805
    13.12%  23.22%  1650778    599       ls  libc-2.14.90.so    [.] _nl_intern_locale_data
    11.24%  29.19%  1414554    753       ls  [kernel.kallsyms]  [k] sched_clock_cpu
     8.50%   0.35%  1070150      9       ls  [kernel.kallsyms]  [k] check_chain_key
  ...

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-iyoinu3axi11mymwnh2b7fxj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 826f3d6..eb03f06 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -29,6 +29,7 @@ counted. The following modifiers exist:
  G - guest counting (in KVM guests)
  H - host counting (not in KVM guests)
  p - precise level
+ S - read sample value (PERF_SAMPLE_READ)
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9ab8fff..8f10161 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -490,6 +490,7 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
 void perf_evsel__config(struct perf_evsel *evsel,
 			struct perf_record_opts *opts)
 {
+	struct perf_evsel *leader = evsel->leader;
 	struct perf_event_attr *attr = &evsel->attr;
 	int track = !evsel->idx; /* only the first counter needs these */
 
@@ -499,6 +500,25 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	perf_evsel__set_sample_bit(evsel, IP);
 	perf_evsel__set_sample_bit(evsel, TID);
 
+	if (evsel->sample_read) {
+		perf_evsel__set_sample_bit(evsel, READ);
+
+		/*
+		 * We need ID even in case of single event, because
+		 * PERF_SAMPLE_READ process ID specific data.
+		 */
+		perf_evsel__set_sample_id(evsel);
+
+		/*
+		 * Apply group format only if we belong to group
+		 * with more than one members.
+		 */
+		if (leader->nr_members > 1) {
+			attr->read_format |= PERF_FORMAT_GROUP;
+			attr->inherit = 0;
+		}
+	}
+
 	/*
 	 * We default some events to a 1 default interval. But keep
 	 * it a weak assumption overridable by the user.
@@ -514,6 +534,15 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		}
 	}
 
+	/*
+	 * Disable sampling for all group members other
+	 * than leader in case leader 'leads' the sampling.
+	 */
+	if ((leader != evsel) && leader->sample_read) {
+		attr->sample_freq   = 0;
+		attr->sample_period = 0;
+	}
+
 	if (opts->no_samples)
 		attr->sample_freq = 0;
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6a2cf26..5edc625 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -79,6 +79,7 @@ struct perf_evsel {
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;
+	int			sample_read;
 	struct perf_evsel	*leader;
 	char			*group_name;
 };
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2c460ed..dba877d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -687,6 +687,7 @@ struct event_modifier {
 	int eG;
 	int precise;
 	int exclude_GH;
+	int sample_read;
 };
 
 static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -698,6 +699,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	int eH = evsel ? evsel->attr.exclude_host : 0;
 	int eG = evsel ? evsel->attr.exclude_guest : 0;
 	int precise = evsel ? evsel->attr.precise_ip : 0;
+	int sample_read = 0;
 
 	int exclude = eu | ek | eh;
 	int exclude_GH = evsel ? evsel->exclude_GH : 0;
@@ -730,6 +732,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 			/* use of precise requires exclude_guest */
 			if (!exclude_GH)
 				eG = 1;
+		} else if (*str == 'S') {
+			sample_read = 1;
 		} else
 			break;
 
@@ -756,6 +760,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	mod->eG = eG;
 	mod->precise = precise;
 	mod->exclude_GH = exclude_GH;
+	mod->sample_read = sample_read;
 	return 0;
 }
 
@@ -768,7 +773,7 @@ static int check_modifier(char *str)
 	char *p = str;
 
 	/* The sizeof includes 0 byte as well. */
-	if (strlen(str) > (sizeof("ukhGHppp") - 1))
+	if (strlen(str) > (sizeof("ukhGHpppS") - 1))
 		return -1;
 
 	while (*p) {
@@ -806,6 +811,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
 		evsel->attr.exclude_host   = mod.eH;
 		evsel->attr.exclude_guest  = mod.eG;
 		evsel->exclude_GH          = mod.exclude_GH;
+		evsel->sample_read         = mod.sample_read;
 	}
 
 	return 0;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index e9d1134..b36115f 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -82,7 +82,7 @@ num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?]*
-modifier_event	[ukhpGH]+
+modifier_event	[ukhpGHS]+
 modifier_bp	[rwx]{1,3}
 
 %%
-- 
cgit v0.10.2


From 8404db63461af62025f32f8368861fb33604e62f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 1 Feb 2013 19:33:31 +0100
Subject: perf tests: Add attr record group sampling test

Add a test to validate the perf_event_attr data for the command:

  record -e '{cycles,cache-misses}:S'

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-9eppxvhkly6gse5ximudckrp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
new file mode 100644
index 0000000..658f5d6
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -0,0 +1,36 @@
+[config]
+command = record
+args    = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=343
+read_format=12
+inherit=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+
+# cache-misses
+type=0
+config=3
+
+# default | PERF_SAMPLE_READ
+sample_type=343
+
+# PERF_FORMAT_ID | PERF_FORMAT_GROUP
+read_format=12
+
+mmap=0
+comm=0
+enable_on_exec=0
+disabled=0
+
+# inherit is disabled for group sampling
+inherit=0
+
+# sampling disabled
+sample_freq=0
+sample_period=0
-- 
cgit v0.10.2


From a9f93f97424c64f8d5d94b653a2133e491498680 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 1 Feb 2013 20:37:11 +0100
Subject: perf tests: Add parse events tests for leader sampling

Add two more tests to the automated parse events suite for the following
event configs:

  '{cycles,cache-misses,branch-misses}:S'
  '{instructions,branch-misses}:Su'

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-tmcy0ir7i8id2t54qg5ifbio@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 344c844..b46379c 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -520,6 +520,7 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cycles:upp */
 	evsel = perf_evsel__next(evsel);
@@ -535,6 +536,7 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -560,6 +562,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cache-references + :u modifier */
 	evsel = perf_evsel__next(evsel);
@@ -574,6 +577,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cycles:k */
 	evsel = perf_evsel__next(evsel);
@@ -587,6 +591,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -615,6 +620,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 		!strcmp(leader->group_name, "group1"));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* group1 cycles:kppp */
 	evsel = perf_evsel__next(evsel);
@@ -631,6 +637,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* group2 cycles + G modifier */
 	evsel = leader = perf_evsel__next(evsel);
@@ -648,6 +655,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 		!strcmp(leader->group_name, "group2"));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* group2 1:3 + G modifier */
 	evsel = perf_evsel__next(evsel);
@@ -661,6 +669,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions:u */
 	evsel = perf_evsel__next(evsel);
@@ -674,6 +683,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -701,6 +711,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions:kp + p */
 	evsel = perf_evsel__next(evsel);
@@ -716,6 +727,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -742,6 +754,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions + G */
 	evsel = perf_evsel__next(evsel);
@@ -756,6 +769,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cycles:G */
 	evsel = leader = perf_evsel__next(evsel);
@@ -772,6 +786,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions:G */
 	evsel = perf_evsel__next(evsel);
@@ -963,6 +978,98 @@ static int test__group_gh4(struct perf_evlist *evlist)
 	return 0;
 }
 
+static int test__leader_sample1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+	/* cycles - sampling group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* cache-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* branch-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	return 0;
+}
+
+static int test__leader_sample2(struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+
+	/* instructions - sampling group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* branch-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	char events_path[PATH_MAX];
@@ -1179,6 +1286,14 @@ static struct evlist_test test__events[] = {
 		.name  = "{cycles:G,cache-misses:H}:uG",
 		.check = test__group_gh4,
 	},
+	[38] = {
+		.name  = "{cycles,cache-misses,branch-misses}:S",
+		.check = test__leader_sample1,
+	},
+	[39] = {
+		.name  = "{instructions,branch-misses}:Su",
+		.check = test__leader_sample2,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
-- 
cgit v0.10.2


From 251f426fddd9217ce6e4478653d3ee33df518030 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 28 Jul 2013 09:14:34 -0600
Subject: perf evsel: Actually show symbol offset in stack trace when requested

Symbol offset is one of the fields that can be requested in perf-script.
Currently you do not get that data when requested. e.g.,

perf script -f comm,tid,pid,time,cpu,sym,symoff,ip
...
gcc  6201/6201  [006] 762250.617897:
    ffffffff81090d95 update_curr
    ffffffff810911b8 dequeue_entity
    ffffffff81091825 dequeue_task_fair
    ffffffff81087163 dequeue_task
    ffffffff81087c03 deactivate_task
...

With this patch you get the offset:
...
gcc  6201/6201  [006] 762250.617897:
    ffffffff81090d95 update_curr+0x1c5
    ffffffff810911b8 dequeue_entity+0x28
    ffffffff81091825 dequeue_task_fair+0x45
    ffffffff81087163 dequeue_task+0x93
    ffffffff81087c03 deactivate_task+0x23
...

Signed-off-by: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1375024474-45726-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9247d9c..a0ce5a4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1519,8 +1519,13 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			printf("\t%16" PRIx64, node->ip);
 			if (print_sym) {
 				printf(" ");
-				symbol__fprintf_symname(node->sym, stdout);
+				if (print_symoffset) {
+					al.addr = node->ip;
+					symbol__fprintf_symname_offs(node->sym, &al, stdout);
+				} else
+					symbol__fprintf_symname(node->sym, stdout);
 			}
+
 			if (print_dso) {
 				printf(" (");
 				map__fprintf_dsoname(node->map, stdout);
-- 
cgit v0.10.2


From 602bab1b883090ffd125ed1253fe8ec127c048b1 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 26 Jul 2013 08:27:23 -0600
Subject: perf tools: Fix compile of util/tsc.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Fedora 18, with gcc 4.6.4 compile fails with:

arch/x86/util/tsc.c: In function ‘perf_time_to_tsc’:
arch/x86/util/tsc.c:13:6: error: declaration of ‘time’ shadows a global declaration [-Werror=shadow]
cc1: all warnings being treated as errors
make: *** [/tmp/junk/arch/x86/util/tsc.o] Error 1
make: *** Waiting for unfinished jobs....

Fix by renaming the local variable.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/r/1374848843-43127-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index f111744..9570c2b 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -10,11 +10,11 @@
 
 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
 {
-	u64 time, quot, rem;
+	u64 t, quot, rem;
 
-	time = ns - tc->time_zero;
-	quot = time / tc->time_mult;
-	rem  = time % tc->time_mult;
+	t = ns - tc->time_zero;
+	quot = t / tc->time_mult;
+	rem  = t % tc->time_mult;
 	return (quot << tc->time_shift) +
 	       (rem << tc->time_shift) / tc->time_mult;
 }
-- 
cgit v0.10.2


From a14bb860a38ff5b4aa3db20f251fef43e447a7c9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Jul 2013 16:38:23 -0300
Subject: perf trace: Beautify 'connect' result

It is an errno, so print an error string.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-zt68gijvvoe8gd7kmclo43si@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0e4b67f..da7ae01 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -18,6 +18,7 @@ static struct syscall_fmt {
 } syscall_fmts[] = {
 	{ .name	    = "access",	    .errmsg = true, },
 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
+	{ .name	    = "connect",    .errmsg = true, },
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 	{ .name	    = "futex",	    .errmsg = true, },
-- 
cgit v0.10.2


From 3223565cdf856fa07024e5db7ca24e1b1b38d1db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 1 Aug 2013 17:00:45 -0300
Subject: perf python: Remove duplicate TID bit from mask

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thiago Peixoto <thiagolcpeixoto@gmail.com>
Link: http://lkml.kernel.org/n/tip-jurgz6myq125o1ql6lldh6f7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index b11cca5..2225162 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -21,7 +21,7 @@ def main():
 	evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
 			   wakeup_events = 1, watermark = 1,
 			   sample_id_all = 1,
-			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
+			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
 	evsel.open(cpus = cpus, threads = threads);
 	evlist = perf.evlist(cpus, threads)
 	evlist.add(evsel)
-- 
cgit v0.10.2


From 3b47abe1b5f5446ab41a3a139b6075f46f77f21f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 10:50:29 +0900
Subject: perf util: Add parse_nsec_time() function

The parse_nsec_time() function parses a time string of the form
"<sec>[.<fraction>]" (at most nine fractional digits) into a 64-bit
nanosecond value.  It is preparation for time filtering in some perf
commands.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: David Ahern <dsahern@gmail.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1370310629-9642-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 9a06584..6d17b18 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -328,3 +328,36 @@ void put_tracing_file(char *file)
 {
 	free(file);
 }
+
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+	u64 time_sec, time_nsec;
+	char *end;
+
+	time_sec = strtoul(str, &end, 10);
+	if (*end != '.' && *end != '\0')
+		return -1;
+
+	if (*end == '.') {
+		int i;
+		char nsec_buf[10];
+
+		if (strlen(++end) > 9)
+			return -1;
+
+		strncpy(nsec_buf, end, 9);
+		nsec_buf[9] = '\0';
+
+		/* make it nsec precision */
+		for (i = strlen(nsec_buf); i < 9; i++)
+			nsec_buf[i] = '0';
+
+		time_nsec = strtoul(nsec_buf, &end, 10);
+		if (*end != '\0')
+			return -1;
+	} else
+		time_nsec = 0;
+
+	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
+	return 0;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index cc1574e..a535359 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -208,6 +208,8 @@ static inline int has_extension(const char *filename, const char *ext)
 #define NSEC_PER_MSEC	1000000L
 #endif
 
+int parse_nsec_time(const char *str, u64 *ptime);
+
 extern unsigned char sane_ctype[256];
 #define GIT_SPACE		0x01
 #define GIT_DIGIT		0x02
-- 
cgit v0.10.2


From 42ee8c61cb1b7fe7b7ad5071bfb3c609cb0620ca Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:39 -0600
Subject: perf top: move CONSOLE_CLEAR to header file

For use with kvm-live mode.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-2-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bbf4635..9101f7c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -238,8 +238,6 @@ out_unlock:
 	pthread_mutex_unlock(&notes->lock);
 }
 
-static const char		CONSOLE_CLEAR[] = "[H[2J";
-
 static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 						     struct addr_location *al,
 						     struct perf_sample *sample)
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index df46be9..b554ffc 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -39,6 +39,8 @@ struct perf_top {
 	float		   min_percent;
 };
 
+#define CONSOLE_CLEAR "[H[2J"
+
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
 void perf_top__reset_sample_counters(struct perf_top *top);
 #endif /* __PERF_TOP_H */
-- 
cgit v0.10.2


From ffe4f3c0d109dc53e1b3448ac457052107f34a84 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:40 -0600
Subject: perf stats: Add max and min stats

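Track the maximum and minimum values in struct stats.  An initialization
function is needed to set min to (u64) -1, so that a not-yet-updated
minimum can be told apart from an actual min of 0.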

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 7c59c28..6506b3d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -10,6 +10,12 @@ void update_stats(struct stats *stats, u64 val)
 	delta = val - stats->mean;
 	stats->mean += delta / stats->n;
 	stats->M2 += delta*(val - stats->mean);
+
+	if (val > stats->max)
+		stats->max = val;
+
+	if (val < stats->min)
+		stats->min = val;
 }
 
 double avg_stats(struct stats *stats)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 588367c..ae8ccd7 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -6,6 +6,7 @@
 struct stats
 {
 	double n, mean, M2;
+	u64 max, min;
 };
 
 void update_stats(struct stats *stats, u64 val);
@@ -13,4 +14,12 @@ double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
 double rel_stddev_stats(double stddev, double avg);
 
+static inline void init_stats(struct stats *stats)
+{
+	stats->n    = 0.0;
+	stats->mean = 0.0;
+	stats->M2   = 0.0;
+	stats->min  = (u64) -1;
+	stats->max  = 0;
+}
 #endif
-- 
cgit v0.10.2


From 9c5014022f5d5b09abc8b713da81b3d2db319699 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:41 -0600
Subject: perf session: Export a few functions for event processing

Allows kvm live mode to reuse the event processing and ordered samples
processing used by the perf-report path.

v2: removed flush_sample_queue as noticed by Jiri

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-4-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index a0ce5a4..b5ebd47 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -250,7 +250,7 @@ static int process_finished_round(struct perf_tool *tool,
 				  union perf_event *event,
 				  struct perf_session *session);
 
-static void perf_tool__fill_defaults(struct perf_tool *tool)
+void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
 		tool->sample = process_event_sample_stub;
@@ -495,7 +495,7 @@ static int perf_session_deliver_event(struct perf_session *session,
 				      u64 file_offset);
 
 static int flush_sample_queue(struct perf_session *s,
-			       struct perf_tool *tool)
+		       struct perf_tool *tool)
 {
 	struct ordered_samples *os = &s->ordered_samples;
 	struct list_head *head = &os->samples;
@@ -1049,10 +1049,10 @@ static void event_swap(union perf_event *event, bool sample_id_all)
 		swap(event, sample_id_all);
 }
 
-static int perf_session__process_event(struct perf_session *session,
-				       union perf_event *event,
-				       struct perf_tool *tool,
-				       u64 file_offset)
+int perf_session__process_event(struct perf_session *session,
+				union perf_event *event,
+				struct perf_tool *tool,
+				u64 file_offset)
 {
 	struct perf_sample sample;
 	int ret;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index ad8d3d4..9818fc2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -56,6 +56,13 @@ int __perf_session__process_events(struct perf_session *self,
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_tool *tool);
 
+int perf_session__process_event(struct perf_session *session,
+				union perf_event *event,
+				struct perf_tool *tool,
+				u64 file_offset);
+
+void perf_tool__fill_defaults(struct perf_tool *tool);
+
 int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel,
 				    struct thread *thread,
 				    struct ip_callchain *chain,
-- 
cgit v0.10.2


From 8fdd84c44fd09d783caa4fb81d2d680b0cf07eeb Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:42 -0600
Subject: perf kvm: Split out tracepoints from record args

Needed by kvm live command. Make record_args a local while we are
messing with the args.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-5-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 24b78ae..7d14a3a 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -801,16 +801,11 @@ exit:
 	return ret;
 }
 
-static const char * const record_args[] = {
-	"record",
-	"-R",
-	"-f",
-	"-m", "1024",
-	"-c", "1",
-	"-e", "kvm:kvm_entry",
-	"-e", "kvm:kvm_exit",
-	"-e", "kvm:kvm_mmio",
-	"-e", "kvm:kvm_pio",
+static const char * const kvm_events_tp[] = {
+	"kvm:kvm_entry",
+	"kvm:kvm_exit",
+	"kvm:kvm_mmio",
+	"kvm:kvm_pio",
 };
 
 #define STRDUP_FAIL_EXIT(s)		\
@@ -826,8 +821,16 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
 	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		"-R",
+		"-f",
+		"-m", "1024",
+		"-c", "1",
+	};
 
-	rec_argc = ARRAY_SIZE(record_args) + argc + 2;
+	rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
+		   2 * ARRAY_SIZE(kvm_events_tp);
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
 	if (rec_argv == NULL)
@@ -836,6 +839,11 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
 
+	for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
+	}
+
 	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
 	rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
 
-- 
cgit v0.10.2


From 5c6974f49832a55edc9ca744323778947c104ca0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:09 -0700
Subject: perf evlist: Remove obsolete dummy execve

Minor cleanup.

The dummy execve to pre-resolve the PLT is obsolete since
"enable_on_exec" was added. The counters only start running
after the execve anyway. So just remove it.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index da2dd92..c7d111f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -837,13 +837,6 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
 
 		/*
-		 * Do a dummy execvp to get the PLT entry resolved,
-		 * so we avoid the resolver overhead on the real
-		 * execvp call.
-		 */
-		execvp("", (char **)argv);
-
-		/*
 		 * Tell the parent we're ready to go
 		 */
 		close(child_ready_pipe[1]);
-- 
cgit v0.10.2


From e2407bef968d64a28465561832686636d3380bf9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:10 -0700
Subject: perf evsel: Add support for enabling counters

Add support for enabling already set up counters by using an
ioctl. I share some code with the filter setup.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-3-git-send-email-andi@firstfloor.org
[ Fixed up 'err' variable indentation ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8f10161..960394e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -634,15 +634,15 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 	return evsel->fd != NULL ? 0 : -ENOMEM;
 }
 
-int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
-			   const char *filter)
+static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthreads,
+			  int ioc,  void *arg)
 {
 	int cpu, thread;
 
 	for (cpu = 0; cpu < ncpus; cpu++) {
 		for (thread = 0; thread < nthreads; thread++) {
 			int fd = FD(evsel, cpu, thread),
-			    err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
+			    err = ioctl(fd, ioc, arg);
 
 			if (err)
 				return err;
@@ -652,6 +652,21 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 	return 0;
 }
 
+int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
+			   const char *filter)
+{
+	return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+				     PERF_EVENT_IOC_SET_FILTER,
+				     (void *)filter);
+}
+
+int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+				     PERF_EVENT_IOC_ENABLE,
+				     0);
+}
+
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 {
 	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5edc625..532a5f9 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -146,6 +146,7 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel);
 
 int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 			   const char *filter);
+int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
 			     struct cpu_map *cpus);
-- 
cgit v0.10.2


From 411916880ff4061ac0491a154f10af4d49a0c61a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:11 -0700
Subject: perf stat: Add support for --initial-delay option

When measuring workloads the startup phase -- doing page faults, dynamic
linking, opening files -- is often very different from the rest of the
workload.  Especially with smaller kernels and using counter
multiplexing this can give significant measurement errors.

Multiplexing assumes that the workload is mostly the same over longer
periods. But at startup there is typically a relatively short spike of
activity.  If many groups are multiplexed, the one group that sees the
spike, and is then scaled up over the time taken to run all groups,
may show a significant error.

Also in general it's often not useful to measure the startup, because it
is so different from the rest.

One way around this is to use interval mode and discard the first
sample, but this can be awkward because interval mode doesn't support
intervals of less than 100ms, and also a useful interval is not
necessarily the same as a useful startup delay.

This patch adds a new initial-delay option, -D / --delay, to skip
measuring for the startup phase. The time is specified in ms.

Here's a simple example:

perf stat -e page-faults bash -c 'for i in $(seq 100000) ; do true ; done'
...
             3,721 page-faults
...

If we just wait 20 ms the number of page faults is 1/3 less:

perf stat -D 20 -e page-faults bash -c 'for i in $(seq 100000) ; do true ; done'
...
             2,823 page-faults
...

So we filtered out most of the startup noise from bash.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-4-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..73c9759 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical processor.
 
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..2e637e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,7 @@ static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
 static unsigned int		interval			= 0;
+static unsigned int		initial_delay			= 0;
 static bool			forever				= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
@@ -254,7 +255,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 	if (!perf_target__has_task(&target) &&
 	    perf_evsel__is_group_leader(evsel)) {
 		attr->disabled = 1;
-		attr->enable_on_exec = 1;
+		if (!initial_delay)
+			attr->enable_on_exec = 1;
 	}
 
 	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
@@ -416,6 +418,20 @@ static void print_interval(void)
 	}
 }
 
+static void handle_initial_delay(void)
+{
+	struct perf_evsel *counter;
+
+	if (initial_delay) {
+		const int ncpus = cpu_map__nr(evsel_list->cpus),
+			nthreads = thread_map__nr(evsel_list->threads);
+
+		usleep(initial_delay * 1000);
+		list_for_each_entry(counter, &evsel_list->entries, node)
+			perf_evsel__enable(counter, ncpus, nthreads);
+	}
+}
+
 static int __run_perf_stat(int argc, const char **argv)
 {
 	char msg[512];
@@ -486,6 +502,7 @@ static int __run_perf_stat(int argc, const char **argv)
 
 	if (forks) {
 		perf_evlist__start_workload(evsel_list);
+		handle_initial_delay();
 
 		if (interval) {
 			while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -497,6 +514,7 @@ static int __run_perf_stat(int argc, const char **argv)
 		if (WIFSIGNALED(status))
 			psignal(WTERMSIG(status), argv[0]);
 	} else {
+		handle_initial_delay();
 		while (!done) {
 			nanosleep(&ts, NULL);
 			if (interval)
@@ -1419,6 +1437,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "aggregate counts per processor socket", AGGR_SOCKET),
 	OPT_SET_UINT(0, "per-core", &aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_UINTEGER('D', "delay", &initial_delay,
+		     "ms to wait before starting measurement after program start"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
-- 
cgit v0.10.2


From 2bbf03f16a634f675c49c473b2b6528571990aea Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:12 -0700
Subject: perf stat: Flush output after each line in interval mode

When interval mode is outputting to a pipe, each measurement should be
flushed individually, so that the reader sees it in a timely manner.

With a terminal each line is automatically flushed by stdio, but that is
disabled with non-terminal output.

Simply fflush the output after each time interval.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-5-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2e637e4..f686d5f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -416,6 +416,8 @@ static void print_interval(void)
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter_aggr(counter, prefix);
 	}
+
+	fflush(output);
 }
 
 static void handle_initial_delay(void)
-- 
cgit v0.10.2


From 3445432b7b24665cf4693fc4794c62d4d768a978 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:49 +0300
Subject: perf symbols: avoid SyS kernel syscall aliases

When removing duplicate symbols, prefer to remove syscall aliases
starting with SyS or compat_SyS.

A side-effect of that is that it results in slightly improved results
for the "vmlinux symtab matches kallsyms" test.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-6-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 02718e7..ea62ecd 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -87,6 +87,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 {
 	s64 a;
 	s64 b;
+	size_t na, nb;
 
 	/* Prefer a symbol with non zero length */
 	a = syma->end - syma->start;
@@ -120,11 +121,21 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 	else if (a > b)
 		return SYMBOL_B;
 
-	/* If all else fails, choose the symbol with the longest name */
-	if (strlen(syma->name) >= strlen(symb->name))
+	/* Choose the symbol with the longest name */
+	na = strlen(syma->name);
+	nb = strlen(symb->name);
+	if (na > nb)
 		return SYMBOL_A;
-	else
+	else if (na < nb)
+		return SYMBOL_B;
+
+	/* Avoid "SyS" kernel syscall aliases */
+	if (na >= 3 && !strncmp(syma->name, "SyS", 3))
 		return SYMBOL_B;
+	if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+		return SYMBOL_B;
+
+	return SYMBOL_A;
 }
 
 void symbols__fixup_duplicate(struct rb_root *symbols)
-- 
cgit v0.10.2


From b55ae0a976f2927ea8f15a85c43bbe6d25a68a41 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:45 +0300
Subject: perf tests: Add test for reading object code

Using the information in mmap events, perf tools can read object code
associated with sampled addresses.  A test is added that compares bytes
read by perf with the same bytes read using objdump.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index bfd12d0..e0d3d9f 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -392,6 +392,7 @@ LIB_OBJS += $(OUTPUT)tests/sw-clock.o
 ifeq ($(ARCH),x86)
 LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
 endif
+LIB_OBJS += $(OUTPUT)tests/code-reading.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b7b4049..f5af192 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -100,6 +100,10 @@ static struct test {
 	},
 #endif
 	{
+		.desc = "Test object code reading",
+		.func = test__code_reading,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
new file mode 100644
index 0000000..28bee62
--- /dev/null
+++ b/tools/perf/tests/code-reading.c
@@ -0,0 +1,509 @@
+#include <sys/types.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "machine.h"
+#include "event.h"
+#include "thread.h"
+
+#include "tests.h"
+
+#define BUFSZ	1024
+#define READLEN	128
+
+static unsigned int hex(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	return c - 'A' + 10;
+}
+
+static void read_objdump_line(const char *line, size_t line_len, void **buf,
+			      size_t *len)
+{
+	const char *p;
+	size_t i;
+
+	/* Skip to a colon */
+	p = strchr(line, ':');
+	if (!p)
+		return;
+	i = p + 1 - line;
+
+	/* Read bytes */
+	while (*len) {
+		char c1, c2;
+
+		/* Skip spaces */
+		for (; i < line_len; i++) {
+			if (!isspace(line[i]))
+				break;
+		}
+		/* Get 2 hex digits */
+		if (i >= line_len || !isxdigit(line[i]))
+			break;
+		c1 = line[i++];
+		if (i >= line_len || !isxdigit(line[i]))
+			break;
+		c2 = line[i++];
+		/* Followed by a space */
+		if (i < line_len && line[i] && !isspace(line[i]))
+			break;
+		/* Store byte */
+		*(unsigned char *)*buf = (hex(c1) << 4) | hex(c2);
+		*buf += 1;
+		*len -= 1;
+	}
+}
+
+static int read_objdump_output(FILE *f, void **buf, size_t *len)
+{
+	char *line = NULL;
+	size_t line_len;
+	ssize_t ret;
+	int err = 0;
+
+	while (1) {
+		ret = getline(&line, &line_len, f);
+		if (feof(f))
+			break;
+		if (ret < 0) {
+			pr_debug("getline failed\n");
+			err = -1;
+			break;
+		}
+		read_objdump_line(line, ret, buf, len);
+	}
+
+	free(line);
+
+	return err;
+}
+
+static int read_via_objdump(const char *filename, u64 addr, void *buf,
+			    size_t len)
+{
+	char cmd[PATH_MAX * 2];
+	const char *fmt;
+	FILE *f;
+	int ret;
+
+	fmt = "%s -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s";
+	ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len,
+		       filename);
+	if (ret <= 0 || (size_t)ret >= sizeof(cmd))
+		return -1;
+
+	pr_debug("Objdump command is: %s\n", cmd);
+
+	f = popen(cmd, "r");
+	if (!f) {
+		pr_debug("popen failed\n");
+		return -1;
+	}
+
+	ret = read_objdump_output(f, &buf, &len);
+	if (len) {
+		pr_debug("objdump read too few bytes\n");
+		if (!ret)
+			ret = len;
+	}
+
+	pclose(f);
+
+	return ret;
+}
+
+static int read_object_code(u64 addr, size_t len, u8 cpumode,
+			    struct thread *thread, struct machine *machine)
+{
+	struct addr_location al;
+	unsigned char buf1[BUFSZ];
+	unsigned char buf2[BUFSZ];
+	size_t ret_len;
+	u64 objdump_addr;
+	int ret;
+
+	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
+
+	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr,
+			      &al);
+	if (!al.map || !al.map->dso) {
+		pr_debug("thread__find_addr_map failed\n");
+		return -1;
+	}
+
+	pr_debug("File is: %s\n", al.map->dso->long_name);
+
+	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+		pr_debug("Unexpected kernel address - skipping\n");
+		return 0;
+	}
+
+	pr_debug("On file address is: %#"PRIx64"\n", al.addr);
+
+	if (len > BUFSZ)
+		len = BUFSZ;
+
+	/* Do not go off the map */
+	if (addr + len > al.map->end)
+		len = al.map->end - addr;
+
+	/* Read the object code using perf */
+	ret_len = dso__data_read_offset(al.map->dso, machine, al.addr, buf1,
+					len);
+	if (ret_len != len) {
+		pr_debug("dso__data_read_offset failed\n");
+		return -1;
+	}
+
+	/*
+	 * Converting addresses for use by objdump requires more information.
+	 * map__load() does that.  See map__rip_2objdump() for details.
+	 */
+	if (map__load(al.map, NULL))
+		return -1;
+
+	/* Read the object code using objdump */
+	objdump_addr = map__rip_2objdump(al.map, al.addr);
+	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
+	if (ret > 0) {
+		/*
+		 * The kernel maps are inaccurate - assume objdump is right in
+		 * that case.
+		 */
+		if (cpumode == PERF_RECORD_MISC_KERNEL ||
+		    cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
+			len -= ret;
+			if (len)
+				pr_debug("Reducing len to %zu\n", len);
+			else
+				return -1;
+		}
+	}
+	if (ret < 0) {
+		pr_debug("read_via_objdump failed\n");
+		return -1;
+	}
+
+	/* The results should be identical */
+	if (memcmp(buf1, buf2, len)) {
+		pr_debug("Bytes read differ from those read by objdump\n");
+		return -1;
+	}
+	pr_debug("Bytes read match those read by objdump\n");
+
+	return 0;
+}
+
+static int process_sample_event(struct machine *machine,
+				struct perf_evlist *evlist,
+				union perf_event *event)
+{
+	struct perf_sample sample;
+	struct thread *thread;
+	u8 cpumode;
+
+	if (perf_evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("perf_evlist__parse_sample failed\n");
+		return -1;
+	}
+
+	thread = machine__findnew_thread(machine, sample.pid);
+	if (!thread) {
+		pr_debug("machine__findnew_thread failed\n");
+		return -1;
+	}
+
+	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	return read_object_code(sample.ip, READLEN, cpumode, thread, machine);
+}
+
+static int process_event(struct machine *machine, struct perf_evlist *evlist,
+			 union perf_event *event)
+{
+	if (event->header.type == PERF_RECORD_SAMPLE)
+		return process_sample_event(machine, evlist, event);
+
+	if (event->header.type < PERF_RECORD_MAX)
+		return machine__process_event(machine, event);
+
+	return 0;
+}
+
+static int process_events(struct machine *machine, struct perf_evlist *evlist)
+{
+	union perf_event *event;
+	int i, ret;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			ret = process_event(machine, evlist, event);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+static int comp(const void *a, const void *b)
+{
+	return *(int *)a - *(int *)b;
+}
+
+static void do_sort_something(void)
+{
+	size_t sz = 40960;
+	int buf[sz], i;
+
+	for (i = 0; i < (int)sz; i++)
+		buf[i] = sz - i - 1;
+
+	qsort(buf, sz, sizeof(int), comp);
+
+	for (i = 0; i < (int)sz; i++) {
+		if (buf[i] != i) {
+			pr_debug("qsort failed\n");
+			break;
+		}
+	}
+}
+
+static void sort_something(void)
+{
+	int i;
+
+	for (i = 0; i < 10; i++)
+		do_sort_something();
+}
+
+static void syscall_something(void)
+{
+	int pipefd[2];
+	int i;
+
+	for (i = 0; i < 1000; i++) {
+		if (pipe(pipefd) < 0) {
+			pr_debug("pipe failed\n");
+			break;
+		}
+		close(pipefd[1]);
+		close(pipefd[0]);
+	}
+}
+
+static void fs_something(void)
+{
+	const char *test_file_name = "temp-perf-code-reading-test-file--";
+	FILE *f;
+	int i;
+
+	for (i = 0; i < 1000; i++) {
+		f = fopen(test_file_name, "w+");
+		if (f) {
+			fclose(f);
+			unlink(test_file_name);
+		}
+	}
+}
+
+static void do_something(void)
+{
+	fs_something();
+
+	sort_something();
+
+	syscall_something();
+}
+
+enum {
+	TEST_CODE_READING_OK,
+	TEST_CODE_READING_NO_VMLINUX,
+	TEST_CODE_READING_NO_ACCESS,
+};
+
+static int do_test_code_reading(void)
+{
+	struct machines machines;
+	struct machine *machine;
+	struct thread *thread;
+	struct perf_record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 4000,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int err = -1, ret;
+	pid_t pid;
+	struct map *map;
+	bool have_vmlinux, excl_kernel = false;
+
+	pid = getpid();
+
+	machines__init(&machines);
+	machine = &machines.host;
+
+	ret = machine__create_kernel_maps(machine);
+	if (ret < 0) {
+		pr_debug("machine__create_kernel_maps failed\n");
+		goto out_err;
+	}
+
+	/* Load kernel map */
+	map = machine->vmlinux_maps[MAP__FUNCTION];
+	ret = map__load(map, NULL);
+	if (ret < 0) {
+		pr_debug("map__load failed\n");
+		goto out_err;
+	}
+	have_vmlinux = map->dso->symtab_type == DSO_BINARY_TYPE__VMLINUX;
+	/* No point getting kernel events if there is no vmlinux */
+	if (!have_vmlinux)
+		excl_kernel = true;
+
+	threads = thread_map__new_by_tid(pid);
+	if (!threads) {
+		pr_debug("thread_map__new_by_tid failed\n");
+		goto out_err;
+	}
+
+	ret = perf_event__synthesize_thread_map(NULL, threads,
+						perf_event__process, machine);
+	if (ret < 0) {
+		pr_debug("perf_event__synthesize_thread_map failed\n");
+		goto out_err;
+	}
+
+	thread = machine__findnew_thread(machine, pid);
+	if (!thread) {
+		pr_debug("machine__findnew_thread failed\n");
+		goto out_err;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (!cpus) {
+		pr_debug("cpu_map__new failed\n");
+		goto out_err;
+	}
+
+	while (1) {
+		const char *str;
+
+		evlist = perf_evlist__new();
+		if (!evlist) {
+			pr_debug("perf_evlist__new failed\n");
+			goto out_err;
+		}
+
+		perf_evlist__set_maps(evlist, cpus, threads);
+
+		if (excl_kernel)
+			str = "cycles:u";
+		else
+			str = "cycles";
+		pr_debug("Parsing event '%s'\n", str);
+		ret = parse_events(evlist, str);
+		if (ret < 0) {
+			pr_debug("parse_events failed\n");
+			goto out_err;
+		}
+
+		perf_evlist__config(evlist, &opts);
+
+		evsel = perf_evlist__first(evlist);
+
+		evsel->attr.comm = 1;
+		evsel->attr.disabled = 1;
+		evsel->attr.enable_on_exec = 0;
+
+		ret = perf_evlist__open(evlist);
+		if (ret < 0) {
+			if (!excl_kernel) {
+				excl_kernel = true;
+				perf_evlist__delete(evlist);
+				evlist = NULL;
+				continue;
+			}
+			pr_debug("perf_evlist__open failed\n");
+			goto out_err;
+		}
+		break;
+	}
+
+	ret = perf_evlist__mmap(evlist, UINT_MAX, false);
+	if (ret < 0) {
+		pr_debug("perf_evlist__mmap failed\n");
+		goto out_err;
+	}
+
+	perf_evlist__enable(evlist);
+
+	do_something();
+
+	perf_evlist__disable(evlist);
+
+	ret = process_events(machine, evlist);
+	if (ret < 0)
+		goto out_err;
+
+	if (!have_vmlinux)
+		err = TEST_CODE_READING_NO_VMLINUX;
+	else if (excl_kernel)
+		err = TEST_CODE_READING_NO_ACCESS;
+	else
+		err = TEST_CODE_READING_OK;
+out_err:
+	if (evlist) {
+		perf_evlist__munmap(evlist);
+		perf_evlist__close(evlist);
+		perf_evlist__delete(evlist);
+	}
+	if (cpus)
+		cpu_map__delete(cpus);
+	if (threads)
+		thread_map__delete(threads);
+	machines__destroy_kernel_maps(&machines);
+	machine__delete_threads(machine);
+	machines__exit(&machines);
+
+	return err;
+}
+
+int test__code_reading(void)
+{
+	int ret;
+
+	ret = do_test_code_reading();
+
+	switch (ret) {
+	case TEST_CODE_READING_OK:
+		return 0;
+	case TEST_CODE_READING_NO_VMLINUX:
+		fprintf(stderr, " (no vmlinux)");
+		return 0;
+	case TEST_CODE_READING_NO_ACCESS:
+		fprintf(stderr, " (no access)");
+		return 0;
+	default:
+		return -1;
+	};
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d22202a..c748f53 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -36,5 +36,6 @@ int test__bp_signal_overflow(void);
 int test__task_exit(void);
 int test__sw_clock_freq(void);
 int test__perf_time_to_tsc(void);
+int test__code_reading(void);
 
 #endif /* TESTS_H */
-- 
cgit v0.10.2


From 5b7ba82a75915e739709d0ace4bb559cb280db09 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:46 +0300
Subject: perf symbols: Load kernel maps before using

In order to use kernel maps to read object code, those maps must be
adjusted to map to the dso file offset.  Because lazy-initialization is
used, that is not done until symbols are loaded.  However the maps are
first used by thread__find_addr_map() before symbols are loaded.  So
this patch changes thread__find_addr_map() to "load" kernel maps before
using them.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 1d8de2e..f012a98 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -206,7 +206,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
+			      event->ip.ip, &al, NULL);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1cad370..cd616ff 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -341,10 +341,10 @@ static void print_sample_addr(union perf_event *event,
 		return;
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      sample->addr, &al);
+			      sample->addr, &al, NULL);
 	if (!al.map)
 		thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
-				      sample->addr, &al);
+				      sample->addr, &al, NULL);
 
 	al.cpu = sample->cpu;
 	al.sym = NULL;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 28bee62..0c7b052 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -138,7 +138,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr,
-			      &al);
+			      &al, NULL);
 	if (!al.map || !al.map->dso) {
 		pr_debug("thread__find_addr_map failed\n");
 		return -1;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 5295625..3a0f508 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -33,7 +33,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
+			      event->ip.ip, &al, NULL);
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9541270..cc7c0c9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -592,9 +592,10 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
 void thread__find_addr_map(struct thread *self,
 			   struct machine *machine, u8 cpumode,
 			   enum map_type type, u64 addr,
-			   struct addr_location *al)
+			   struct addr_location *al, symbol_filter_t filter)
 {
 	struct map_groups *mg = &self->mg;
+	bool load_map = false;
 
 	al->thread = self;
 	al->addr = addr;
@@ -609,11 +610,13 @@ void thread__find_addr_map(struct thread *self,
 	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
 		al->level = 'k';
 		mg = &machine->kmaps;
+		load_map = true;
 	} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
 		al->level = '.';
 	} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
 		al->level = 'g';
 		mg = &machine->kmaps;
+		load_map = true;
 	} else {
 		/*
 		 * 'u' means guest os user space.
@@ -654,8 +657,15 @@ try_again:
 			mg = &machine->kmaps;
 			goto try_again;
 		}
-	} else
+	} else {
+		/*
+		 * Kernel maps might be changed when loading symbols so loading
+		 * must be done prior to using kernel maps.
+		 */
+		if (load_map)
+			map__load(al->map, filter);
 		al->addr = al->map->map_ip(al->map, al->addr);
+	}
 }
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
@@ -663,7 +673,7 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				struct addr_location *al,
 				symbol_filter_t filter)
 {
-	thread__find_addr_map(thread, machine, cpumode, type, addr, al);
+	thread__find_addr_map(thread, machine, cpumode, type, addr, al, filter);
 	if (al->map != NULL)
 		al->sym = map__find_symbol(al->map, al->addr, filter);
 	else
@@ -699,7 +709,7 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		machine__create_kernel_maps(machine);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, al);
+			      event->ip.ip, al, filter);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 0fe1f9c..f98d1d9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -41,7 +41,7 @@ static inline struct map *thread__find_map(struct thread *self,
 
 void thread__find_addr_map(struct thread *thread, struct machine *machine,
 			   u8 cpumode, enum map_type type, u64 addr,
-			   struct addr_location *al);
+			   struct addr_location *al, symbol_filter_t filter);
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 958723b..5bbd494 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -272,7 +272,7 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
 	struct addr_location al;
 
 	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, ip, &al);
+			      MAP__FUNCTION, ip, &al, NULL);
 	return al.map;
 }
 
@@ -349,7 +349,7 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
 	ssize_t size;
 
 	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, addr, &al);
+			      MAP__FUNCTION, addr, &al, NULL);
 	if (!al.map) {
 		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
 		return -1;
-- 
cgit v0.10.2


From 39b12f7812710e9a5896805d96812b3ede7491e8 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:47 +0300
Subject: perf tools: Make it possible to read object code from vmlinux

The new "object code reading" test shows that it is not possible to read
object code from vmlinux.  That is because the mappings do not map to
the dso.  This patch fixes that.

A side-effect of changing the kernel map is that the "reloc" offset must
be taken into account.  As a result of that separate map functions for
relocation are no longer needed.

Also fixing up the maps to match the symbols no longer makes sense and
so is not done.

The vmlinux dso data_type is now set to either DSO_BINARY_TYPE__VMLINUX
or DSO_BINARY_TYPE__GUEST_VMLINUX as appropriate, which enables the
correct file name to be determined by dso__binary_type_file().

This patch breaks the "vmlinux symtab matches kallsyms" test.  That is
fixed in a following patch.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index c4374f0..121583d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -78,6 +78,8 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 			 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
 		break;
 
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
 	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
 		snprintf(file, size, "%s%s",
 			 symbol_conf.symfs, dso->long_name);
@@ -95,9 +97,7 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 
 	default:
 	case DSO_BINARY_TYPE__KALLSYMS:
-	case DSO_BINARY_TYPE__VMLINUX:
 	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
-	case DSO_BINARY_TYPE__GUEST_VMLINUX:
 	case DSO_BINARY_TYPE__JAVA_JIT:
 	case DSO_BINARY_TYPE__NOT_FOUND:
 		ret = -1;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index d51aaf2..02aadaf 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/rbtree.h>
+#include <stdbool.h>
 #include "types.h"
 #include "map.h"
 
@@ -146,4 +147,11 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
 size_t dso__fprintf_symbols_by_name(struct dso *dso,
 				    enum map_type type, FILE *fp);
 size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+
+static inline bool dso__is_vmlinux(struct dso *dso)
+{
+	return dso->data_type == DSO_BINARY_TYPE__VMLINUX ||
+	       dso->data_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
+}
+
 #endif /* __PERF_DSO */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f9f9d63..dc35dcf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -628,10 +628,8 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
 	struct map *map = machine->vmlinux_maps[type];
 	int ret = dso__load_vmlinux_path(map->dso, map, filter);
 
-	if (ret > 0) {
+	if (ret > 0)
 		dso__set_loaded(map->dso, type);
-		map__reloc_vmlinux(map);
-	}
 
 	return ret;
 }
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8bcdf9e..5f662a3 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -182,12 +182,6 @@ int map__load(struct map *map, symbol_filter_t filter)
 #endif
 		return -1;
 	}
-	/*
-	 * Only applies to the kernel, as its symtabs aren't relative like the
-	 * module ones.
-	 */
-	if (map->dso->kernel)
-		map__reloc_vmlinux(map);
 
 	return 0;
 }
@@ -513,35 +507,6 @@ int map_groups__clone(struct map_groups *mg,
 	return 0;
 }
 
-static u64 map__reloc_map_ip(struct map *map, u64 ip)
-{
-	return ip + (s64)map->pgoff;
-}
-
-static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
-{
-	return ip - (s64)map->pgoff;
-}
-
-void map__reloc_vmlinux(struct map *map)
-{
-	struct kmap *kmap = map__kmap(map);
-	s64 reloc;
-
-	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
-		return;
-
-	reloc = (kmap->ref_reloc_sym->unrelocated_addr -
-		 kmap->ref_reloc_sym->addr);
-
-	if (!reloc)
-		return;
-
-	map->map_ip   = map__reloc_map_ip;
-	map->unmap_ip = map__reloc_unmap_ip;
-	map->pgoff    = reloc;
-}
-
 void maps__insert(struct rb_root *maps, struct map *map)
 {
 	struct rb_node **p = &maps->rb_node;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4b12bf8..ed6f443 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -603,7 +603,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 						     ".gnu.prelink_undo",
 						     NULL) != NULL);
 	} else {
-		ss->adjust_symbols = 0;
+		ss->adjust_symbols = ehdr.e_type == ET_EXEC;
 	}
 
 	ss->name   = strdup(name);
@@ -624,6 +624,37 @@ out_close:
 	return err;
 }
 
+/**
+ * ref_reloc_sym_not_found - has kernel relocation symbol been found.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns %true if we are dealing with the kernel maps and the
+ * relocation reference symbol has not yet been found.  Otherwise %false is
+ * returned.
+ */
+static bool ref_reloc_sym_not_found(struct kmap *kmap)
+{
+	return kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
+	       !kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/**
+ * ref_reloc - kernel relocation offset.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns the offset of kernel addresses as determined by using
+ * the relocation reference symbol i.e. if the kernel has not been relocated
+ * then the return value is zero.
+ */
+static u64 ref_reloc(struct kmap *kmap)
+{
+	if (kmap && kmap->ref_reloc_sym &&
+	    kmap->ref_reloc_sym->unrelocated_addr)
+		return kmap->ref_reloc_sym->addr -
+		       kmap->ref_reloc_sym->unrelocated_addr;
+	return 0;
+}
+
 int dso__load_sym(struct dso *dso, struct map *map,
 		  struct symsrc *syms_ss, struct symsrc *runtime_ss,
 		  symbol_filter_t filter, int kmodule)
@@ -642,6 +673,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	Elf_Scn *sec, *sec_strndx;
 	Elf *elf;
 	int nr = 0;
+	bool remap_kernel = false, adjust_kernel_syms = false;
 
 	dso->symtab_type = syms_ss->type;
 
@@ -681,7 +713,31 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
 	memset(&sym, 0, sizeof(sym));
-	dso->adjust_symbols = runtime_ss->adjust_symbols;
+
+	/*
+	 * The kernel relocation symbol is needed in advance in order to adjust
+	 * kernel maps correctly.
+	 */
+	if (ref_reloc_sym_not_found(kmap)) {
+		elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+			const char *elf_name = elf_sym__name(&sym, symstrs);
+
+			if (strcmp(elf_name, kmap->ref_reloc_sym->name))
+				continue;
+			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
+			break;
+		}
+	}
+
+	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
+	/*
+	 * Initial kernel and module mappings do not map to the dso.  For
+	 * function mappings, flag the fixups.
+	 */
+	if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) {
+		remap_kernel = true;
+		adjust_kernel_syms = dso->adjust_symbols;
+	}
 	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
 		struct symbol *f;
 		const char *elf_name = elf_sym__name(&sym, symstrs);
@@ -690,10 +746,6 @@ int dso__load_sym(struct dso *dso, struct map *map,
 		const char *section_name;
 		bool used_opd = false;
 
-		if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
-		    strcmp(elf_name, kmap->ref_reloc_sym->name) == 0)
-			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
-
 		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
 
@@ -745,15 +797,37 @@ int dso__load_sym(struct dso *dso, struct map *map,
 		    (sym.st_value & 1))
 			--sym.st_value;
 
-		if (dso->kernel != DSO_TYPE_USER || kmodule) {
+		if (dso->kernel || kmodule) {
 			char dso_name[PATH_MAX];
 
+			/* Adjust symbol to map to file offset */
+			if (adjust_kernel_syms)
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
 			if (strcmp(section_name,
 				   (curr_dso->short_name +
 				    dso->short_name_len)) == 0)
 				goto new_symbol;
 
 			if (strcmp(section_name, ".text") == 0) {
+				/*
+				 * The initial kernel mapping is based on
+				 * kallsyms and identity maps.  Overwrite it to
+				 * map to the kernel dso.
+				 */
+				if (remap_kernel && dso->kernel) {
+					remap_kernel = false;
+					map->start = shdr.sh_addr +
+						     ref_reloc(kmap);
+					map->end = map->start + shdr.sh_size;
+					map->pgoff = shdr.sh_offset;
+					map->map_ip = map__map_ip;
+					map->unmap_ip = map__unmap_ip;
+					/* Ensure maps are correctly ordered */
+					map_groups__remove(kmap->kmaps, map);
+					map_groups__insert(kmap->kmaps, map);
+				}
+
 				curr_map = map;
 				curr_dso = dso;
 				goto new_symbol;
@@ -781,8 +855,16 @@ int dso__load_sym(struct dso *dso, struct map *map,
 					dso__delete(curr_dso);
 					goto out_elf_end;
 				}
-				curr_map->map_ip = identity__map_ip;
-				curr_map->unmap_ip = identity__map_ip;
+				if (adjust_kernel_syms) {
+					curr_map->start = shdr.sh_addr +
+							  ref_reloc(kmap);
+					curr_map->end = curr_map->start +
+							shdr.sh_size;
+					curr_map->pgoff = shdr.sh_offset;
+				} else {
+					curr_map->map_ip = identity__map_ip;
+					curr_map->unmap_ip = identity__map_ip;
+				}
 				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmap->kmaps, curr_map);
 				dsos__add(&dso->node, curr_dso);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ea62ecd..04300dd 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -917,6 +917,10 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
 	symsrc__destroy(&ss);
 
 	if (err > 0) {
+		if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+			dso->data_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+		else
+			dso->data_type = DSO_BINARY_TYPE__VMLINUX;
 		dso__set_long_name(dso, (char *)vmlinux);
 		dso__set_loaded(dso, map->type);
 		pr_debug("Using %s for symbols\n", symfs_vmlinux);
@@ -989,7 +993,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 			dso__set_long_name(dso,
 					   strdup(symbol_conf.vmlinux_name));
 			dso->lname_alloc = 1;
-			goto out_fixup;
+			return err;
 		}
 		return err;
 	}
@@ -997,7 +1001,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 	if (vmlinux_path != NULL) {
 		err = dso__load_vmlinux_path(dso, map, filter);
 		if (err > 0)
-			goto out_fixup;
+			return err;
 	}
 
 	/* do not try local files if a symfs was given */
@@ -1058,7 +1062,6 @@ do_kallsyms:
 
 	if (err > 0) {
 		dso__set_long_name(dso, strdup("[kernel.kallsyms]"));
-out_fixup:
 		map__fixup_start(map);
 		map__fixup_end(map);
 	}
@@ -1089,7 +1092,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 		if (symbol_conf.default_guest_vmlinux_name != NULL) {
 			err = dso__load_vmlinux(dso, map,
 				symbol_conf.default_guest_vmlinux_name, filter);
-			goto out_try_fixup;
+			return err;
 		}
 
 		kallsyms_filename = symbol_conf.default_guest_kallsyms;
@@ -1101,15 +1104,10 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 	}
 
 	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
-	if (err > 0)
-		pr_debug("Using %s for symbols\n", kallsyms_filename);
-
-out_try_fixup:
 	if (err > 0) {
-		if (kallsyms_filename != NULL) {
-			machine__mmap_name(machine, path, sizeof(path));
-			dso__set_long_name(dso, strdup(path));
-		}
+		pr_debug("Using %s for symbols\n", kallsyms_filename);
+		machine__mmap_name(machine, path, sizeof(path));
+		dso__set_long_name(dso, strdup(path));
 		map__fixup_start(map);
 		map__fixup_end(map);
 	}
-- 
cgit v0.10.2


From d380b34830cc76461feab012f0bc52b01e65087c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:48 +0300
Subject: perf tests: Adjust the vmlinux symtab matches kallsyms test

The vmlinux maps now map to the dso and the symbol values are now file
offsets.  For comparison with kallsyms the virtual memory address is
needed which is obtained by unmapping the symbol value.

The "vmlinux symtab matches kallsyms" test is adjusted accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index add1539..e2e1498 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -25,6 +25,7 @@ int test__vmlinux_matches_kallsyms(void)
 	struct machine kallsyms, vmlinux;
 	enum map_type type = MAP__FUNCTION;
 	struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", };
+	u64 mem_start, mem_end;
 
 	/*
 	 * Step 1:
@@ -123,10 +124,14 @@ int test__vmlinux_matches_kallsyms(void)
 		if (sym->start == sym->end)
 			continue;
 
-		first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
+
+		first_pair = machine__find_kernel_symbol(&kallsyms, type,
+							 mem_start, NULL, NULL);
 		pair = first_pair;
 
-		if (pair && pair->start == sym->start) {
+		if (pair && pair->start == mem_start) {
 next_pair:
 			if (strcmp(sym->name, pair->name) == 0) {
 				/*
@@ -138,10 +143,11 @@ next_pair:
 				 * off the real size. More than that and we
 				 * _really_ have a problem.
 				 */
-				s64 skew = sym->end - pair->end;
+				s64 skew = mem_end - pair->end;
 				if (llabs(skew) >= page_size)
 					pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
-						 sym->start, sym->name, sym->end, pair->end);
+						 mem_start, sym->name, mem_end,
+						 pair->end);
 
 				/*
 				 * Do not count this as a failure, because we
@@ -159,7 +165,7 @@ detour:
 				if (nnd) {
 					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
-					if (next->start == sym->start) {
+					if (next->start == mem_start) {
 						pair = next;
 						goto next_pair;
 					}
@@ -172,10 +178,11 @@ detour:
 				}
 
 				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-					 sym->start, sym->name, pair->name);
+					 mem_start, sym->name, pair->name);
 			}
 		} else
-			pr_debug("%#" PRIx64 ": %s not on kallsyms\n", sym->start, sym->name);
+			pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
+				 mem_start, sym->name);
 
 		err = -1;
 	}
@@ -208,16 +215,19 @@ detour:
 	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
 
-		pair = map_groups__find(&kallsyms.kmaps, type, pos->start);
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end);
+
+		pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
 		if (pair == NULL || pair->priv)
 			continue;
 
-		if (pair->start == pos->start) {
+		if (pair->start == mem_start) {
 			pair->priv = 1;
 			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
 				pos->start, pos->end, pos->pgoff, pos->dso->name);
-			if (pos->pgoff != pair->pgoff || pos->end != pair->end)
-				pr_info(": \n*%" PRIx64 "-%" PRIx64 " %" PRIx64 "",
+			if (mem_end != pair->end)
+				pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
 					pair->start, pair->end, pair->pgoff);
 			pr_info(" %s\n", pair->dso->name);
 			pair->priv = 1;
-- 
cgit v0.10.2


From 0131c4ec794a7409eafff0c79105309540aaca4d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:50 +0300
Subject: perf tools: Make it possible to read object code from kernel modules

The new "object code reading" test shows that it is not possible to read
object code from kernel modules.  That is because the mappings do not
map to the dsos.  This patch fixes that.

This involves identifying and flagging relocatable (ELF type ET_REL)
files (e.g. kernel modules) for symbol adjustment and updating
map__rip_2objdump() accordingly.  The kmodule parameter of
dso__load_sym() is taken into use and the module map altered to map to
the dso.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 121583d..1955804 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -419,6 +419,7 @@ struct dso *dso__new(const char *name)
 		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
 		dso->data_type   = DSO_BINARY_TYPE__NOT_FOUND;
 		dso->loaded = 0;
+		dso->rel = 0;
 		dso->sorted_by_name = 0;
 		dso->has_build_id = 0;
 		dso->kernel = DSO_TYPE_USER;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 02aadaf..735a837 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -85,6 +85,7 @@ struct dso {
 	u8		 lname_alloc:1;
 	u8		 sorted_by_name;
 	u8		 loaded;
+	u8		 rel;
 	u8		 build_id[BUILD_ID_SIZE];
 	const char	 *short_name;
 	char		 *long_name;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 5f662a3..4d599fe 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -248,14 +248,18 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
 
 /*
  * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
- * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
+ * relative to section start.
  */
 u64 map__rip_2objdump(struct map *map, u64 rip)
 {
-	u64 addr = map->dso->adjust_symbols ?
-			map->unmap_ip(map, rip) :	/* RIP -> IP */
-			rip;
-	return addr;
+	if (!map->dso->adjust_symbols)
+		return rip;
+
+	if (map->dso->rel)
+		return rip - map->pgoff;
+
+	return map->unmap_ip(map, rip);
 }
 
 void map_groups__init(struct map_groups *mg)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index ed6f443..3eaa7b4 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -599,11 +599,13 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	if (dso->kernel == DSO_TYPE_USER) {
 		GElf_Shdr shdr;
 		ss->adjust_symbols = (ehdr.e_type == ET_EXEC ||
+				ehdr.e_type == ET_REL ||
 				elf_section_by_name(elf, &ehdr, &shdr,
 						     ".gnu.prelink_undo",
 						     NULL) != NULL);
 	} else {
-		ss->adjust_symbols = ehdr.e_type == ET_EXEC;
+		ss->adjust_symbols = ehdr.e_type == ET_EXEC ||
+				     ehdr.e_type == ET_REL;
 	}
 
 	ss->name   = strdup(name);
@@ -676,6 +678,14 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	bool remap_kernel = false, adjust_kernel_syms = false;
 
 	dso->symtab_type = syms_ss->type;
+	dso->rel = syms_ss->ehdr.e_type == ET_REL;
+
+	/*
+	 * Modules may already have symbols from kallsyms, but those symbols
+	 * have the wrong values for the dso maps, so remove them.
+	 */
+	if (kmodule && syms_ss->symtab)
+		symbols__delete(&dso->symbols[map->type]);
 
 	if (!syms_ss->symtab) {
 		syms_ss->symtab  = syms_ss->dynsym;
@@ -828,11 +838,24 @@ int dso__load_sym(struct dso *dso, struct map *map,
 					map_groups__insert(kmap->kmaps, map);
 				}
 
+				/*
+				 * The initial module mapping is based on
+				 * /proc/modules mapped to offset zero.
+				 * Overwrite it to map to the module dso.
+				 */
+				if (remap_kernel && kmodule) {
+					remap_kernel = false;
+					map->pgoff = shdr.sh_offset;
+				}
+
 				curr_map = map;
 				curr_dso = dso;
 				goto new_symbol;
 			}
 
+			if (!kmap)
+				goto new_symbol;
+
 			snprintf(dso_name, sizeof(dso_name),
 				 "%s%s", dso->short_name, section_name);
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 04300dd..b9056a8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -854,10 +854,15 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
-	if (syms_ss)
-		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, 0);
-	else
+	if (syms_ss) {
+		int km;
+
+		km = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+		     dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
+		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, km);
+	} else {
 		ret = -1;
+	}
 
 	if (ret > 0) {
 		int nr_plt;
-- 
cgit v0.10.2


From 8e0cf965f95edd41df11cca50b92b4cb6ea8d80a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:51 +0300
Subject: perf symbols: Add support for reading from /proc/kcore

In the absence of vmlinux, perf tools uses kallsyms for symbols.  If the
user has access, now also map to /proc/kcore.

The dso data_type is now set to either DSO_BINARY_TYPE__KCORE or
DSO_BINARY_TYPE__GUEST_KCORE as appropriate.

This patch breaks the "vmlinux symtab matches kallsyms" test.  That is
fixed in a following patch.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 1955804..e3c1ff8 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -95,6 +95,11 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 			 dso->long_name);
 		break;
 
+	case DSO_BINARY_TYPE__KCORE:
+	case DSO_BINARY_TYPE__GUEST_KCORE:
+		snprintf(file, size, "%s", dso->long_name);
+		break;
+
 	default:
 	case DSO_BINARY_TYPE__KALLSYMS:
 	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 735a837..b793053 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -21,6 +21,8 @@ enum dso_binary_type {
 	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
 	DSO_BINARY_TYPE__GUEST_KMODULE,
 	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__KCORE,
+	DSO_BINARY_TYPE__GUEST_KCORE,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
@@ -155,4 +157,10 @@ static inline bool dso__is_vmlinux(struct dso *dso)
 	       dso->data_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
 }
 
+static inline bool dso__is_kcore(struct dso *dso)
+{
+	return dso->data_type == DSO_BINARY_TYPE__KCORE ||
+	       dso->data_type == DSO_BINARY_TYPE__GUEST_KCORE;
+}
+
 #endif /* __PERF_DSO */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index dc35dcf..ef3b49c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -856,6 +856,18 @@ static void machine__set_kernel_mmap_len(struct machine *machine,
 	}
 }
 
+static bool machine__uses_kcore(struct machine *machine)
+{
+	struct dso *dso;
+
+	list_for_each_entry(dso, &machine->kernel_dsos, node) {
+		if (dso__is_kcore(dso))
+			return true;
+	}
+
+	return false;
+}
+
 static int machine__process_kernel_mmap_event(struct machine *machine,
 					      union perf_event *event)
 {
@@ -864,6 +876,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 	enum dso_kernel_type kernel_type;
 	bool is_kernel_mmap;
 
+	/* If we have maps from kcore then we do not need or want any others */
+	if (machine__uses_kcore(machine))
+		return 0;
+
 	machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
 	if (machine__is_host(machine))
 		kernel_type = DSO_TYPE_KERNEL;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 4d599fe..9e8304c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -555,3 +555,21 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
 
 	return NULL;
 }
+
+struct map *maps__first(struct rb_root *maps)
+{
+	struct rb_node *first = rb_first(maps);
+
+	if (first)
+		return rb_entry(first, struct map, rb_node);
+	return NULL;
+}
+
+struct map *maps__next(struct map *map)
+{
+	struct rb_node *next = rb_next(&map->rb_node);
+
+	if (next)
+		return rb_entry(next, struct map, rb_node);
+	return NULL;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index a887f2c..2cc93cb 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -112,6 +112,8 @@ size_t __map_groups__fprintf_maps(struct map_groups *mg,
 void maps__insert(struct rb_root *maps, struct map *map);
 void maps__remove(struct rb_root *maps, struct map *map);
 struct map *maps__find(struct rb_root *maps, u64 addr);
+struct map *maps__first(struct rb_root *maps);
+struct map *maps__next(struct map *map);
 void map_groups__init(struct map_groups *mg);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct map_groups *mg,
@@ -139,6 +141,17 @@ static inline struct map *map_groups__find(struct map_groups *mg,
 	return maps__find(&mg->maps[type], addr);
 }
 
+static inline struct map *map_groups__first(struct map_groups *mg,
+					    enum map_type type)
+{
+	return maps__first(&mg->maps[type]);
+}
+
+static inline struct map *map_groups__next(struct map *map)
+{
+	return maps__next(map);
+}
+
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
 				       enum map_type type, u64 addr,
 				       struct map **mapp,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3eaa7b4..a7b9ab5 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -951,6 +951,57 @@ out_elf_end:
 	return err;
 }
 
+static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
+{
+	GElf_Phdr phdr;
+	size_t i, phdrnum;
+	int err;
+	u64 sz;
+
+	if (elf_getphdrnum(elf, &phdrnum))
+		return -1;
+
+	for (i = 0; i < phdrnum; i++) {
+		if (gelf_getphdr(elf, i, &phdr) == NULL)
+			return -1;
+		if (phdr.p_type != PT_LOAD)
+			continue;
+		if (exe) {
+			if (!(phdr.p_flags & PF_X))
+				continue;
+		} else {
+			if (!(phdr.p_flags & PF_R))
+				continue;
+		}
+		sz = min(phdr.p_memsz, phdr.p_filesz);
+		if (!sz)
+			continue;
+		err = mapfn(phdr.p_vaddr, sz, phdr.p_offset, data);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit)
+{
+	int err;
+	Elf *elf;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		return -1;
+
+	if (is_64_bit)
+		*is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+	err = elf_read_maps(elf, exe, mapfn, data);
+
+	elf_end(elf);
+	return err;
+}
+
 void symbol__elf_init(void)
 {
 	elf_version(EV_CURRENT);
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index a7390cd..3a802c3 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -301,6 +301,13 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
 	return 0;
 }
 
+int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
+		    mapfn_t mapfn __maybe_unused, void *data __maybe_unused,
+		    bool *is_64_bit __maybe_unused)
+{
+	return -1;
+}
+
 void symbol__elf_init(void)
 {
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b9056a8..77f3b95 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -327,6 +327,16 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
 	return NULL;
 }
 
+static struct symbol *symbols__first(struct rb_root *symbols)
+{
+	struct rb_node *n = rb_first(symbols);
+
+	if (n)
+		return rb_entry(n, struct symbol, rb_node);
+
+	return NULL;
+}
+
 struct symbol_name_rb_node {
 	struct rb_node	rb_node;
 	struct symbol	sym;
@@ -397,6 +407,11 @@ struct symbol *dso__find_symbol(struct dso *dso,
 	return symbols__find(&dso->symbols[type], addr);
 }
 
+struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
+{
+	return symbols__first(&dso->symbols[type]);
+}
+
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name)
 {
@@ -533,6 +548,53 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
 	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
 }
 
+static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
+					 symbol_filter_t filter)
+{
+	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	struct map *curr_map;
+	struct symbol *pos;
+	int count = 0, moved = 0;
+	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_node *next = rb_first(root);
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		module = strchr(pos->name, '\t');
+		if (module)
+			*module = '\0';
+
+		curr_map = map_groups__find(kmaps, map->type, pos->start);
+
+		if (!curr_map || (filter && filter(curr_map, pos))) {
+			rb_erase(&pos->rb_node, root);
+			symbol__delete(pos);
+		} else {
+			pos->start -= curr_map->start - curr_map->pgoff;
+			if (pos->end)
+				pos->end -= curr_map->start - curr_map->pgoff;
+			if (curr_map != map) {
+				rb_erase(&pos->rb_node, root);
+				symbols__insert(
+					&curr_map->dso->symbols[curr_map->type],
+					pos);
+				++moved;
+			} else {
+				++count;
+			}
+		}
+	}
+
+	/* Symbols have been adjusted */
+	dso->adjust_symbols = 1;
+
+	return count + moved;
+}
+
 /*
  * Split the symbols into maps, making sure there are no overlaps, i.e. the
  * kernel range is broken in several maps, named [kernel].N, as we don't have
@@ -674,6 +736,161 @@ bool symbol__restricted_filename(const char *filename,
 	return restricted;
 }
 
+struct kcore_mapfn_data {
+	struct dso *dso;
+	enum map_type type;
+	struct list_head maps;
+};
+
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_mapfn_data *md = data;
+	struct map *map;
+
+	map = map__new2(start, md->dso, md->type);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->end = map->start + len;
+	map->pgoff = pgoff;
+
+	list_add(&map->node, &md->maps);
+
+	return 0;
+}
+
+/*
+ * If kallsyms is referenced by name then we look for kcore in the same
+ * directory.
+ */
+static bool kcore_filename_from_kallsyms_filename(char *kcore_filename,
+						  const char *kallsyms_filename)
+{
+	char *name;
+
+	strcpy(kcore_filename, kallsyms_filename);
+	name = strrchr(kcore_filename, '/');
+	if (!name)
+		return false;
+
+	if (!strcmp(name, "/kallsyms")) {
+		strcpy(name, "/kcore");
+		return true;
+	}
+
+	return false;
+}
+
+static int dso__load_kcore(struct dso *dso, struct map *map,
+			   const char *kallsyms_filename)
+{
+	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	struct machine *machine = kmaps->machine;
+	struct kcore_mapfn_data md;
+	struct map *old_map, *new_map, *replacement_map = NULL;
+	bool is_64_bit;
+	int err, fd;
+	char kcore_filename[PATH_MAX];
+	struct symbol *sym;
+
+	/* This function requires that the map is the kernel map */
+	if (map != machine->vmlinux_maps[map->type])
+		return -EINVAL;
+
+	if (!kcore_filename_from_kallsyms_filename(kcore_filename,
+						   kallsyms_filename))
+		return -EINVAL;
+
+	md.dso = dso;
+	md.type = map->type;
+	INIT_LIST_HEAD(&md.maps);
+
+	fd = open(kcore_filename, O_RDONLY);
+	if (fd < 0)
+		return -EINVAL;
+
+	/* Read new maps into temporary lists */
+	err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
+			      &is_64_bit);
+	if (err)
+		goto out_err;
+
+	if (list_empty(&md.maps)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	/* Remove old maps */
+	old_map = map_groups__first(kmaps, map->type);
+	while (old_map) {
+		struct map *next = map_groups__next(old_map);
+
+		if (old_map != map)
+			map_groups__remove(kmaps, old_map);
+		old_map = next;
+	}
+
+	/* Find the kernel map using the first symbol */
+	sym = dso__first_symbol(dso, map->type);
+	list_for_each_entry(new_map, &md.maps, node) {
+		if (sym && sym->start >= new_map->start &&
+		    sym->start < new_map->end) {
+			replacement_map = new_map;
+			break;
+		}
+	}
+
+	if (!replacement_map)
+		replacement_map = list_entry(md.maps.next, struct map, node);
+
+	/* Add new maps */
+	while (!list_empty(&md.maps)) {
+		new_map = list_entry(md.maps.next, struct map, node);
+		list_del(&new_map->node);
+		if (new_map == replacement_map) {
+			map->start	= new_map->start;
+			map->end	= new_map->end;
+			map->pgoff	= new_map->pgoff;
+			map->map_ip	= new_map->map_ip;
+			map->unmap_ip	= new_map->unmap_ip;
+			map__delete(new_map);
+			/* Ensure maps are correctly ordered */
+			map_groups__remove(kmaps, map);
+			map_groups__insert(kmaps, map);
+		} else {
+			map_groups__insert(kmaps, new_map);
+		}
+	}
+
+	/*
+	 * Set the data type and long name so that kcore can be read via
+	 * dso__data_read_addr().
+	 */
+	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		dso->data_type = DSO_BINARY_TYPE__GUEST_KCORE;
+	else
+		dso->data_type = DSO_BINARY_TYPE__KCORE;
+	dso__set_long_name(dso, strdup(kcore_filename));
+
+	close(fd);
+
+	if (map->type == MAP__FUNCTION)
+		pr_debug("Using %s for kernel object code\n", kcore_filename);
+	else
+		pr_debug("Using %s for kernel data\n", kcore_filename);
+
+	return 0;
+
+out_err:
+	while (!list_empty(&md.maps)) {
+		map = list_entry(md.maps.next, struct map, node);
+		list_del(&map->node);
+		map__delete(map);
+	}
+	close(fd);
+	return -EINVAL;
+}
+
 int dso__load_kallsyms(struct dso *dso, const char *filename,
 		       struct map *map, symbol_filter_t filter)
 {
@@ -691,7 +908,10 @@ int dso__load_kallsyms(struct dso *dso, const char *filename,
 	else
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
-	return dso__split_kallsyms(dso, map, filter);
+	if (!dso__load_kcore(dso, map, filename))
+		return dso__split_kallsyms_for_kcore(dso, map, filter);
+	else
+		return dso__split_kallsyms(dso, map, filter);
 }
 
 static int dso__load_perf_map(struct dso *dso, struct map *map,
@@ -1065,7 +1285,7 @@ do_kallsyms:
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
 	free(kallsyms_allocated_filename);
 
-	if (err > 0) {
+	if (err > 0 && !dso__is_kcore(dso)) {
 		dso__set_long_name(dso, strdup("[kernel.kallsyms]"));
 		map__fixup_start(map);
 		map__fixup_end(map);
@@ -1109,8 +1329,9 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 	}
 
 	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
-	if (err > 0) {
+	if (err > 0)
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
+	if (err > 0 && !dso__is_kcore(dso)) {
 		machine__mmap_name(machine, path, sizeof(path));
 		dso__set_long_name(dso, strdup(path));
 		map__fixup_start(map);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5f720dc..fd5b70e 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -215,6 +215,7 @@ struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
 				u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name);
+struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
@@ -247,4 +248,8 @@ void symbols__fixup_duplicate(struct rb_root *symbols);
 void symbols__fixup_end(struct rb_root *symbols);
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type);
 
+typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit);
+
 #endif /* __PERF_SYMBOL */
-- 
cgit v0.10.2


From 82e75d00adc5bde3cf98f11e937eed6127163969 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:52 +0300
Subject: perf tests: Adjust the vmlinux symtab matches kallsyms test again

The kallsyms maps may now map to kcore, and the symbol values may now be
file offsets.  For comparison with vmlinux the virtual memory address is
needed, which is obtained by unmapping the symbol value.

The "vmlinux symtab matches kallsyms" test is adjusted accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index e2e1498..2bd13ed 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -16,6 +16,8 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
 	return 0;
 }
 
+#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
+
 int test__vmlinux_matches_kallsyms(void)
 {
 	int err = -1;
@@ -74,7 +76,7 @@ int test__vmlinux_matches_kallsyms(void)
 		goto out;
 	}
 
-	ref_reloc_sym.addr = sym->start;
+	ref_reloc_sym.addr = UM(sym->start);
 
 	/*
 	 * Step 5:
@@ -131,7 +133,7 @@ int test__vmlinux_matches_kallsyms(void)
 							 mem_start, NULL, NULL);
 		pair = first_pair;
 
-		if (pair && pair->start == mem_start) {
+		if (pair && UM(pair->start) == mem_start) {
 next_pair:
 			if (strcmp(sym->name, pair->name) == 0) {
 				/*
@@ -143,11 +145,11 @@ next_pair:
 				 * off the real size. More than that and we
 				 * _really_ have a problem.
 				 */
-				s64 skew = mem_end - pair->end;
+				s64 skew = mem_end - UM(pair->end);
 				if (llabs(skew) >= page_size)
 					pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
 						 mem_start, sym->name, mem_end,
-						 pair->end);
+						 UM(pair->end));
 
 				/*
 				 * Do not count this as a failure, because we
@@ -165,7 +167,7 @@ detour:
 				if (nnd) {
 					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
-					if (next->start == mem_start) {
+					if (UM(next->start) == mem_start) {
 						pair = next;
 						goto next_pair;
 					}
-- 
cgit v0.10.2


From 7a77bc2c0d2726a7fc9e6b91b36f984c3e377008 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:53 +0300
Subject: perf tests: Add kcore to the object code reading test

Make the "object code reading" test attempt to read from kcore.

The test uses objdump, which struggles with kcore, i.e. it doesn't always
work and sometimes takes a long time.  The test has been made to work
around those issues.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-10-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 0c7b052..8e0943b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -20,6 +20,11 @@
 #define BUFSZ	1024
 #define READLEN	128
 
+struct state {
+	u64 done[1024];
+	size_t done_cnt;
+};
+
 static unsigned int hex(char c)
 {
 	if (c >= '0' && c <= '9')
@@ -107,6 +112,9 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf,
 
 	pr_debug("Objdump command is: %s\n", cmd);
 
+	/* Ignore objdump errors */
+	strcat(cmd, " 2>/dev/null");
+
 	f = popen(cmd, "r");
 	if (!f) {
 		pr_debug("popen failed\n");
@@ -126,7 +134,8 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf,
 }
 
 static int read_object_code(u64 addr, size_t len, u8 cpumode,
-			    struct thread *thread, struct machine *machine)
+			    struct thread *thread, struct machine *machine,
+			    struct state *state)
 {
 	struct addr_location al;
 	unsigned char buf1[BUFSZ];
@@ -146,7 +155,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 
 	pr_debug("File is: %s\n", al.map->dso->long_name);
 
-	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(al.map->dso)) {
 		pr_debug("Unexpected kernel address - skipping\n");
 		return 0;
 	}
@@ -175,6 +185,24 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	if (map__load(al.map, NULL))
 		return -1;
 
+	/* objdump struggles with kcore - try each map only once */
+	if (dso__is_kcore(al.map->dso)) {
+		size_t d;
+
+		for (d = 0; d < state->done_cnt; d++) {
+			if (state->done[d] == al.map->start) {
+				pr_debug("kcore map tested already");
+				pr_debug(" - skipping\n");
+				return 0;
+			}
+		}
+		if (state->done_cnt >= ARRAY_SIZE(state->done)) {
+			pr_debug("Too many kcore maps - skipping\n");
+			return 0;
+		}
+		state->done[state->done_cnt++] = al.map->start;
+	}
+
 	/* Read the object code using objdump */
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
 	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
@@ -186,10 +214,19 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		if (cpumode == PERF_RECORD_MISC_KERNEL ||
 		    cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
 			len -= ret;
-			if (len)
+			if (len) {
 				pr_debug("Reducing len to %zu\n", len);
-			else
+			} else if (dso__is_kcore(al.map->dso)) {
+				/*
+				 * objdump cannot handle very large segments
+				 * that may be found in kcore.
+				 */
+				pr_debug("objdump failed for kcore");
+				pr_debug(" - skipping\n");
+				return 0;
+			} else {
 				return -1;
+			}
 		}
 	}
 	if (ret < 0) {
@@ -209,7 +246,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 
 static int process_sample_event(struct machine *machine,
 				struct perf_evlist *evlist,
-				union perf_event *event)
+				union perf_event *event, struct state *state)
 {
 	struct perf_sample sample;
 	struct thread *thread;
@@ -228,14 +265,15 @@ static int process_sample_event(struct machine *machine,
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	return read_object_code(sample.ip, READLEN, cpumode, thread, machine);
+	return read_object_code(sample.ip, READLEN, cpumode, thread, machine,
+				state);
 }
 
 static int process_event(struct machine *machine, struct perf_evlist *evlist,
-			 union perf_event *event)
+			 union perf_event *event, struct state *state)
 {
 	if (event->header.type == PERF_RECORD_SAMPLE)
-		return process_sample_event(machine, evlist, event);
+		return process_sample_event(machine, evlist, event, state);
 
 	if (event->header.type < PERF_RECORD_MAX)
 		return machine__process_event(machine, event);
@@ -243,14 +281,15 @@ static int process_event(struct machine *machine, struct perf_evlist *evlist,
 	return 0;
 }
 
-static int process_events(struct machine *machine, struct perf_evlist *evlist)
+static int process_events(struct machine *machine, struct perf_evlist *evlist,
+			  struct state *state)
 {
 	union perf_event *event;
 	int i, ret;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
-			ret = process_event(machine, evlist, event);
+			ret = process_event(machine, evlist, event, state);
 			if (ret < 0)
 				return ret;
 		}
@@ -331,10 +370,12 @@ static void do_something(void)
 enum {
 	TEST_CODE_READING_OK,
 	TEST_CODE_READING_NO_VMLINUX,
+	TEST_CODE_READING_NO_KCORE,
 	TEST_CODE_READING_NO_ACCESS,
+	TEST_CODE_READING_NO_KERNEL_OBJ,
 };
 
-static int do_test_code_reading(void)
+static int do_test_code_reading(bool try_kcore)
 {
 	struct machines machines;
 	struct machine *machine;
@@ -348,6 +389,9 @@ static int do_test_code_reading(void)
 			.uses_mmap   = true,
 		},
 	};
+	struct state state = {
+		.done_cnt = 0,
+	};
 	struct thread_map *threads = NULL;
 	struct cpu_map *cpus = NULL;
 	struct perf_evlist *evlist = NULL;
@@ -355,7 +399,7 @@ static int do_test_code_reading(void)
 	int err = -1, ret;
 	pid_t pid;
 	struct map *map;
-	bool have_vmlinux, excl_kernel = false;
+	bool have_vmlinux, have_kcore, excl_kernel = false;
 
 	pid = getpid();
 
@@ -368,6 +412,10 @@ static int do_test_code_reading(void)
 		goto out_err;
 	}
 
+	/* Force the use of kallsyms instead of vmlinux to try kcore */
+	if (try_kcore)
+		symbol_conf.kallsyms_name = "/proc/kallsyms";
+
 	/* Load kernel map */
 	map = machine->vmlinux_maps[MAP__FUNCTION];
 	ret = map__load(map, NULL);
@@ -375,9 +423,15 @@ static int do_test_code_reading(void)
 		pr_debug("map__load failed\n");
 		goto out_err;
 	}
-	have_vmlinux = map->dso->symtab_type == DSO_BINARY_TYPE__VMLINUX;
-	/* No point getting kernel events if there is no vmlinux */
-	if (!have_vmlinux)
+	have_vmlinux = dso__is_vmlinux(map->dso);
+	have_kcore = dso__is_kcore(map->dso);
+
+	/* 2nd time through we just try kcore */
+	if (try_kcore && !have_kcore)
+		return TEST_CODE_READING_NO_KCORE;
+
+	/* No point getting kernel events if there is no kernel object */
+	if (!have_vmlinux && !have_kcore)
 		excl_kernel = true;
 
 	threads = thread_map__new_by_tid(pid);
@@ -461,11 +515,13 @@ static int do_test_code_reading(void)
 
 	perf_evlist__disable(evlist);
 
-	ret = process_events(machine, evlist);
+	ret = process_events(machine, evlist, &state);
 	if (ret < 0)
 		goto out_err;
 
-	if (!have_vmlinux)
+	if (!have_vmlinux && !have_kcore && !try_kcore)
+		err = TEST_CODE_READING_NO_KERNEL_OBJ;
+	else if (!have_vmlinux && !try_kcore)
 		err = TEST_CODE_READING_NO_VMLINUX;
 	else if (excl_kernel)
 		err = TEST_CODE_READING_NO_ACCESS;
@@ -492,7 +548,9 @@ int test__code_reading(void)
 {
 	int ret;
 
-	ret = do_test_code_reading();
+	ret = do_test_code_reading(false);
+	if (!ret)
+		ret = do_test_code_reading(true);
 
 	switch (ret) {
 	case TEST_CODE_READING_OK:
@@ -500,9 +558,15 @@ int test__code_reading(void)
 	case TEST_CODE_READING_NO_VMLINUX:
 		fprintf(stderr, " (no vmlinux)");
 		return 0;
+	case TEST_CODE_READING_NO_KCORE:
+		fprintf(stderr, " (no kcore)");
+		return 0;
 	case TEST_CODE_READING_NO_ACCESS:
 		fprintf(stderr, " (no access)");
 		return 0;
+	case TEST_CODE_READING_NO_KERNEL_OBJ:
+		fprintf(stderr, " (no kernel obj)");
+		return 0;
 	default:
 		return -1;
 	};
-- 
cgit v0.10.2


From bbb7f846f88df05646795854a014d73fb00f3b8b Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:54 +0300
Subject: perf annotate: Allow disassembly using /proc/kcore

Annotation with /proc/kcore is possible so the logic is adjusted to
allow it.  The main difference is that /proc/kcore has no symbols, so the
parsing logic needed a tweak to read jump offsets.

The other difference is that objdump cannot always read from kcore.
That seems to be a bug with objdump.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-11-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 9101f7c..440c3b3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -103,7 +103,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 	/*
 	 * We can't annotate with just /proc/kallsyms
 	 */
-	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(map->dso)) {
 		pr_err("Can't annotate %s: No vmlinux file was found in the "
 		       "path\n", sym->name);
 		sleep(1);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index d102716..4ab2f11 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -110,10 +110,10 @@ static int jump__parse(struct ins_operands *ops)
 {
 	const char *s = strchr(ops->raw, '+');
 
-	ops->target.addr = strtoll(ops->raw, NULL, 16);
+	ops->target.addr = strtoull(ops->raw, NULL, 16);
 
 	if (s++ != NULL)
-		ops->target.offset = strtoll(s, NULL, 16);
+		ops->target.offset = strtoull(s, NULL, 16);
 	else
 		ops->target.offset = UINT64_MAX;
 
@@ -821,6 +821,10 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 	if (dl == NULL)
 		return -1;
 
+	if (dl->ops.target.offset == UINT64_MAX)
+		dl->ops.target.offset = dl->ops.target.addr -
+					map__rip_2objdump(map, sym->start);
+
 	disasm__add(&notes->src->source, dl);
 
 	return 0;
@@ -864,7 +868,8 @@ fallback:
 		free_filename = false;
 	}
 
-	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(dso)) {
 		char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
 		char *build_id_msg = NULL;
 
@@ -898,7 +903,7 @@ fallback:
 	snprintf(command, sizeof(command),
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -d %s %s -C %s|grep -v %s|expand",
+		 " -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
 		 objdump_path ? objdump_path : "objdump",
 		 disassembler_style ? "-M " : "",
 		 disassembler_style ? disassembler_style : "",
-- 
cgit v0.10.2


From 34f77abcb34e1da4ee3ca5c5a41b673664eee1fa Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:55 +0300
Subject: perf annotate: Put dso name in symbol annotation title

Currently the symbol name is displayed at the top when displaying symbol
annotation.  Add to this the dso long name.

Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-12-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index cc64d3f..0f88a77 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -428,6 +428,14 @@ static void annotate_browser__init_asm_mode(struct annotate_browser *browser)
 	browser->b.nr_entries = browser->nr_asm_entries;
 }
 
+#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
+
+static int sym_title(struct symbol *sym, struct map *map, char *title,
+		     size_t sz)
+{
+	return snprintf(title, sz, "%s  %s", sym->name, map->dso->long_name);
+}
+
 static bool annotate_browser__callq(struct annotate_browser *browser,
 				    struct perf_evsel *evsel,
 				    struct hist_browser_timer *hbt)
@@ -438,6 +446,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 	struct annotation *notes;
 	struct symbol *target;
 	u64 ip;
+	char title[SYM_TITLE_MAX_SIZE];
 
 	if (!ins__is_call(dl->ins))
 		return false;
@@ -461,7 +470,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 
 	pthread_mutex_unlock(&notes->lock);
 	symbol__tui_annotate(target, ms->map, evsel, hbt);
-	ui_browser__show_title(&browser->b, sym->name);
+	sym_title(sym, ms->map, title, sizeof(title));
+	ui_browser__show_title(&browser->b, title);
 	return true;
 }
 
@@ -653,8 +663,10 @@ static int annotate_browser__run(struct annotate_browser *browser,
 	const char *help = "Press 'h' for help on key bindings";
 	int delay_secs = hbt ? hbt->refresh : 0;
 	int key;
+	char title[SYM_TITLE_MAX_SIZE];
 
-	if (ui_browser__show(&browser->b, sym->name, help) < 0)
+	sym_title(sym, ms->map, title, sizeof(title));
+	if (ui_browser__show(&browser->b, title, help) < 0)
 		return -1;
 
 	annotate_browser__calc_percent(browser, evsel);
-- 
cgit v0.10.2


From 484a5e7476b7ce790ba37417a4976c7f86a87231 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:56 +0300
Subject: perf annotate: Remove nop at end of annotation

When kcore is used for annotation, symbols do not have correct sizes
because they come from kallsyms, which has only each symbol's start
address, with the end address being the next symbol's start minus one.

That sometimes results in an extra nop being seen after the end of a
function.  Remove it.

Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-13-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4ab2f11..646e38d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -830,6 +830,30 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 	return 0;
 }
 
+static void delete_last_nop(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct list_head *list = &notes->src->source;
+	struct disasm_line *dl;
+
+	while (!list_empty(list)) {
+		dl = list_entry(list->prev, struct disasm_line, node);
+
+		if (dl->ins && dl->ins->ops) {
+			if (dl->ins->ops != &nop_ops)
+				return;
+		} else {
+			if (!strstr(dl->line, " nop ") &&
+			    !strstr(dl->line, " nopl ") &&
+			    !strstr(dl->line, " nopw "))
+				return;
+		}
+
+		list_del(&dl->node);
+		disasm_line__free(dl);
+	}
+}
+
 int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
 {
 	struct dso *dso = map->dso;
@@ -923,6 +947,13 @@ fallback:
 		if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
 			break;
 
+	/*
+	 * kallsyms does not have symbol sizes so there may be a nop at the end.
+	 * Remove it.
+	 */
+	if (dso__is_kcore(dso))
+		delete_last_nop(sym);
+
 	pclose(file);
 out_free_filename:
 	if (free_filename)
-- 
cgit v0.10.2


From b178170a38e719cb7bc4a14d3f5e4b4ea6b7b851 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:57 +0300
Subject: perf annotate: Add call target name if it is missing

The /proc/kcore file has no symbols, so the call target name does not
display.  Fix by looking up the symbol name if it is on the same map.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-14-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 646e38d..bfc5a27 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -825,6 +825,22 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 		dl->ops.target.offset = dl->ops.target.addr -
 					map__rip_2objdump(map, sym->start);
 
+	/*
+	 * kcore has no symbols, so add the call target name if it is on the
+	 * same map.
+	 */
+	if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) {
+		struct symbol *s;
+		u64 ip = dl->ops.target.addr;
+
+		if (ip >= map->start && ip <= map->end) {
+			ip = map->map_ip(map, ip);
+			s = map__find_symbol(map, ip, NULL);
+			if (s && s->start == ip)
+				dl->ops.target.name = strdup(s->name);
+		}
+	}
+
 	disasm__add(&notes->src->source, dl);
 
 	return 0;
-- 
cgit v0.10.2


From fcd9fef9a64b84520852e0247d6796893e28864c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 7 Aug 2013 15:55:48 -0300
Subject: perf annotate browser: Improve description of '?' hotkey

The previous description: "Search previous string" is usually associated
with the 'N' following a '/string', the opposite of 'n', which is
'Search next string' in the direction established with '/' or '?'.

So change it to 'Search string backwards', to clarify that.

The 'N' hotkey remains to be implemented with the semantic described
above.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-5lw5y15d7vv308xbpm8pqe4g@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 0f88a77..7d75497 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -732,7 +732,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"s             Toggle source code view\n"
 		"/             Search string\n"
 		"r             Run available scripts\n"
-		"?             Search previous string\n");
+		"?             Search string backwards\n");
 			continue;
 		case 'r':
 			{
-- 
cgit v0.10.2


From e6f653888844f3b86f4274f03e731a3eacd22c49 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 2 Aug 2013 13:10:50 +0200
Subject: perf annotate browser: Fix typo

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/20130802111050.GA29126@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 7d75497..08545ae 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -505,7 +505,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser)
 
 	dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx);
 	if (dl == NULL) {
-		ui_helpline__puts("Invallid jump offset");
+		ui_helpline__puts("Invalid jump offset");
 		return true;
 	}
 
-- 
cgit v0.10.2


From e30b88a77cc8ae2a1febf268c8443a6cdd696417 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:33 -0400
Subject: perf session: Export queue_event function

Taking a lesson from perf-trace and bringing in control of event
processing to perf-kvm-stat-live: parse the sample to get access to the
time, leaving just the need to queue it to the ordered samples list.  For
that the queue_event function needs to be exported.

Unexport perf_session__process_event.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-2-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b5ebd47..dedaeb2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -643,7 +643,7 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s)
 
 #define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))
 
-static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
+int perf_session_queue_event(struct perf_session *s, union perf_event *event,
 				    struct perf_sample *sample, u64 file_offset)
 {
 	struct ordered_samples *os = &s->ordered_samples;
@@ -1049,10 +1049,10 @@ static void event_swap(union perf_event *event, bool sample_id_all)
 		swap(event, sample_id_all);
 }
 
-int perf_session__process_event(struct perf_session *session,
-				union perf_event *event,
-				struct perf_tool *tool,
-				u64 file_offset)
+static int perf_session__process_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_tool *tool,
+				       u64 file_offset)
 {
 	struct perf_sample sample;
 	int ret;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9818fc2..8bed17e 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -56,10 +56,8 @@ int __perf_session__process_events(struct perf_session *self,
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_tool *tool);
 
-int perf_session__process_event(struct perf_session *session,
-				union perf_event *event,
-				struct perf_tool *tool,
-				u64 file_offset);
+int perf_session_queue_event(struct perf_session *s, union perf_event *event,
+			     struct perf_sample *sample, u64 file_offset);
 
 void perf_tool__fill_defaults(struct perf_tool *tool);
 
-- 
cgit v0.10.2


From 1afe1d148491069ef51ed69fa53b09e1cb3ec30d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:34 -0400
Subject: perf kvm: Add live mode

perf kvm stat currently requires back-to-back record and report commands
to see stats, e.g.:

  perf kvm stat record -p $pid -- sleep 1
  perf kvm stat report

This is inconvenient for on-box monitoring of a VM. This patch
introduces a 'live' mode that in effect combines the record plus report
into one command, e.g., to monitor a single VM:

  perf kvm stat live -p $pid

or all VMs:

  perf kvm stat live

Same stats options for the record+report path work with the live mode.
Display rate defaults to 1 second and can be changed using the -d
option.

v4:
- address comments from Xiao -- verify_vcpu check should not look at
  processors on line for the host, prune configurable options.
- set attr->{mmap,comm,task} to 0 - don't need task events so trim events
  we have to deal with
- better control of time for queue event flushing to reduce frequency of
  "Timestamp below last timeslice flush" failures.

v3:
updated to use existing tracepoint parsing code

v2:
removed ABSTIME arg from timerfd_settime as mentioned by Namhyung
only call perf_kvm__handle_stdin when poll returns activity.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 7d14a3a..29bfca7 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -2,6 +2,7 @@
 #include "perf.h"
 
 #include "util/evsel.h"
+#include "util/evlist.h"
 #include "util/util.h"
 #include "util/cache.h"
 #include "util/symbol.h"
@@ -15,9 +16,12 @@
 #include <lk/debugfs.h>
 #include "util/tool.h"
 #include "util/stat.h"
+#include "util/top.h"
 
 #include <sys/prctl.h>
+#include <sys/timerfd.h>
 
+#include <termios.h>
 #include <semaphore.h>
 #include <pthread.h>
 #include <math.h>
@@ -82,6 +86,8 @@ struct exit_reasons_table {
 
 struct perf_kvm_stat {
 	struct perf_tool    tool;
+	struct perf_record_opts opts;
+	struct perf_evlist  *evlist;
 	struct perf_session *session;
 
 	const char *file_name;
@@ -96,10 +102,16 @@ struct perf_kvm_stat {
 	struct kvm_events_ops *events_ops;
 	key_cmp_fun compare;
 	struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
 	u64 total_time;
 	u64 total_count;
+	u64 lost_events;
 
 	struct rb_root result;
+
+	int timerfd;
+	unsigned int display_time;
+	bool live;
 };
 
 
@@ -320,6 +332,23 @@ static void init_kvm_event_record(struct perf_kvm_stat *kvm)
 		INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
 }
 
+static void clear_events_cache_stats(struct list_head *kvm_events_cache)
+{
+	struct list_head *head;
+	struct kvm_event *event;
+	unsigned int i;
+
+	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+		head = &kvm_events_cache[i];
+		list_for_each_entry(event, head, hash_entry) {
+			/* reset stats for event */
+			memset(&event->total, 0, sizeof(event->total));
+			memset(event->vcpu, 0,
+			       event->max_vcpu * sizeof(*event->vcpu));
+		}
+	}
+}
+
 static int kvm_events_hash_fn(u64 key)
 {
 	return key & (EVENTS_CACHE_SIZE - 1);
@@ -472,7 +501,11 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
 	vcpu_record->last_event = NULL;
 	vcpu_record->start_time = 0;
 
-	BUG_ON(timestamp < time_begin);
+	/* seems to happen once in a while during live mode */
+	if (timestamp < time_begin) {
+		pr_debug("End time before begin time; skipping event.\n");
+		return true;
+	}
 
 	time_diff = timestamp - time_begin;
 	return update_kvm_event(event, vcpu, time_diff);
@@ -639,24 +672,56 @@ static struct kvm_event *pop_from_result(struct rb_root *result)
 	return container_of(node, struct kvm_event, rb);
 }
 
-static void print_vcpu_info(int vcpu)
+static void print_vcpu_info(struct perf_kvm_stat *kvm)
 {
+	int vcpu = kvm->trace_vcpu;
+
 	pr_info("Analyze events for ");
 
+	if (kvm->live) {
+		if (kvm->opts.target.system_wide)
+			pr_info("all VMs, ");
+		else if (kvm->opts.target.pid)
+			pr_info("pid(s) %s, ", kvm->opts.target.pid);
+		else
+			pr_info("dazed and confused on what is monitored, ");
+	}
+
 	if (vcpu == -1)
 		pr_info("all VCPUs:\n\n");
 	else
 		pr_info("VCPU %d:\n\n", vcpu);
 }
 
+static void show_timeofday(void)
+{
+	char date[64];
+	struct timeval tv;
+	struct tm ltime;
+
+	gettimeofday(&tv, NULL);
+	if (localtime_r(&tv.tv_sec, &ltime)) {
+		strftime(date, sizeof(date), "%H:%M:%S", &ltime);
+		pr_info("%s.%06ld", date, tv.tv_usec);
+	} else
+		pr_info("00:00:00.000000");
+
+	return;
+}
+
 static void print_result(struct perf_kvm_stat *kvm)
 {
 	char decode[20];
 	struct kvm_event *event;
 	int vcpu = kvm->trace_vcpu;
 
+	if (kvm->live) {
+		puts(CONSOLE_CLEAR);
+		show_timeofday();
+	}
+
 	pr_info("\n\n");
-	print_vcpu_info(vcpu);
+	print_vcpu_info(kvm);
 	pr_info("%20s ", kvm->events_ops->name);
 	pr_info("%10s ", "Samples");
 	pr_info("%9s ", "Samples%");
@@ -683,6 +748,20 @@ static void print_result(struct perf_kvm_stat *kvm)
 
 	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
 		kvm->total_count, kvm->total_time / 1e3);
+
+	if (kvm->lost_events)
+		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
+}
+
+static int process_lost_event(struct perf_tool *tool,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
+
+	kvm->lost_events++;
+	return 0;
 }
 
 static int process_sample_event(struct perf_tool *tool,
@@ -707,10 +786,20 @@ static int process_sample_event(struct perf_tool *tool,
 	return 0;
 }
 
-static int get_cpu_isa(struct perf_session *session)
+static int cpu_isa_config(struct perf_kvm_stat *kvm)
 {
-	char *cpuid = session->header.env.cpuid;
-	int isa;
+	char buf[64], *cpuid;
+	int err, isa;
+
+	if (kvm->live) {
+		err = get_cpuid(buf, sizeof(buf));
+		if (err != 0) {
+			pr_err("Failed to look up CPU type (Intel or AMD)\n");
+			return err;
+		}
+		cpuid = buf;
+	} else
+		cpuid = kvm->session->header.env.cpuid;
 
 	if (strstr(cpuid, "Intel"))
 		isa = 1;
@@ -718,10 +807,361 @@ static int get_cpu_isa(struct perf_session *session)
 		isa = 0;
 	else {
 		pr_err("CPU %s is not supported.\n", cpuid);
-		isa = -ENOTSUP;
+		return -ENOTSUP;
+	}
+
+	if (isa == 1) {
+		kvm->exit_reasons = vmx_exit_reasons;
+		kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
+		kvm->exit_reasons_isa = "VMX";
+	}
+
+	return 0;
+}
+
+static bool verify_vcpu(int vcpu)
+{
+	if (vcpu != -1 && vcpu < 0) {
+		pr_err("Invalid vcpu:%d.\n", vcpu);
+		return false;
+	}
+
+	return true;
+}
+
+/* keeping the max events to a modest level to keep
+ * the processing of samples per mmap smooth.
+ */
+#define PERF_KVM__MAX_EVENTS_PER_MMAP  25
+
+static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
+				   u64 *mmap_time)
+{
+	union perf_event *event;
+	struct perf_sample sample;
+	s64 n = 0;
+	int err;
+
+	*mmap_time = ULLONG_MAX;
+	while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
+		err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+		if (err) {
+			pr_err("Failed to parse sample\n");
+			return -1;
+		}
+
+		err = perf_session_queue_event(kvm->session, event, &sample, 0);
+		if (err) {
+			pr_err("Failed to enqueue sample: %d\n", err);
+			return -1;
+		}
+
+		/* save time stamp of our first sample for this mmap */
+		if (n == 0)
+			*mmap_time = sample.time;
+
+		/* limit events per mmap handled all at once */
+		n++;
+		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+			break;
+	}
+
+	return n;
+}
+
+static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
+{
+	int i, err, throttled = 0;
+	s64 n, ntotal = 0;
+	u64 flush_time = ULLONG_MAX, mmap_time;
+
+	for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+		n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
+		if (n < 0)
+			return -1;
+
+		/* flush time is going to be the minimum of all the individual
+		 * mmap times. Essentially, we flush all the samples queued up
+		 * from the last pass under our minimal start time -- that leaves
+		 * a very small race for samples to come in with a lower timestamp.
+		 * The ioctl to return the perf_clock timestamp should close the
+		 * race entirely.
+		 */
+		if (mmap_time < flush_time)
+			flush_time = mmap_time;
+
+		ntotal += n;
+		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+			throttled = 1;
+	}
+
+	/* flush queue after each round in which we processed events */
+	if (ntotal) {
+		kvm->session->ordered_samples.next_flush = flush_time;
+		err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session);
+		if (err) {
+			if (kvm->lost_events)
+				pr_info("\nLost events: %" PRIu64 "\n\n",
+					kvm->lost_events);
+			return err;
+		}
+	}
+
+	return throttled;
+}
+
+static volatile int done;
+
+static void sig_handler(int sig __maybe_unused)
+{
+	done = 1;
+}
+
+static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
+{
+	struct itimerspec new_value;
+	int rc = -1;
+
+	kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+	if (kvm->timerfd < 0) {
+		pr_err("timerfd_create failed\n");
+		goto out;
+	}
+
+	new_value.it_value.tv_sec = kvm->display_time;
+	new_value.it_value.tv_nsec = 0;
+	new_value.it_interval.tv_sec = kvm->display_time;
+	new_value.it_interval.tv_nsec = 0;
+
+	if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
+		pr_err("timerfd_settime failed: %d\n", errno);
+		close(kvm->timerfd);
+		goto out;
+	}
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
+{
+	uint64_t c;
+	int rc;
+
+	rc = read(kvm->timerfd, &c, sizeof(uint64_t));
+	if (rc < 0) {
+		if (errno == EAGAIN)
+			return 0;
+
+		pr_err("Failed to read timer fd: %d\n", errno);
+		return -1;
+	}
+
+	if (rc != sizeof(uint64_t)) {
+		pr_err("Error reading timer fd - invalid size returned\n");
+		return -1;
+	}
+
+	if (c != 1)
+		pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
+
+	/* update display */
+	sort_result(kvm);
+	print_result(kvm);
+
+	/* reset counts */
+	clear_events_cache_stats(kvm->kvm_events_cache);
+	kvm->total_count = 0;
+	kvm->total_time = 0;
+	kvm->lost_events = 0;
+
+	return 0;
+}
+
+static int fd_set_nonblock(int fd)
+{
+	long arg = 0;
+
+	arg = fcntl(fd, F_GETFL);
+	if (arg < 0) {
+		pr_err("Failed to get current flags for fd %d\n", fd);
+		return -1;
+	}
+
+	if (fcntl(fd, F_SETFL, arg | O_NONBLOCK) < 0) {
+		pr_err("Failed to set non-block option on fd %d\n", fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+static
+int perf_kvm__handle_stdin(struct termios *tc_now, struct termios *tc_save)
+{
+	int c;
+
+	tcsetattr(0, TCSANOW, tc_now);
+	c = getc(stdin);
+	tcsetattr(0, TCSAFLUSH, tc_save);
+
+	if (c == 'q')
+		return 1;
+
+	return 0;
+}
+
+static int kvm_events_live_report(struct perf_kvm_stat *kvm)
+{
+	struct pollfd *pollfds = NULL;
+	int nr_fds, nr_stdin, ret, err = -EINVAL;
+	struct termios tc, save;
+
+	/* live flag must be set first */
+	kvm->live = true;
+
+	ret = cpu_isa_config(kvm);
+	if (ret < 0)
+		return ret;
+
+	if (!verify_vcpu(kvm->trace_vcpu) ||
+	    !select_key(kvm) ||
+	    !register_kvm_events_ops(kvm)) {
+		goto out;
+	}
+
+	init_kvm_event_record(kvm);
+
+	tcgetattr(0, &save);
+	tc = save;
+	tc.c_lflag &= ~(ICANON | ECHO);
+	tc.c_cc[VMIN] = 0;
+	tc.c_cc[VTIME] = 0;
+
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+
+	/* copy pollfds -- need to add timerfd and stdin */
+	nr_fds = kvm->evlist->nr_fds;
+	pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
+	if (!pollfds) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memcpy(pollfds, kvm->evlist->pollfd,
+		sizeof(struct pollfd) * kvm->evlist->nr_fds);
+
+	/* add timer fd */
+	if (perf_kvm__timerfd_create(kvm) < 0) {
+		err = -1;
+		goto out;
+	}
+
+	pollfds[nr_fds].fd = kvm->timerfd;
+	pollfds[nr_fds].events = POLLIN;
+	nr_fds++;
+
+	pollfds[nr_fds].fd = fileno(stdin);
+	pollfds[nr_fds].events = POLLIN;
+	nr_stdin = nr_fds;
+	nr_fds++;
+	if (fd_set_nonblock(fileno(stdin)) != 0)
+		goto out;
+
+	/* everything is good - enable the events and process */
+	perf_evlist__enable(kvm->evlist);
+
+	while (!done) {
+		int rc;
+
+		rc = perf_kvm__mmap_read(kvm);
+		if (rc < 0)
+			break;
+
+		err = perf_kvm__handle_timerfd(kvm);
+		if (err)
+			goto out;
+
+		if (pollfds[nr_stdin].revents & POLLIN)
+			done = perf_kvm__handle_stdin(&tc, &save);
+
+		if (!rc && !done)
+			err = poll(pollfds, nr_fds, 100);
+	}
+
+	perf_evlist__disable(kvm->evlist);
+
+	if (err == 0) {
+		sort_result(kvm);
+		print_result(kvm);
+	}
+
+out:
+	if (kvm->timerfd >= 0)
+		close(kvm->timerfd);
+
+	if (pollfds)
+		free(pollfds);
+
+	return err;
+}
+
+static int kvm_live_open_events(struct perf_kvm_stat *kvm)
+{
+	int err, rc = -1;
+	struct perf_evsel *pos;
+	struct perf_evlist *evlist = kvm->evlist;
+
+	perf_evlist__config(evlist, &kvm->opts);
+
+	/*
+	 * Note: exclude_{guest,host} do not apply here.
+	 *       This command processes KVM tracepoints from host only
+	 */
+	list_for_each_entry(pos, &evlist->entries, node) {
+		struct perf_event_attr *attr = &pos->attr;
+
+		/* make sure these *are* set */
+		attr->sample_type |= PERF_SAMPLE_TID;
+		attr->sample_type |= PERF_SAMPLE_TIME;
+		attr->sample_type |= PERF_SAMPLE_CPU;
+		attr->sample_type |= PERF_SAMPLE_RAW;
+		/* make sure these are *not*; want as small a sample as possible */
+		attr->sample_type &= ~PERF_SAMPLE_PERIOD;
+		attr->sample_type &= ~PERF_SAMPLE_IP;
+		attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN;
+		attr->sample_type &= ~PERF_SAMPLE_ADDR;
+		attr->sample_type &= ~PERF_SAMPLE_READ;
+		attr->mmap = 0;
+		attr->comm = 0;
+		attr->task = 0;
+
+		attr->sample_period = 1;
+
+		attr->watermark = 0;
+		attr->wakeup_events = 1000;
+
+		/* will enable all once we are ready */
+		attr->disabled = 1;
+	}
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		printf("Couldn't create the events: %s\n", strerror(errno));
+		goto out;
 	}
 
-	return isa;
+	if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
+		ui__error("Failed to mmap the events: %s\n", strerror(errno));
+		perf_evlist__close(evlist);
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+	return rc;
 }
 
 static int read_events(struct perf_kvm_stat *kvm)
@@ -749,30 +1189,13 @@ static int read_events(struct perf_kvm_stat *kvm)
 	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
 	 * traced in the old kernel.
 	 */
-	ret = get_cpu_isa(kvm->session);
-
+	ret = cpu_isa_config(kvm);
 	if (ret < 0)
 		return ret;
 
-	if (ret == 1) {
-		kvm->exit_reasons = vmx_exit_reasons;
-		kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
-		kvm->exit_reasons_isa = "VMX";
-	}
-
 	return perf_session__process_events(kvm->session, &kvm->tool);
 }
 
-static bool verify_vcpu(int vcpu)
-{
-	if (vcpu != -1 && vcpu < 0) {
-		pr_err("Invalid vcpu:%d.\n", vcpu);
-		return false;
-	}
-
-	return true;
-}
-
 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
 {
 	int ret = -EINVAL;
@@ -886,6 +1309,186 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	return kvm_events_report_vcpu(kvm);
 }
 
+static struct perf_evlist *kvm_live_event_list(void)
+{
+	struct perf_evlist *evlist;
+	char *tp, *name, *sys;
+	unsigned int j;
+	int err = -1;
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL)
+		return NULL;
+
+	for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
+
+		tp = strdup(kvm_events_tp[j]);
+		if (tp == NULL)
+			goto out;
+
+		/* split tracepoint into subsystem and name */
+		sys = tp;
+		name = strchr(tp, ':');
+		if (name == NULL) {
+			pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
+				kvm_events_tp[j]);
+			free(tp);
+			goto out;
+		}
+		*name = '\0';
+		name++;
+
+		if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+			pr_err("Failed to add %s tracepoint to the list\n", kvm_events_tp[j]);
+			free(tp);
+			goto out;
+		}
+
+		free(tp);
+	}
+
+	err = 0;
+
+out:
+	if (err) {
+		perf_evlist__delete(evlist);
+		evlist = NULL;
+	}
+
+	return evlist;
+}
+
+static int kvm_events_live(struct perf_kvm_stat *kvm,
+			   int argc, const char **argv)
+{
+	char errbuf[BUFSIZ];
+	int err;
+
+	const struct option live_options[] = {
+		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+			"record events on existing process id"),
+		OPT_UINTEGER('m', "mmap-pages", &kvm->opts.mmap_pages,
+			"number of mmap data pages"),
+		OPT_INCR('v', "verbose", &verbose,
+			"be more verbose (show counter open errors, etc)"),
+		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
+			"system-wide collection from all CPUs"),
+		OPT_UINTEGER('d', "display", &kvm->display_time,
+			"time in seconds between display updates"),
+		OPT_STRING(0, "event", &kvm->report_event, "report event",
+			"event for reporting: vmexit, mmio, ioport"),
+		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+			"vcpu id to report"),
+		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+			"key for sorting: sample(sort by samples number)"
+			" time (sort by avg time)"),
+		OPT_END()
+	};
+	const char * const live_usage[] = {
+		"perf kvm stat live [<options>]",
+		NULL
+	};
+
+
+	/* event handling */
+	kvm->tool.sample = process_sample_event;
+	kvm->tool.comm   = perf_event__process_comm;
+	kvm->tool.exit   = perf_event__process_exit;
+	kvm->tool.fork   = perf_event__process_fork;
+	kvm->tool.lost   = process_lost_event;
+	kvm->tool.ordered_samples = true;
+	perf_tool__fill_defaults(&kvm->tool);
+
+	/* set defaults */
+	kvm->display_time = 1;
+	kvm->opts.user_interval = 1;
+	kvm->opts.mmap_pages = 512;
+	kvm->opts.target.uses_mmap = false;
+	kvm->opts.target.uid_str = NULL;
+	kvm->opts.target.uid = UINT_MAX;
+
+	symbol__init();
+	disable_buildid_cache();
+
+	use_browser = 0;
+	setup_browser(false);
+
+	if (argc) {
+		argc = parse_options(argc, argv, live_options,
+				     live_usage, 0);
+		if (argc)
+			usage_with_options(live_usage, live_options);
+	}
+
+	/*
+	 * target related setups
+	 */
+	err = perf_target__validate(&kvm->opts.target);
+	if (err) {
+		perf_target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
+	}
+
+	if (perf_target__none(&kvm->opts.target))
+		kvm->opts.target.system_wide = true;
+
+
+	/*
+	 * generate the event list
+	 */
+	kvm->evlist = kvm_live_event_list();
+	if (kvm->evlist == NULL) {
+		err = -1;
+		goto out;
+	}
+
+	symbol_conf.nr_events = kvm->evlist->nr_entries;
+
+	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+		usage_with_options(live_usage, live_options);
+
+	/*
+	 * perf session
+	 */
+	kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool);
+	if (kvm->session == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	kvm->session->evlist = kvm->evlist;
+	perf_session__set_id_hdr_size(kvm->session);
+
+
+	if (perf_target__has_task(&kvm->opts.target))
+		perf_event__synthesize_thread_map(&kvm->tool,
+						  kvm->evlist->threads,
+						  perf_event__process,
+						  &kvm->session->machines.host);
+	else
+		perf_event__synthesize_threads(&kvm->tool, perf_event__process,
+					       &kvm->session->machines.host);
+
+
+	err = kvm_live_open_events(kvm);
+	if (err)
+		goto out;
+
+	err = kvm_events_live_report(kvm);
+
+out:
+	exit_browser(0);
+
+	if (kvm->session)
+		perf_session__delete(kvm->session);
+	kvm->session = NULL;
+	if (kvm->evlist) {
+		perf_evlist__delete_maps(kvm->evlist);
+		perf_evlist__delete(kvm->evlist);
+	}
+
+	return err;
+}
+
 static void print_kvm_stat_usage(void)
 {
 	printf("Usage: perf kvm stat <command>\n\n");
@@ -893,6 +1496,7 @@ static void print_kvm_stat_usage(void)
 	printf("# Available commands:\n");
 	printf("\trecord: record kvm events\n");
 	printf("\treport: report statistical data of kvm events\n");
+	printf("\tlive:   live reporting of statistical data of kvm events\n");
 
 	printf("\nOtherwise, it is the alias of 'perf stat':\n");
 }
@@ -922,6 +1526,9 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
 	if (!strncmp(argv[1], "rep", 3))
 		return kvm_events_report(&kvm, argc - 1 , argv + 1);
 
+	if (!strncmp(argv[1], "live", 4))
+		return kvm_events_live(&kvm, argc - 1 , argv + 1);
+
 perf_stat:
 	return cmd_stat(argc, argv, NULL);
 }
-- 
cgit v0.10.2


From 62d04dbf3652264157e646d93006b2d74cf1be93 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:35 -0400
Subject: perf kvm: Add min and max stats to display

Add max and min times for exit events.

v2: address Xiao's comment to use get_event function for pulling max and
    min from stats struct similar to mean and count

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-4-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 29bfca7..b6595e9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -337,14 +337,19 @@ static void clear_events_cache_stats(struct list_head *kvm_events_cache)
 	struct list_head *head;
 	struct kvm_event *event;
 	unsigned int i;
+	int j;
 
 	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
 		head = &kvm_events_cache[i];
 		list_for_each_entry(event, head, hash_entry) {
 			/* reset stats for event */
-			memset(&event->total, 0, sizeof(event->total));
-			memset(event->vcpu, 0,
-			       event->max_vcpu * sizeof(*event->vcpu));
+			event->total.time = 0;
+			init_stats(&event->total.stats);
+
+			for (j = 0; j < event->max_vcpu; ++j) {
+				event->vcpu[j].time = 0;
+				init_stats(&event->vcpu[j].stats);
+			}
 		}
 	}
 }
@@ -583,6 +588,8 @@ static int compare_kvm_event_ ## func(struct kvm_event *one,		\
 GET_EVENT_KEY(time, time);
 COMPARE_EVENT_KEY(count, stats.n);
 COMPARE_EVENT_KEY(mean, stats.mean);
+GET_EVENT_KEY(max, stats.max);
+GET_EVENT_KEY(min, stats.min);
 
 #define DEF_SORT_NAME_KEY(name, compare_key)				\
 	{ #name, compare_kvm_event_ ## compare_key }
@@ -727,20 +734,26 @@ static void print_result(struct perf_kvm_stat *kvm)
 	pr_info("%9s ", "Samples%");
 
 	pr_info("%9s ", "Time%");
+	pr_info("%10s ", "Min Time");
+	pr_info("%10s ", "Max Time");
 	pr_info("%16s ", "Avg time");
 	pr_info("\n\n");
 
 	while ((event = pop_from_result(&kvm->result))) {
-		u64 ecount, etime;
+		u64 ecount, etime, max, min;
 
 		ecount = get_event_count(event, vcpu);
 		etime = get_event_time(event, vcpu);
+		max = get_event_max(event, vcpu);
+		min = get_event_min(event, vcpu);
 
 		kvm->events_ops->decode_key(kvm, &event->key, decode);
 		pr_info("%20s ", decode);
 		pr_info("%10llu ", (unsigned long long)ecount);
 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
+		pr_info("%8" PRIu64 "us ", min / 1000);
+		pr_info("%8" PRIu64 "us ", max / 1000);
 		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
 			kvm_event_rel_stddev(vcpu, event));
 		pr_info("\n");
-- 
cgit v0.10.2


From 2e73f00fe707a8f2476d989de946c12078c7c066 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:37 -0400
Subject: perf kvm stat report: Add option to analyze specific VM

Add an option to analyze a specific VM within a data file. This allows
collecting kvm events for all VMs and then analyzing the data for each
VM (or set of VMs) individually.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-6-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index b6595e9..2ceec81 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -9,7 +9,7 @@
 #include "util/thread.h"
 #include "util/header.h"
 #include "util/session.h"
-
+#include "util/intlist.h"
 #include "util/parse-options.h"
 #include "util/trace-event.h"
 #include "util/debug.h"
@@ -107,6 +107,9 @@ struct perf_kvm_stat {
 	u64 total_count;
 	u64 lost_events;
 
+	const char *pid_str;
+	struct intlist *pid_list;
+
 	struct rb_root result;
 
 	int timerfd;
@@ -777,16 +780,29 @@ static int process_lost_event(struct perf_tool *tool,
 	return 0;
 }
 
+static bool skip_sample(struct perf_kvm_stat *kvm,
+			struct perf_sample *sample)
+{
+	if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
+		return true;
+
+	return false;
+}
+
 static int process_sample_event(struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, sample->tid);
+	struct thread *thread;
 	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
 						 tool);
 
+	if (skip_sample(kvm, sample))
+		return 0;
+
+	thread = machine__findnew_thread(machine, sample->tid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			event->header.type);
@@ -1209,11 +1225,27 @@ static int read_events(struct perf_kvm_stat *kvm)
 	return perf_session__process_events(kvm->session, &kvm->tool);
 }
 
+static int parse_target_str(struct perf_kvm_stat *kvm)
+{
+	if (kvm->pid_str) {
+		kvm->pid_list = intlist__new(kvm->pid_str);
+		if (kvm->pid_list == NULL) {
+			pr_err("Error parsing process id string\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
 {
 	int ret = -EINVAL;
 	int vcpu = kvm->trace_vcpu;
 
+	if (parse_target_str(kvm) != 0)
+		goto exit;
+
 	if (!verify_vcpu(vcpu))
 		goto exit;
 
@@ -1300,6 +1332,8 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
 			    "key for sorting: sample(sort by samples number)"
 			    " time (sort by avg time)"),
+		OPT_STRING('p', "pid", &kvm->pid_str, "pid",
+			   "analyze events only for given process id(s)"),
 		OPT_END()
 	};
 
-- 
cgit v0.10.2


From d50bf78ff69297d3f60aa778c272acc8e5f59a19 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 6 Aug 2013 14:14:13 +0900
Subject: perf ui/gtk: Fix segmentation fault on perf_hpp__for_each_format loop

The commit 2b8bfa6bb8a7 ("perf tools: Centralize default columns init in
perf_hpp__init") moved initialization of the common overhead column to
perf_hpp__init() but forgot about the gtk code.

So the gtk code added the same column to the list twice, causing an
infinite loop when iterating over it with perf_hpp__for_each_format.
When I run perf report --gtk, I see the following messages indefinitely:

  (perf:11687): Gtk-CRITICAL **: IA__gtk_main_quit: assertion 'main_loops != NULL' failed
  perf: Segmentation fault

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1375766056-19377-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index cb2ed198..2ca66cc 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -109,8 +109,6 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
 
 void perf_gtk__init_hpp(void)
 {
-	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
-
 	perf_hpp__init();
 
 	perf_hpp__format[PERF_HPP__OVERHEAD].color =
-- 
cgit v0.10.2


From e9a7c414477d20c3cc56f90f29c35b06f0f15e25 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 6 Aug 2013 23:28:05 +1000
Subject: perf tools: Add support for pinned modifier

This commit adds support for a new modifier "D", which requests that the
event, or group of events, be pinned to the PMU.

The "p" modifier is already taken for precise, and "P" may be used in
future to mean "fully precise".

So we use "D", which stands for pinneD - and looks like a padlock, or if
you're using the ":D" syntax perf smiles at you.

This is an oft-requested feature from our HW folks, who want to be able
to run a large number of events, but also want 100% accurate results for
instructions per cycle.

Comparison of results with and without pinning:

$ perf stat -e '{cycles,instructions}:D' -e cycles,instructions,...

  79,590,480,683 cycles         #  0.000 GHz
 166,123,716,524 instructions   #  2.09  insns per cycle
                                #  0.11  stalled cycles per insn

  79,352,134,463 cycles         #  0.000 GHz                     [11.11%]
 165,178,301,818 instructions   #  2.08  insns per cycle
                                #  0.11  stalled cycles per insn [11.13%]

As you can see, although perf does a very good job of scaling the values
in the non-pinned case, there is some small discrepancy.

The patch is fairly straightforward; the one detail is that we need to
make sure we only request pinning for the group leader when we have a
group.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1375795686-4226-1-git-send-email-michael@ellerman.id.au
[ Use perf_evsel__is_group_leader instead of open coded equivalent, as
  suggested by Jiri Olsa ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index eb03f06..6fce6a6 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -30,6 +30,7 @@ counted. The following modifiers exist:
  H - host counting (not in KVM guests)
  p - precise level
  S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dba877d..9cba923 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -688,6 +688,7 @@ struct event_modifier {
 	int precise;
 	int exclude_GH;
 	int sample_read;
+	int pinned;
 };
 
 static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -700,6 +701,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	int eG = evsel ? evsel->attr.exclude_guest : 0;
 	int precise = evsel ? evsel->attr.precise_ip : 0;
 	int sample_read = 0;
+	int pinned = evsel ? evsel->attr.pinned : 0;
 
 	int exclude = eu | ek | eh;
 	int exclude_GH = evsel ? evsel->exclude_GH : 0;
@@ -734,6 +736,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 				eG = 1;
 		} else if (*str == 'S') {
 			sample_read = 1;
+		} else if (*str == 'D') {
+			pinned = 1;
 		} else
 			break;
 
@@ -761,6 +765,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	mod->precise = precise;
 	mod->exclude_GH = exclude_GH;
 	mod->sample_read = sample_read;
+	mod->pinned = pinned;
+
 	return 0;
 }
 
@@ -773,7 +779,7 @@ static int check_modifier(char *str)
 	char *p = str;
 
 	/* The sizeof includes 0 byte as well. */
-	if (strlen(str) > (sizeof("ukhGHpppS") - 1))
+	if (strlen(str) > (sizeof("ukhGHpppSD") - 1))
 		return -1;
 
 	while (*p) {
@@ -812,6 +818,9 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
 		evsel->attr.exclude_guest  = mod.eG;
 		evsel->exclude_GH          = mod.exclude_GH;
 		evsel->sample_read         = mod.sample_read;
+
+		if (perf_evsel__is_group_leader(evsel))
+			evsel->attr.pinned = mod.pinned;
 	}
 
 	return 0;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index b36115f..0790452 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -82,7 +82,8 @@ num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?]*
-modifier_event	[ukhpGHS]+
+/* If you add a modifier you need to update check_modifier() */
+modifier_event	[ukhpGHSD]+
 modifier_bp	[rwx]{1,3}
 
 %%
-- 
cgit v0.10.2


From c9ee780f2736b7a149658b0cd8c8389da23e190a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 6 Aug 2013 23:28:06 +1000
Subject: perf tests: Add tests of new pinned modifier

Add a negative test to test__checkevent_pmu_events() to get lots of
coverage of the negative case, ie. when the modifier is not specified.

Add a test of a single event, and of the group case.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1375795686-4226-2-git-send-email-michael@ellerman.id.au
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index b46379c..48114d1 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -452,6 +452,7 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist)
 			evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
 
 	return 0;
 }
@@ -1070,6 +1071,50 @@ static int test__leader_sample2(struct perf_evlist *evlist __maybe_unused)
 	return 0;
 }
 
+static int test__checkevent_pinned_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__pinned_group(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+	/* cycles - group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned);
+
+	/* cache-misses - can not be pinned, but will go on with the leader */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	/* branch-misses - ditto */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	char events_path[PATH_MAX];
@@ -1294,6 +1339,14 @@ static struct evlist_test test__events[] = {
 		.name  = "{instructions,branch-misses}:Su",
 		.check = test__leader_sample2,
 	},
+	[40] = {
+		.name  = "instructions:uDp",
+		.check = test__checkevent_pinned_modifier,
+	},
+	[41] = {
+		.name  = "{cycles,cache-misses,branch-misses}:D",
+		.check = test__pinned_group,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
-- 
cgit v0.10.2


From 8f76fcd902e3b3a7d6f6c695cc8bc053579eb179 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 15 Jul 2013 15:27:53 -0500
Subject: perf machine: Do not require /lib/modules/* on a guest

For some types of work loads and special guest environments, you might
have a kernel that has no kernel modules.  The perf kvm record tool
fails to instantiate vmlinux maps when the kernel modules directory cannot
be opened, even though the kallsyms has been properly processed.  This
leads to a perf kvm report that has no guest symbols resolved.

This patch changes the failure to locate kernel modules to be non-fatal.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1373920073-4874-1-git-send-email-jason.wessel@windriver.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ef3b49c..6fcc358 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -806,7 +806,10 @@ static int machine__create_modules(struct machine *machine)
 	free(line);
 	fclose(file);
 
-	return machine__set_modules_path(machine);
+	if (machine__set_modules_path(machine) < 0) {
+		pr_debug("Problems setting modules path maps, continuing anyway...\n");
+	}
+	return 0;
 
 out_delete_line:
 	free(line);
-- 
cgit v0.10.2


From cecb977e24da1465cdb0ff2d10d22e5891dc3e6c Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Tue, 16 Jul 2013 10:03:34 +0900
Subject: Revert "tools lib lk: Fix for cross build"

This reverts commit 079787f209416416383c74ea5d5044be2d586f5e.

The commit below already resolves the cross build problem.
I noticed this too late.

    commit 3c4797d46c14fa0c7cf733a77bd4b28875078b53
    Author: Rabin Vincent <rabin@rab.in>
    Date:   Fri May 17 22:27:44 2013 +0200

    tools lib lk: Respect CROSS_COMPILE

    Make lk use CROSS_COMPILE, in order to be able to cross compile perf
    again.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373936614-22224-1-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile
index 280dd82..3dba0a4 100644
--- a/tools/lib/lk/Makefile
+++ b/tools/lib/lk/Makefile
@@ -3,21 +3,6 @@ include ../../scripts/Makefile.include
 CC = $(CROSS_COMPILE)gcc
 AR = $(CROSS_COMPILE)ar
 
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
-  $(if $(or $(findstring environment,$(origin $(1))),\
-            $(findstring command line,$(origin $(1)))),,\
-    $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-
 # guard against environment variables
 LIB_H=
 LIB_OBJS=
-- 
cgit v0.10.2


From 70f7b4a7f3f9a46807b644aa0fcfa7daa0d553e4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 21:56:38 -0400
Subject: perf kvm: Option to print events that exceed a duration

This is useful to spot high latency blips. It is normal for HLT reasons
to have long exit times, so strip those from the duration check.

v2: changed threshold to duration per acme's request

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375926999-75129-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 2ceec81..fa2f3d7 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -106,6 +106,7 @@ struct perf_kvm_stat {
 	u64 total_time;
 	u64 total_count;
 	u64 lost_events;
+	u64 duration;
 
 	const char *pid_str;
 	struct intlist *pid_list;
@@ -473,7 +474,7 @@ static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
 static bool handle_end_event(struct perf_kvm_stat *kvm,
 			     struct vcpu_event_record *vcpu_record,
 			     struct event_key *key,
-			     u64 timestamp)
+			     struct perf_sample *sample)
 {
 	struct kvm_event *event;
 	u64 time_begin, time_diff;
@@ -510,12 +511,24 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
 	vcpu_record->start_time = 0;
 
 	/* seems to happen once in a while during live mode */
-	if (timestamp < time_begin) {
+	if (sample->time < time_begin) {
 		pr_debug("End time before begin time; skipping event.\n");
 		return true;
 	}
 
-	time_diff = timestamp - time_begin;
+	time_diff = sample->time - time_begin;
+
+	if (kvm->duration && time_diff > kvm->duration) {
+		char decode[32];
+
+		kvm->events_ops->decode_key(kvm, &event->key, decode);
+		if (strcmp(decode, "HLT")) {
+			pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
+				 sample->time, sample->pid, vcpu_record->vcpu_id,
+				 decode, time_diff/1000);
+		}
+	}
+
 	return update_kvm_event(event, vcpu, time_diff);
 }
 
@@ -562,7 +575,7 @@ static bool handle_kvm_event(struct perf_kvm_stat *kvm,
 		return handle_begin_event(kvm, vcpu_record, &key, sample->time);
 
 	if (kvm->events_ops->is_end_event(evsel, sample, &key))
-		return handle_end_event(kvm, vcpu_record, &key, sample->time);
+		return handle_end_event(kvm, vcpu_record, &key, sample);
 
 	return true;
 }
@@ -1429,6 +1442,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
 			"key for sorting: sample(sort by samples number)"
 			" time (sort by avg time)"),
+		OPT_U64(0, "duration", &kvm->duration,
+		    "show events other than HALT that take longer than duration usecs"),
 		OPT_END()
 	};
 	const char * const live_usage[] = {
@@ -1467,6 +1482,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 			usage_with_options(live_usage, live_options);
 	}
 
+	kvm->duration *= NSEC_PER_USEC;   /* convert usec to nsec */
+
 	/*
 	 * target related setups
 	 */
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 32bd102..cf20187 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -125,6 +125,9 @@
 #ifndef NSEC_PER_SEC
 # define NSEC_PER_SEC			1000000000ULL
 #endif
+#ifndef NSEC_PER_USEC
+# define NSEC_PER_USEC			1000ULL
+#endif
 
 static inline unsigned long long rdclock(void)
 {
-- 
cgit v0.10.2


From 9a6d316692d59c4400a66b01db675abac432b4b2 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 21:56:39 -0400
Subject: perf kvm: Update documentation with live command

Update perf-kvm documentation with new live subcommand. Add -p/--pid
option for perf-kvm-stat-report as well.

Signed-off-by: David Ahern <dsahern@gmail.com>
Requested-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375926999-75129-2-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 326f2cb..ac84db2 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -13,6 +13,7 @@ SYNOPSIS
 	{top|record|report|diff|buildid-list}
 'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
 	| --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat}
+'perf kvm stat' [record|report|live] [<options>]
 
 DESCRIPTION
 -----------
@@ -50,6 +51,10 @@ There are a couple of variants of perf kvm:
   'perf kvm stat report' reports statistical data which includes events
   handled time, samples, and so on.
 
+  'perf kvm stat live' reports statistical data in a live mode (similar to
+  record + report but with statistical data updated live at a given display
+  rate).
+
 OPTIONS
 -------
 -i::
@@ -85,13 +90,50 @@ STAT REPORT OPTIONS
 --vcpu=<value>::
        analyze events which occures on this vcpu. (default: all vcpus)
 
---events=<value>::
-       events to be analyzed. Possible values: vmexit, mmio, ioport.
+--event=<value>::
+       event to be analyzed. Possible values: vmexit, mmio, ioport.
        (default: vmexit)
 -k::
 --key=<value>::
        Sorting key. Possible values: sample (default, sort by samples
        number), time (sort by average time).
+-p::
+--pid=::
+    Analyze events only for given process ID(s) (comma separated list).
+
+STAT LIVE OPTIONS
+-----------------
+-d::
+--display::
+        Time in seconds between display updates
+
+-m::
+--mmap-pages=::
+    Number of mmap data pages. Must be a power of two.
+
+-a::
+--all-cpus::
+        System-wide collection from all CPUs.
+
+-p::
+--pid=::
+    Analyze events only for given process ID(s) (comma separated list).
+
+--vcpu=<value>::
+       analyze events which occur on this vcpu. (default: all vcpus)
+
+
+--event=<value>::
+       event to be analyzed. Possible values: vmexit, mmio, ioport.
+       (default: vmexit)
+
+-k::
+--key=<value>::
+       Sorting key. Possible values: sample (default, sort by samples
+       number), time (sort by average time).
+
+--duration=<value>::
+       Show events other than HLT that take longer than duration usecs.
 
 SEE ALSO
 --------
-- 
cgit v0.10.2


From ad9def7ca020ef5b54968c89194f52d18ef1ef49 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:44 -0400
Subject: perf sched: Simplify arguments to read_events

Destroy argument is not necessary. If session is not returned to caller,
then clean it up.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 948183a..34ce57d 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1445,7 +1445,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
 	return err;
 }
 
-static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
+static int perf_sched__read_events(struct perf_sched *sched,
 				   struct perf_session **psession)
 {
 	const struct perf_evsel_str_handler handlers[] = {
@@ -1480,11 +1480,10 @@ static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
 		sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST];
 	}
 
-	if (destroy)
-		perf_session__delete(session);
-
 	if (psession)
 		*psession = session;
+	else
+		perf_session__delete(session);
 
 	return 0;
 
@@ -1529,8 +1528,11 @@ static int perf_sched__lat(struct perf_sched *sched)
 	struct perf_session *session;
 
 	setup_pager();
-	if (perf_sched__read_events(sched, false, &session))
+
+	/* save session -- references to threads are held in work_list */
+	if (perf_sched__read_events(sched, &session))
 		return -1;
+
 	perf_sched__sort_lat(sched);
 
 	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
@@ -1565,7 +1567,7 @@ static int perf_sched__map(struct perf_sched *sched)
 	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
 
 	setup_pager();
-	if (perf_sched__read_events(sched, true, NULL))
+	if (perf_sched__read_events(sched, NULL))
 		return -1;
 	print_bad_events(sched);
 	return 0;
@@ -1580,7 +1582,7 @@ static int perf_sched__replay(struct perf_sched *sched)
 
 	test_calibrations(sched);
 
-	if (perf_sched__read_events(sched, true, NULL))
+	if (perf_sched__read_events(sched, NULL))
 		return -1;
 
 	printf("nr_run_events:        %ld\n", sched->nr_run_events);
-- 
cgit v0.10.2


From ffb273dd7e3bd72e7d964fc0a0f6d441aceb7dae Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:45 -0400
Subject: perf sched: Remove thread lookup in sample handler

Not used in the function, so no sense in doing the lookup here. Thread
look up will be done in the timehist command, and no sense in doing it
twice.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-4-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 34ce57d..5285024 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1425,15 +1425,8 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
 						 struct perf_evsel *evsel,
 						 struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, sample->tid);
 	int err = 0;
 
-	if (thread == NULL) {
-		pr_debug("problem processing %s event, skipping it.\n",
-			 perf_evsel__name(evsel));
-		return -1;
-	}
-
 	evsel->hists.stats.total_period += sample->period;
 	hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
 
-- 
cgit v0.10.2


From 4a957e4df1a212c447fd162d18dc7ee6320c1621 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:46 -0400
Subject: perf sched: Remove sched_process_exit tracepoint

Event is not needed nor analyzed. Since perf-sched leverages perf-record
to capture the sched data, we already capture task events like EXIT.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-5-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5285024..42f4587 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1392,15 +1392,6 @@ static int process_sched_fork_event(struct perf_tool *tool,
 	return 0;
 }
 
-static int process_sched_exit_event(struct perf_tool *tool __maybe_unused,
-				    struct perf_evsel *evsel,
-				    struct perf_sample *sample __maybe_unused,
-				    struct machine *machine __maybe_unused)
-{
-	pr_debug("sched_exit event %p\n", evsel);
-	return 0;
-}
-
 static int process_sched_migrate_task_event(struct perf_tool *tool,
 					    struct perf_evsel *evsel,
 					    struct perf_sample *sample,
@@ -1447,7 +1438,6 @@ static int perf_sched__read_events(struct perf_sched *sched,
 		{ "sched:sched_wakeup",	      process_sched_wakeup_event, },
 		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
 		{ "sched:sched_process_fork", process_sched_fork_event, },
-		{ "sched:sched_process_exit", process_sched_exit_event, },
 		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
 	};
 	struct perf_session *session;
@@ -1634,7 +1624,6 @@ static int __cmd_record(int argc, const char **argv)
 		"-e", "sched:sched_stat_sleep",
 		"-e", "sched:sched_stat_iowait",
 		"-e", "sched:sched_stat_runtime",
-		"-e", "sched:sched_process_exit",
 		"-e", "sched:sched_process_fork",
 		"-e", "sched:sched_wakeup",
 		"-e", "sched:sched_migrate_task",
-- 
cgit v0.10.2


From cb627505ae028a0cd88cc29ed72a4c168a08751d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:47 -0400
Subject: perf sched: Remove sched_process_fork tracepoint

The PERF_RECORD_FORK event is already collected as part of the use of
cmd_record and those events are analyzed as part of the libperf
machinery.  Using the fork tracepoint as well just duplicates the event
load.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-6-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 42f4587..f809cc7 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -109,8 +109,9 @@ struct trace_sched_handler {
 	int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
 			    struct perf_sample *sample, struct machine *machine);
 
-	int (*fork_event)(struct perf_sched *sched, struct perf_evsel *evsel,
-			  struct perf_sample *sample);
+	/* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
+	int (*fork_event)(struct perf_sched *sched, union perf_event *event,
+			  struct machine *machine);
 
 	int (*migrate_task_event)(struct perf_sched *sched,
 				  struct perf_evsel *evsel,
@@ -717,22 +718,29 @@ static int replay_switch_event(struct perf_sched *sched,
 	return 0;
 }
 
-static int replay_fork_event(struct perf_sched *sched, struct perf_evsel *evsel,
-			     struct perf_sample *sample)
+static int replay_fork_event(struct perf_sched *sched,
+			     union perf_event *event,
+			     struct machine *machine)
 {
-	const char *parent_comm = perf_evsel__strval(evsel, sample, "parent_comm"),
-		   *child_comm  = perf_evsel__strval(evsel, sample, "child_comm");
-	const u32 parent_pid  = perf_evsel__intval(evsel, sample, "parent_pid"),
-		  child_pid  = perf_evsel__intval(evsel, sample, "child_pid");
+	struct thread *child, *parent;
+
+	child = machine__findnew_thread(machine, event->fork.tid);
+	parent = machine__findnew_thread(machine, event->fork.ptid);
+
+	if (child == NULL || parent == NULL) {
+		pr_debug("thread does not exist on fork event: child %p, parent %p\n",
+				 child, parent);
+		return 0;
+	}
 
 	if (verbose) {
-		printf("sched_fork event %p\n", evsel);
-		printf("... parent: %s/%d\n", parent_comm, parent_pid);
-		printf("...  child: %s/%d\n", child_comm, child_pid);
+		printf("fork event\n");
+		printf("... parent: %s/%d\n", parent->comm, parent->tid);
+		printf("...  child: %s/%d\n", child->comm, child->tid);
 	}
 
-	register_pid(sched, parent_pid, parent_comm);
-	register_pid(sched, child_pid, child_comm);
+	register_pid(sched, parent->tid, parent->comm);
+	register_pid(sched, child->tid, child->comm);
 	return 0;
 }
 
@@ -824,14 +832,6 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
 	return 0;
 }
 
-static int latency_fork_event(struct perf_sched *sched __maybe_unused,
-			      struct perf_evsel *evsel __maybe_unused,
-			      struct perf_sample *sample __maybe_unused)
-{
-	/* should insert the newcomer */
-	return 0;
-}
-
 static char sched_out_state(u64 prev_state)
 {
 	const char *str = TASK_STATE_TO_CHAR_STR;
@@ -1379,15 +1379,19 @@ static int process_sched_runtime_event(struct perf_tool *tool,
 	return 0;
 }
 
-static int process_sched_fork_event(struct perf_tool *tool,
-				    struct perf_evsel *evsel,
-				    struct perf_sample *sample,
-				    struct machine *machine __maybe_unused)
+static int perf_sched__process_fork_event(struct perf_tool *tool,
+					  union perf_event *event,
+					  struct perf_sample *sample,
+					  struct machine *machine)
 {
 	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
 
+	/* run the fork event through the perf machinery */
+	perf_event__process_fork(tool, event, sample, machine);
+
+	/* and then run additional processing needed for this command */
 	if (sched->tp_handler->fork_event)
-		return sched->tp_handler->fork_event(sched, evsel, sample);
+		return sched->tp_handler->fork_event(sched, event, machine);
 
 	return 0;
 }
@@ -1437,7 +1441,6 @@ static int perf_sched__read_events(struct perf_sched *sched,
 		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
 		{ "sched:sched_wakeup",	      process_sched_wakeup_event, },
 		{ "sched:sched_wakeup_new",   process_sched_wakeup_event, },
-		{ "sched:sched_process_fork", process_sched_fork_event, },
 		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
 	};
 	struct perf_session *session;
@@ -1652,7 +1655,7 @@ static struct perf_sched sched = {
 		.sample		 = perf_sched__process_tracepoint_sample,
 		.comm		 = perf_event__process_comm,
 		.lost		 = perf_event__process_lost,
-		.fork		 = perf_event__process_fork,
+		.fork		 = perf_sched__process_fork_event,
 		.ordered_samples = true,
 	},
 	.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
@@ -1714,7 +1717,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		.wakeup_event	    = latency_wakeup_event,
 		.switch_event	    = latency_switch_event,
 		.runtime_event	    = latency_runtime_event,
-		.fork_event	    = latency_fork_event,
 		.migrate_task_event = latency_migrate_task_event,
 	};
 	struct trace_sched_handler map_ops  = {
-- 
cgit v0.10.2


From a6ffaf91302dc1689fc72da0068b87226747fbe0 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:51 -0400
Subject: perf tool: Simplify options to perf_evsel__print_ip

Make print options based on flags. Simplifies addition of more print
options which is the subject of upcoming patches.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-10-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index cd616ff..ee5d6f8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -66,6 +66,7 @@ struct output_option {
 static struct {
 	bool user_set;
 	bool wildcard_set;
+	unsigned int print_ip_opts;
 	u64 fields;
 	u64 invalid_fields;
 } output[PERF_TYPE_MAX] = {
@@ -235,6 +236,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
 {
 	int j;
 	struct perf_evsel *evsel;
+	struct perf_event_attr *attr;
 
 	for (j = 0; j < PERF_TYPE_MAX; ++j) {
 		evsel = perf_session__find_first_evtype(session, j);
@@ -253,6 +255,24 @@ static int perf_session__check_output_opt(struct perf_session *session)
 		if (evsel && output[j].fields &&
 			perf_evsel__check_attr(evsel, session))
 			return -1;
+
+		if (evsel == NULL)
+			continue;
+
+		attr = &evsel->attr;
+
+		output[j].print_ip_opts = 0;
+		if (PRINT_FIELD(IP))
+			output[j].print_ip_opts |= PRINT_IP_OPT_IP;
+
+		if (PRINT_FIELD(SYM))
+			output[j].print_ip_opts |= PRINT_IP_OPT_SYM;
+
+		if (PRINT_FIELD(DSO))
+			output[j].print_ip_opts |= PRINT_IP_OPT_DSO;
+
+		if (PRINT_FIELD(SYMOFFSET))
+			output[j].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
 	}
 
 	return 0;
@@ -382,8 +402,7 @@ static void print_sample_bts(union perf_event *event,
 		else
 			printf("\n");
 		perf_evsel__print_ip(evsel, event, sample, machine,
-				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),
-				     PRINT_FIELD(SYMOFFSET));
+				     output[attr->type].print_ip_opts);
 	}
 
 	printf(" => ");
@@ -423,9 +442,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
 			printf(" ");
 		else
 			printf("\n");
+
 		perf_evsel__print_ip(evsel, event, sample, machine,
-				     PRINT_FIELD(SYM), PRINT_FIELD(DSO),
-				     PRINT_FIELD(SYMOFFSET));
+				     output[attr->type].print_ip_opts);
 	}
 
 	printf("\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index dedaeb2..e5fd658 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1489,10 +1489,14 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			  struct perf_sample *sample, struct machine *machine,
-			  int print_sym, int print_dso, int print_symoffset)
+			  unsigned int print_opts)
 {
 	struct addr_location al;
 	struct callchain_cursor_node *node;
+	int print_ip = print_opts & PRINT_IP_OPT_IP;
+	int print_sym = print_opts & PRINT_IP_OPT_SYM;
+	int print_dso = print_opts & PRINT_IP_OPT_DSO;
+	int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample,
 					  NULL) < 0) {
@@ -1516,7 +1520,9 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			if (!node)
 				break;
 
-			printf("\t%16" PRIx64, node->ip);
+			if (print_ip)
+				printf("%16" PRIx64, node->ip);
+
 			if (print_sym) {
 				printf(" ");
 				if (print_symoffset) {
@@ -1537,7 +1543,9 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 		}
 
 	} else {
-		printf("%16" PRIx64, sample->ip);
+		if (print_ip)
+			printf("%16" PRIx64, sample->ip);
+
 		if (print_sym) {
 			printf(" ");
 			if (print_symoffset)
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 8bed17e..69e554a 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -41,6 +41,11 @@ struct perf_session {
 	char			filename[1];
 };
 
+#define PRINT_IP_OPT_IP		(1<<0)
+#define PRINT_IP_OPT_SYM		(1<<1)
+#define PRINT_IP_OPT_DSO		(1<<2)
+#define PRINT_IP_OPT_SYMOFFSET	(1<<3)
+
 struct perf_tool;
 
 struct perf_session *perf_session__new(const char *filename, int mode,
@@ -103,7 +108,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			  struct perf_sample *sample, struct machine *machine,
-			  int print_sym, int print_dso, int print_symoffset);
+			  unsigned int print_opts);
 
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
-- 
cgit v0.10.2


From b0b35f0179161a5e256eebffa274b0b6f023f451 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:52 -0400
Subject: perf evsel: Add option to print stack trace on single line

Option is used by upcoming timehist command.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-11-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e5fd658..0d895e7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1497,6 +1497,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 	int print_sym = print_opts & PRINT_IP_OPT_SYM;
 	int print_dso = print_opts & PRINT_IP_OPT_DSO;
 	int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET;
+	int print_oneline = print_opts & PRINT_IP_OPT_ONELINE;
+	char s = print_oneline ? ' ' : '\t';
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample,
 					  NULL) < 0) {
@@ -1521,7 +1523,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 				break;
 
 			if (print_ip)
-				printf("%16" PRIx64, node->ip);
+				printf("%c%16" PRIx64, s, node->ip);
 
 			if (print_sym) {
 				printf(" ");
@@ -1537,7 +1539,9 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 				map__fprintf_dsoname(node->map, stdout);
 				printf(")");
 			}
-			printf("\n");
+
+			if (!print_oneline)
+				printf("\n");
 
 			callchain_cursor_advance(&callchain_cursor);
 		}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 69e554a..7c00ccb 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -45,6 +45,7 @@ struct perf_session {
 #define PRINT_IP_OPT_SYM		(1<<1)
 #define PRINT_IP_OPT_DSO		(1<<2)
 #define PRINT_IP_OPT_SYMOFFSET	(1<<3)
+#define PRINT_IP_OPT_ONELINE	(1<<4)
 
 struct perf_tool;
 
-- 
cgit v0.10.2


From 307cbb92aa2bdc9eed7c74409ff4d5fc9135b4e2 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:53 -0400
Subject: perf evsel: Add option to limit stack depth in callchain dumps

Option is used by upcoming timehist command.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-12-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ee5d6f8..33b2d83 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -402,7 +402,8 @@ static void print_sample_bts(union perf_event *event,
 		else
 			printf("\n");
 		perf_evsel__print_ip(evsel, event, sample, machine,
-				     output[attr->type].print_ip_opts);
+				     output[attr->type].print_ip_opts,
+				     PERF_MAX_STACK_DEPTH);
 	}
 
 	printf(" => ");
@@ -444,7 +445,8 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
 			printf("\n");
 
 		perf_evsel__print_ip(evsel, event, sample, machine,
-				     output[attr->type].print_ip_opts);
+				     output[attr->type].print_ip_opts,
+				     PERF_MAX_STACK_DEPTH);
 	}
 
 	printf("\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0d895e7..5a89964 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1489,7 +1489,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			  struct perf_sample *sample, struct machine *machine,
-			  unsigned int print_opts)
+			  unsigned int print_opts, unsigned int stack_depth)
 {
 	struct addr_location al;
 	struct callchain_cursor_node *node;
@@ -1517,7 +1517,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 		}
 		callchain_cursor_commit(&callchain_cursor);
 
-		while (1) {
+		while (stack_depth) {
 			node = callchain_cursor_current(&callchain_cursor);
 			if (!node)
 				break;
@@ -1544,6 +1544,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 				printf("\n");
 
 			callchain_cursor_advance(&callchain_cursor);
+
+			stack_depth--;
 		}
 
 	} else {
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 7c00ccb..3aa75fb 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -109,7 +109,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 
 void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			  struct perf_sample *sample, struct machine *machine,
-			  unsigned int print_opts);
+			  unsigned int print_opts, unsigned int stack_depth);
 
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
-- 
cgit v0.10.2


From 93ea01c29d4ed5a9fcf6d9a95bc584e54a420834 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 7 Aug 2013 22:50:58 -0400
Subject: perf session: Change perf_session__has_traces to actually check for
 tracepoints

Any event can have the RAW data attribute set. The intent of the function
is to determine if the session has tracepoints, so check the type of
each event explicitly.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375930261-77273-17-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5a89964..4d9028e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1401,12 +1401,15 @@ int perf_session__process_events(struct perf_session *self,
 
 bool perf_session__has_traces(struct perf_session *session, const char *msg)
 {
-	if (!(perf_evlist__sample_type(session->evlist) & PERF_SAMPLE_RAW)) {
-		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
-		return false;
+	struct perf_evsel *evsel;
+
+	list_for_each_entry(evsel, &session->evlist->entries, node) {
+		if (evsel->attr.type == PERF_TYPE_TRACEPOINT)
+			return true;
 	}
 
-	return true;
+	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+	return false;
 }
 
 int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
-- 
cgit v0.10.2


From 611a5ce8aa4cdb2daefbd9bed77ec3b3e9bd00ea Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:20 +0300
Subject: perf machine: Add symbol filter to struct machine

The symbol filter needs to be applied machine-wide, so add it to struct
machine.

Currently tools pass the symbol filter as a parameter to various
map-related functions.  However a need to load a map can occur anywhere
in the code, at which point the filter is needed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6fcc358..4c7e0a28 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -25,6 +25,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 	machine->kmaps.machine = machine;
 	machine->pid = pid;
 
+	machine->symbol_filter = NULL;
+
 	machine->root_dir = strdup(root_dir);
 	if (machine->root_dir == NULL)
 		return -ENOMEM;
@@ -95,6 +97,7 @@ void machines__init(struct machines *machines)
 {
 	machine__init(&machines->host, "", HOST_KERNEL_ID);
 	machines->guests = RB_ROOT;
+	machines->symbol_filter = NULL;
 }
 
 void machines__exit(struct machines *machines)
@@ -118,6 +121,8 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
 		return NULL;
 	}
 
+	machine->symbol_filter = machines->symbol_filter;
+
 	while (*p != NULL) {
 		parent = *p;
 		pos = rb_entry(parent, struct machine, rb_node);
@@ -133,6 +138,21 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
 	return machine;
 }
 
+void machines__set_symbol_filter(struct machines *machines,
+				 symbol_filter_t symbol_filter)
+{
+	struct rb_node *nd;
+
+	machines->symbol_filter = symbol_filter;
+	machines->host.symbol_filter = symbol_filter;
+
+	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+		machine->symbol_filter = symbol_filter;
+	}
+}
+
 struct machine *machines__find(struct machines *machines, pid_t pid)
 {
 	struct rb_node **p = &machines->guests.rb_node;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 5bb6244..603ffba 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -29,6 +29,7 @@ struct machine {
 	struct list_head  kernel_dsos;
 	struct map_groups kmaps;
 	struct map	  *vmlinux_maps[MAP__NR_TYPES];
+	symbol_filter_t	  symbol_filter;
 };
 
 static inline
@@ -51,6 +52,7 @@ typedef void (*machine__process_t)(struct machine *machine, void *data);
 struct machines {
 	struct machine host;
 	struct rb_root guests;
+	symbol_filter_t symbol_filter;
 };
 
 void machines__init(struct machines *machines);
@@ -68,6 +70,9 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid);
 void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
 char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
 
+void machines__set_symbol_filter(struct machines *machines,
+				 symbol_filter_t symbol_filter);
+
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
 void machine__exit(struct machine *machine);
 void machine__delete_dead_threads(struct machine *machine);
-- 
cgit v0.10.2


From 36035f78ae1714a0762a2b38b64942d6dcb6471d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:21 +0300
Subject: perf top: Set the machines symbol filter

Make use of the machines' symbol filter member.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 440c3b3..a63ade2 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -717,7 +717,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		top->exact_samples++;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  symbol_filter) < 0 ||
+					  machine->symbol_filter) < 0 ||
 	    al.filtered)
 		return;
 
@@ -938,6 +938,8 @@ static int __cmd_top(struct perf_top *top)
 	if (top->session == NULL)
 		return -ENOMEM;
 
+	machines__set_symbol_filter(&top->session->machines, symbol_filter);
+
 	if (!objdump_path) {
 		ret = perf_session_env__lookup_objdump(&top->session->header.env);
 		if (ret)
-- 
cgit v0.10.2


From b8681711271a0124d9495dae2e1ac0616b0ed47a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:22 +0300
Subject: perf report: Set the machines symbol filter

Make use of the machines' symbol filter member.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index d785d89..f06a5a2 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -49,7 +49,6 @@ struct perf_report {
 	bool			mem_mode;
 	struct perf_read_values	show_threads_values;
 	const char		*pretty_printing_style;
-	symbol_filter_t		annotate_init;
 	const char		*cpu_list;
 	const char		*symbol_filter_str;
 	float			min_percent;
@@ -306,7 +305,7 @@ static int process_sample_event(struct perf_tool *tool,
 	int ret;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  rep->annotate_init) < 0) {
+					  machine->symbol_filter) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 			event->header.type);
 		return -1;
@@ -924,7 +923,8 @@ repeat:
 	 */
 	if (use_browser == 1 && sort__has_sym) {
 		symbol_conf.priv_size = sizeof(struct annotation);
-		report.annotate_init  = symbol__annotate_init;
+		machines__set_symbol_filter(&session->machines,
+					    symbol__annotate_init);
 		/*
  		 * For searching by name on the "Browse map details".
  		 * providing it only in verbose mode not to bloat too
-- 
cgit v0.10.2


From 2d8cc6851c7b92857e1171ba5fe587d38d886161 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:23 +0300
Subject: perf mem: Remove unused symbol filter member

Member 'annotate_init' of struct perf_mem is unused.  Remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index a8ff6d2..f96168c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -14,7 +14,6 @@ static const char	*mem_operation		= MEM_OPERATION_LOAD;
 struct perf_mem {
 	struct perf_tool	tool;
 	char const		*input_name;
-	symbol_filter_t		annotate_init;
 	bool			hide_unresolved;
 	bool			dump_raw;
 	const char		*cpu_list;
@@ -70,7 +69,7 @@ dump_raw_samples(struct perf_tool *tool,
 	const char *fmt;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample,
-				mem->annotate_init) < 0) {
+				NULL) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
 		return -1;
-- 
cgit v0.10.2


From 476d35c2f3a48f81691daad06bc9668c516428d9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:24 +0300
Subject: perf annotate: Set the machines symbol filter

Make use of the machines' symbol filter member.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-6-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index db491e9..9754cb1 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -91,7 +91,7 @@ static int process_sample_event(struct perf_tool *tool,
 	struct addr_location al;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  symbol__annotate_init) < 0) {
+					  machine->symbol_filter) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
@@ -195,6 +195,8 @@ static int __cmd_annotate(struct perf_annotate *ann)
 	if (session == NULL)
 		return -ENOMEM;
 
+	machines__set_symbol_filter(&session->machines, symbol__annotate_init);
+
 	if (ann->cpu_list) {
 		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
 					       ann->cpu_bitmap);
-- 
cgit v0.10.2


From e44baa3ea1eaa09d7d247a9b245fcff06561bf96 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:25 +0300
Subject: perf tools: Remove filter parameter of
 perf_event__preprocess_sample()

Now that the symbol filter is recorded on the machine there is no need
to pass it to perf_event__preprocess_sample().  So remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 9754cb1..f988d38 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -90,8 +90,7 @@ static int process_sample_event(struct perf_tool *tool,
 	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
 	struct addr_location al;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  machine->symbol_filter) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 93de3ac..f28799e 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -319,7 +319,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 {
 	struct addr_location al;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index f96168c..706a1fa 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -68,8 +68,7 @@ dump_raw_samples(struct perf_tool *tool,
 	struct addr_location al;
 	const char *fmt;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample,
-				NULL) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 				event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f06a5a2..958a56a 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -304,8 +304,7 @@ static int process_sample_event(struct perf_tool *tool,
 	struct addr_location al;
 	int ret;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  machine->symbol_filter) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		fprintf(stderr, "problem processing %d event, skipping it.\n",
 			event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 33b2d83..a7d623f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -520,7 +520,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 		return 0;
 	}
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample, 0) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
 		return -1;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a63ade2..e37521f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -716,8 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
 		top->exact_samples++;
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  machine->symbol_filter) < 0 ||
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 ||
 	    al.filtered)
 		return;
 
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 89085a9..50bfb01 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -220,7 +220,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 			};
 
 			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample, 0) < 0)
+							  &sample) < 0)
 				goto out;
 
 			he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
@@ -244,7 +244,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 			};
 
 			if (perf_event__preprocess_sample(&event, machine, &al,
-							  &sample, 0) < 0)
+							  &sample) < 0)
 				goto out;
 
 			he = __hists__add_entry(&evsel->hists, &al, NULL, 1, 1);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index cc7c0c9..f3cf771 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -683,8 +683,7 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine,
 int perf_event__preprocess_sample(const union perf_event *event,
 				  struct machine *machine,
 				  struct addr_location *al,
-				  struct perf_sample *sample,
-				  symbol_filter_t filter)
+				  struct perf_sample *sample)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
@@ -709,7 +708,7 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		machine__create_kernel_maps(machine);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, al, filter);
+			      event->ip.ip, al, machine->symbol_filter);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -727,7 +726,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
 						   dso->long_name)))))
 			goto out_filtered;
 
-		al->sym = map__find_symbol(al->map, al->addr, filter);
+		al->sym = map__find_symbol(al->map, al->addr,
+					   machine->symbol_filter);
 	}
 
 	if (symbol_conf.sym_list &&
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6119a64..15db071 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -234,8 +234,7 @@ struct addr_location;
 int perf_event__preprocess_sample(const union perf_event *self,
 				  struct machine *machine,
 				  struct addr_location *al,
-				  struct perf_sample *sample,
-				  symbol_filter_t filter);
+				  struct perf_sample *sample);
 
 const char *perf_event__name(unsigned int id);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4d9028e..de16a77 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1503,8 +1503,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 	int print_oneline = print_opts & PRINT_IP_OPT_ONELINE;
 	char s = print_oneline ? ' ' : '\t';
 
-	if (perf_event__preprocess_sample(event, machine, &al, sample,
-					  NULL) < 0) {
+	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		error("problem processing %d event, skipping it.\n",
 			event->header.type);
 		return;
-- 
cgit v0.10.2


From 61710bdee324aab1c148c8573ee49cea59d05874 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:26 +0300
Subject: perf tools: Remove filter parameter of thread__find_addr_location()

Now that the symbol filter is recorded on the machine there is no need
to pass it to thread__find_addr_location().  So remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f3cf771..9d301c9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -670,12 +670,13 @@ try_again:
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
-				struct addr_location *al,
-				symbol_filter_t filter)
+				struct addr_location *al)
 {
-	thread__find_addr_map(thread, machine, cpumode, type, addr, al, filter);
+	thread__find_addr_map(thread, machine, cpumode, type, addr, al,
+			      machine->symbol_filter);
 	if (al->map != NULL)
-		al->sym = map__find_symbol(al->map, al->addr, filter);
+		al->sym = map__find_symbol(al->map, al->addr,
+					   machine->symbol_filter);
 	else
 		al->sym = NULL;
 }
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 4c7e0a28..4514e7e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1130,7 +1130,7 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread,
 		 * or else, the symbol is unknown
 		 */
 		thread__find_addr_location(thread, machine, m, MAP__FUNCTION,
-				ip, &al, NULL);
+				ip, &al);
 		if (al.sym)
 			goto found;
 	}
@@ -1148,8 +1148,8 @@ static void ip__resolve_data(struct machine *machine, struct thread *thread,
 
 	memset(&al, 0, sizeof(al));
 
-	thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr, &al,
-				   NULL);
+	thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr,
+				   &al);
 	ams->addr = addr;
 	ams->al_addr = al.addr;
 	ams->sym = al.sym;
@@ -1244,7 +1244,7 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 
 		al.filtered = false;
 		thread__find_addr_location(thread, machine, cpumode,
-					   MAP__FUNCTION, ip, &al, NULL);
+					   MAP__FUNCTION, ip, &al);
 		if (al.sym != NULL) {
 			if (sort__has_parent && !*parent &&
 			    symbol__match_regex(al.sym, &parent_regex))
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index f98d1d9..0ab47d8 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -45,8 +45,7 @@ void thread__find_addr_map(struct thread *thread, struct machine *machine,
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
-				struct addr_location *al,
-				symbol_filter_t filter);
+				struct addr_location *al);
 
 static inline void *thread__priv(struct thread *thread)
 {
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 5bbd494..abac3f9 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -473,7 +473,7 @@ static int entry(u64 ip, struct thread *thread, struct machine *machine,
 
 	thread__find_addr_location(thread, machine,
 				   PERF_RECORD_MISC_USER,
-				   MAP__FUNCTION, ip, &al, NULL);
+				   MAP__FUNCTION, ip, &al);
 
 	e.ip = ip;
 	e.map = al.map;
-- 
cgit v0.10.2


From 326f59bf645ea6c99709c67d9712df46019fa7a8 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 8 Aug 2013 14:32:27 +0300
Subject: perf tools: Remove filter parameter of thread__find_addr_map()

Now that the symbol filter is recorded on the machine there is no need
to pass it to thread__find_addr_map().  So remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375961547-30267-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index f012a98..1d8de2e 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -206,7 +206,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al, NULL);
+			      event->ip.ip, &al);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index a7d623f..2ad9d5b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -361,10 +361,10 @@ static void print_sample_addr(union perf_event *event,
 		return;
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      sample->addr, &al, NULL);
+			      sample->addr, &al);
 	if (!al.map)
 		thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
-				      sample->addr, &al, NULL);
+				      sample->addr, &al);
 
 	al.cpu = sample->cpu;
 	al.sym = NULL;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 8e0943b..eec1421 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -147,7 +147,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr,
-			      &al, NULL);
+			      &al);
 	if (!al.map || !al.map->dso) {
 		pr_debug("thread__find_addr_map failed\n");
 		return -1;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 3a0f508..5295625 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -33,7 +33,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al, NULL);
+			      event->ip.ip, &al);
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9d301c9..49713ae 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -592,7 +592,7 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
 void thread__find_addr_map(struct thread *self,
 			   struct machine *machine, u8 cpumode,
 			   enum map_type type, u64 addr,
-			   struct addr_location *al, symbol_filter_t filter)
+			   struct addr_location *al)
 {
 	struct map_groups *mg = &self->mg;
 	bool load_map = false;
@@ -663,7 +663,7 @@ try_again:
 		 * must be done prior to using kernel maps.
 		 */
 		if (load_map)
-			map__load(al->map, filter);
+			map__load(al->map, machine->symbol_filter);
 		al->addr = al->map->map_ip(al->map, al->addr);
 	}
 }
@@ -672,8 +672,7 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
 				struct addr_location *al)
 {
-	thread__find_addr_map(thread, machine, cpumode, type, addr, al,
-			      machine->symbol_filter);
+	thread__find_addr_map(thread, machine, cpumode, type, addr, al);
 	if (al->map != NULL)
 		al->sym = map__find_symbol(al->map, al->addr,
 					   machine->symbol_filter);
@@ -709,7 +708,7 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		machine__create_kernel_maps(machine);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, al, machine->symbol_filter);
+			      event->ip.ip, al);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 0ab47d8..13c62c9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -41,7 +41,7 @@ static inline struct map *thread__find_map(struct thread *self,
 
 void thread__find_addr_map(struct thread *thread, struct machine *machine,
 			   u8 cpumode, enum map_type type, u64 addr,
-			   struct addr_location *al, symbol_filter_t filter);
+			   struct addr_location *al);
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index abac3f9..2f891f7 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -272,7 +272,7 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
 	struct addr_location al;
 
 	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, ip, &al, NULL);
+			      MAP__FUNCTION, ip, &al);
 	return al.map;
 }
 
@@ -349,7 +349,7 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
 	ssize_t size;
 
 	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, addr, &al, NULL);
+			      MAP__FUNCTION, addr, &al);
 	if (!al.map) {
 		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
 		return -1;
-- 
cgit v0.10.2


From 36009d07b79d2a168d6037947357d96e5d8cebe7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 6 Aug 2013 18:08:41 +0200
Subject: tracing/perf: Expand TRACE_EVENT(sched_stat_runtime)

To simplify the review of the next patches:

1. We are going to reimplement __perf_task/counter and embed them
   into TP_ARGS(). Expand TRACE_EVENT(sched_stat_runtime) into
   DECLARE_EVENT_CLASS() + DEFINE_EVENT(), this way they can use
   different TP_ARGS's.

2. Change perf_trace_##call() macro to do perf_fetch_caller_regs()
   right before perf_trace_buf_prepare().

   This way it evaluates TP_ARGS() asap; the next patch exploits
   this fact.

   Note: after 87f44bbc perf_trace_buf_prepare() doesn't need
   "struct pt_regs *regs", perhaps it makes sense to remove this
   argument. And perhaps we can teach perf_trace_buf_submit()
   to accept regs == NULL and do fetch_caller_regs(CALLER_ADDR1)
   in this case.

3. Cosmetic, but the typecast from "void*" buys nothing. It just
   adds the noise, remove it.

Link: http://lkml.kernel.org/r/20130806160841.GA2736@redhat.com

Acked-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e5586ca..249c024 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -372,7 +372,7 @@ DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
  */
-TRACE_EVENT(sched_stat_runtime,
+DECLARE_EVENT_CLASS(sched_stat_runtime,
 
 	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
 
@@ -401,6 +401,10 @@ TRACE_EVENT(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
+DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+	     TP_ARGS(tsk, runtime, vruntime));
+
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 41a6643..618af05 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -663,15 +663,14 @@ perf_trace_##call(void *__data, proto)					\
 	int __data_size;						\
 	int rctx;							\
 									\
-	perf_fetch_caller_regs(&__regs);				\
-									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(	\
-		__entry_size, event_call->event.type, &__regs, &rctx);	\
+	perf_fetch_caller_regs(&__regs);				\
+	entry = perf_trace_buf_prepare(__entry_size,			\
+			event_call->event.type, &__regs, &rctx);	\
 	if (!entry)							\
 		return;							\
 									\
-- 
cgit v0.10.2


From 12473965c38a527a0c6f7a38d23edce60957f873 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 6 Aug 2013 18:08:44 +0200
Subject: tracing/perf: Reimplement TP_perf_assign() logic

The next patch tries to avoid the costly perf_trace_buf_* calls
when possible but there is a problem. We can only do this if
__task == NULL, perf_tp_event(task != NULL) has the additional
code for this case.

Unfortunately, TP_perf_assign/__perf_xxx which changes the default
values of __count/__task variables for perf_trace_buf_submit() is
called "too late", after we already did perf_trace_buf_prepare(),
and the optimization above can't work.

So this patch simply embeds __perf_xxx() into TP_ARGS(), this way
DECLARE_EVENT_CLASS() can use the result of assignments hidden in
"args" right after ftrace_get_offsets_##call() which is mostly
trivial. This allows us to have the fast-path "__task != NULL"
check at the start, see the next patch.

Link: http://lkml.kernel.org/r/20130806160844.GA2739@redhat.com

Tested-by: David Ahern <dsahern@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 249c024..2e7d994 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -57,7 +57,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
 
 	TP_PROTO(struct task_struct *p, int success),
 
-	TP_ARGS(p, success),
+	TP_ARGS(__perf_task(p), success),
 
 	TP_STRUCT__entry(
 		__array(	char,	comm,	TASK_COMM_LEN	)
@@ -73,9 +73,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
 		__entry->prio		= p->prio;
 		__entry->success	= success;
 		__entry->target_cpu	= task_cpu(p);
-	)
-	TP_perf_assign(
-		__perf_task(p);
 	),
 
 	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
@@ -313,7 +310,7 @@ DECLARE_EVENT_CLASS(sched_stat_template,
 
 	TP_PROTO(struct task_struct *tsk, u64 delay),
 
-	TP_ARGS(tsk, delay),
+	TP_ARGS(__perf_task(tsk), __perf_count(delay)),
 
 	TP_STRUCT__entry(
 		__array( char,	comm,	TASK_COMM_LEN	)
@@ -325,10 +322,6 @@ DECLARE_EVENT_CLASS(sched_stat_template,
 		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
 		__entry->pid	= tsk->pid;
 		__entry->delay	= delay;
-	)
-	TP_perf_assign(
-		__perf_count(delay);
-		__perf_task(tsk);
 	),
 
 	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
@@ -376,7 +369,7 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
 
 	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
 
-	TP_ARGS(tsk, runtime, vruntime),
+	TP_ARGS(tsk, __perf_count(runtime), vruntime),
 
 	TP_STRUCT__entry(
 		__array( char,	comm,	TASK_COMM_LEN	)
@@ -390,9 +383,6 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
 		__entry->pid		= tsk->pid;
 		__entry->runtime	= runtime;
 		__entry->vruntime	= vruntime;
-	)
-	TP_perf_assign(
-		__perf_count(runtime);
 	),
 
 	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 618af05..4163d93 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -507,8 +507,14 @@ static inline notrace int ftrace_get_offsets_##call(			\
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
-#undef TP_perf_assign
-#define TP_perf_assign(args...)
+#undef __perf_addr
+#define __perf_addr(a)	(a)
+
+#undef __perf_count
+#define __perf_count(c)	(c)
+
+#undef __perf_task
+#define __perf_task(t)	(t)
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
@@ -636,16 +642,13 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
 #undef __perf_addr
-#define __perf_addr(a) __addr = (a)
+#define __perf_addr(a)	(__addr = (a))
 
 #undef __perf_count
-#define __perf_count(c) __count = (c)
+#define __perf_count(c)	(__count = (c))
 
 #undef __perf_task
-#define __perf_task(t) __task = (t)
-
-#undef TP_perf_assign
-#define TP_perf_assign(args...) args
+#define __perf_task(t)	(__task = (t))
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
-- 
cgit v0.10.2


From d027e6a9c83440bf1ca9e5503539d58d8e0914f1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 6 Aug 2013 18:08:47 +0200
Subject: tracing/perf: Avoid perf_trace_buf_*() in perf_trace_##call() when
 possible

perf_trace_buf_prepare() + perf_trace_buf_submit(task => NULL)
make no sense if hlist_empty(head). Change perf_trace_##call()
to check ->perf_events beforehand and do nothing if it is empty.

This removes the overhead for tasks without events associated
with them. For example, "perf record -e sched:sched_switch -p1"
attaches the counter(s) to the single task, but every task in
system will do perf_trace_buf_prepare/submit() just to realize
that it was not attached to this event.

However, we can only do this if __task == NULL, so we also add
the __builtin_constant_p(__task) check.

With this patch "perf bench sched pipe" shows approximately 4%
improvement when "perf record -p1" runs in parallel, many thanks
to Steven for the testing.

Link: http://lkml.kernel.org/r/20130806160847.GA2746@redhat.com

Tested-by: David Ahern <dsahern@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4163d93..5c7ab17 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -667,6 +667,12 @@ perf_trace_##call(void *__data, proto)					\
 	int rctx;							\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+									\
+	head = this_cpu_ptr(event_call->perf_events);			\
+	if (__builtin_constant_p(!__task) && !__task &&			\
+				hlist_empty(head))			\
+		return;							\
+									\
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
@@ -681,7 +687,6 @@ perf_trace_##call(void *__data, proto)					\
 									\
 	{ assign; }							\
 									\
-	head = this_cpu_ptr(event_call->perf_events);			\
 	perf_trace_buf_submit(entry, __entry_size, rctx, __addr,	\
 		__count, &__regs, head, __task);			\
 }
-- 
cgit v0.10.2


From 309b5185047c5309bbc576025f6c5e257edd9f69 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 13 Aug 2013 22:32:12 -0600
Subject: perf tests: Fix compile failure on do_sort_something
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit b55ae0a9 added code-reading.c which fails to compile on Fedora 16
with compiler version:
$ gcc --version
gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2)

Failure message is:

tests/code-reading.c: In function ‘do_sort_something’:
tests/code-reading.c:305:13: error: stack protector not protecting local variables: variable length buffer [-Werror=stack-protector]
cc1: all warnings being treated as errors
make: *** [/tmp/junk/tests/code-reading.o] Error 1
make: *** Waiting for unfinished jobs....

v2: as Adrian noticed changed sizeof to ARRAY_SIZE

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1376454732-83728-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index eec1421..df9afd9 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -304,15 +304,14 @@ static int comp(const void *a, const void *b)
 
 static void do_sort_something(void)
 {
-	size_t sz = 40960;
-	int buf[sz], i;
+	int buf[40960], i;
 
-	for (i = 0; i < (int)sz; i++)
-		buf[i] = sz - i - 1;
+	for (i = 0; i < (int)ARRAY_SIZE(buf); i++)
+		buf[i] = ARRAY_SIZE(buf) - i - 1;
 
-	qsort(buf, sz, sizeof(int), comp);
+	qsort(buf, ARRAY_SIZE(buf), sizeof(int), comp);
 
-	for (i = 0; i < (int)sz; i++) {
+	for (i = 0; i < (int)ARRAY_SIZE(buf); i++) {
 		if (buf[i] != i) {
 			pr_debug("qsort failed\n");
 			break;
-- 
cgit v0.10.2


From c5885749e4ebe568cca969d43488a233e69e6454 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Wed, 14 Aug 2013 12:04:26 +0200
Subject: perf tools: Improve robustness of topology parsing code

This patch improves the robustness of the build_cpu_topo() routine by
allowing either the CPU parsing or the thread parsing to fail and yet
get perf to produce some topology data which could be useful for the
analysis.

Without this patch, if the cpu parsing fails, the thread parsing is not
attempted, and vice-versa.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130814100426.GA3444@quad
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f558f83..a33197a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -716,18 +716,19 @@ static int build_cpu_topo(struct cpu_topo *tp, int cpu)
 	char filename[MAXPATHLEN];
 	char *buf = NULL, *p;
 	size_t len = 0;
+	ssize_t sret;
 	u32 i = 0;
 	int ret = -1;
 
 	sprintf(filename, CORE_SIB_FMT, cpu);
 	fp = fopen(filename, "r");
 	if (!fp)
-		return -1;
-
-	if (getline(&buf, &len, fp) <= 0)
-		goto done;
+		goto try_threads;
 
+	sret = getline(&buf, &len, fp);
 	fclose(fp);
+	if (sret <= 0)
+		goto try_threads;
 
 	p = strchr(buf, '\n');
 	if (p)
@@ -743,7 +744,9 @@ static int build_cpu_topo(struct cpu_topo *tp, int cpu)
 		buf = NULL;
 		len = 0;
 	}
+	ret = 0;
 
+try_threads:
 	sprintf(filename, THRD_SIB_FMT, cpu);
 	fp = fopen(filename, "r");
 	if (!fp)
-- 
cgit v0.10.2


From 2ae3a312c0ccd8ff615372f00aab1700aac27474 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 9 Aug 2013 12:28:31 -0300
Subject: perf trace: Allow specifying which syscalls to trace

Similar to -e in strace, i.e. a comma separated list of syscall names
to trace.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-5zku7q5wug3103k1dzn3yy63@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 68718cc..3b3552a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -26,6 +26,10 @@ OPTIONS
 --all-cpus::
         System-wide collection from all CPUs.
 
+-e::
+--expr::
+	List of events to show, currently only syscall names.
+
 -p::
 --pid=::
 	Record events on existing process ID (comma separated list).
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index da7ae01..120fdfb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5,6 +5,7 @@
 #include "util/machine.h"
 #include "util/thread.h"
 #include "util/parse-options.h"
+#include "util/strlist.h"
 #include "util/thread_map.h"
 
 #include <libaudit.h>
@@ -47,6 +48,7 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
 struct syscall {
 	struct event_format *tp_format;
 	const char	    *name;
+	bool		    filtered;
 	struct syscall_fmt  *fmt;
 };
 
@@ -110,6 +112,7 @@ struct trace {
 	struct perf_record_opts opts;
 	struct machine		host;
 	u64			base_time;
+	struct strlist		*ev_qualifier;
 	unsigned long		nr_events;
 	bool			sched;
 	bool			multiple_threads;
@@ -226,6 +229,16 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
 	sc = trace->syscalls.table + id;
 	sc->name = name;
+
+	if (trace->ev_qualifier && !strlist__find(trace->ev_qualifier, name)) {
+		sc->filtered = true;
+		/*
+ 		 * No need to do read tracepoint information since this will be
+ 		 * filtered out.
+ 		 */
+		return 0;
+	}
+
 	sc->fmt  = syscall_fmt__find(sc->name);
 
 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
@@ -302,11 +315,19 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	char *msg;
 	void *args;
 	size_t printed = 0;
-	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
+	struct thread *thread;
 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
-	struct thread_trace *ttrace = thread__trace(thread);
+	struct thread_trace *ttrace;
+
+	if (sc == NULL)
+		return -1;
 
-	if (ttrace == NULL || sc == NULL)
+	if (sc->filtered)
+		return 0;
+
+	thread = machine__findnew_thread(&trace->host, sample->tid);
+	ttrace = thread__trace(thread);
+	if (ttrace == NULL)
 		return -1;
 
 	args = perf_evsel__rawptr(evsel, sample, "args");
@@ -345,11 +366,19 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 {
 	int ret;
 	u64 duration = 0;
-	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
-	struct thread_trace *ttrace = thread__trace(thread);
+	struct thread *thread;
 	struct syscall *sc = trace__syscall_info(trace, evsel, sample);
+	struct thread_trace *ttrace;
+
+	if (sc == NULL)
+		return -1;
 
-	if (ttrace == NULL || sc == NULL)
+	if (sc->filtered)
+		return 0;
+
+	thread = machine__findnew_thread(&trace->host, sample->tid);
+	ttrace = thread__trace(thread);
+	if (ttrace == NULL)
 		return -1;
 
 	ret = perf_evsel__intval(evsel, sample, "ret");
@@ -634,7 +663,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 			.mmap_pages    = 1024,
 		},
 	};
+	const char *ev_qualifier_str = NULL;
 	const struct option trace_options[] = {
+	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
+		    "list of events to trace"),
 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
 		    "trace events on existing process id"),
 	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
@@ -660,6 +692,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
 
+	if (ev_qualifier_str != NULL) {
+		trace.ev_qualifier = strlist__new(true, ev_qualifier_str);
+		if (trace.ev_qualifier == NULL) {
+			puts("Not enough memory to parse event qualifier");
+			return -ENOMEM;
+		}
+	}
+
 	err = perf_target__validate(&trace.opts.target);
 	if (err) {
 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
-- 
cgit v0.10.2


From 899396cf7b4b31e08be358411ad5c0c066069ebc Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 7 Aug 2013 14:17:23 +0800
Subject: perf/x86/intel/uncore: Add auxiliary pci device support

The QPI uncore boxes have two pairs of MATCH/MASK registers that are
used to filter packet traffic serviced by the QPI link layer. These
registers are in auxiliary PCI devices.

This patch changes the meaning of (struct pci_device_id)->driver_data.
The first 8 bits are the device index within the same uncore type, the
second 8 bits are the uncore type index. An auxiliary PCI device's type
is defined as UNCORE_EXTRA_PCI_DEV (0xff).

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1375856245-10717-1-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index cad791d..7ce9b35 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -6,6 +6,8 @@ static struct intel_uncore_type **pci_uncores = empty_uncore;
 /* pci bus to socket mapping */
 static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
 
+static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+
 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
 
 /* mask of cpus that collect uncore events */
@@ -807,43 +809,43 @@ static struct intel_uncore_type *snbep_pci_uncores[] = {
 static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
 	{ /* Home Agent */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
-		.driver_data = SNBEP_PCI_UNCORE_HA,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
 	},
 	{ /* MC Channel 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
-		.driver_data = SNBEP_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
 	},
 	{ /* MC Channel 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
-		.driver_data = SNBEP_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
 	},
 	{ /* MC Channel 2 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
-		.driver_data = SNBEP_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
 	},
 	{ /* MC Channel 3 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
-		.driver_data = SNBEP_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
 	},
 	{ /* QPI Port 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
-		.driver_data = SNBEP_PCI_UNCORE_QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
 	},
 	{ /* QPI Port 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
-		.driver_data = SNBEP_PCI_UNCORE_QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
 	},
 	{ /* R2PCIe */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
-		.driver_data = SNBEP_PCI_UNCORE_R2PCIE,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
 	},
 	{ /* R3QPI Link 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
-		.driver_data = SNBEP_PCI_UNCORE_R3QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
 	},
 	{ /* R3QPI Link 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
-		.driver_data = SNBEP_PCI_UNCORE_R3QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
 	},
 	{ /* end: all zeroes */ }
 };
@@ -1256,71 +1258,71 @@ static struct intel_uncore_type *ivt_pci_uncores[] = {
 static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
 	{ /* Home Agent 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
-		.driver_data = IVT_PCI_UNCORE_HA,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0),
 	},
 	{ /* Home Agent 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
-		.driver_data = IVT_PCI_UNCORE_HA,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1),
 	},
 	{ /* MC0 Channel 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0),
 	},
 	{ /* MC0 Channel 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1),
 	},
 	{ /* MC0 Channel 3 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2),
 	},
 	{ /* MC0 Channel 4 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3),
 	},
 	{ /* MC1 Channel 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4),
 	},
 	{ /* MC1 Channel 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5),
 	},
 	{ /* MC1 Channel 3 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6),
 	},
 	{ /* MC1 Channel 4 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
-		.driver_data = IVT_PCI_UNCORE_IMC,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7),
 	},
 	{ /* QPI0 Port 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
-		.driver_data = IVT_PCI_UNCORE_QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0),
 	},
 	{ /* QPI0 Port 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
-		.driver_data = IVT_PCI_UNCORE_QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1),
 	},
 	{ /* QPI1 Port 2 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
-		.driver_data = IVT_PCI_UNCORE_QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2),
 	},
 	{ /* R2PCIe */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
-		.driver_data = IVT_PCI_UNCORE_R2PCIE,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0),
 	},
 	{ /* R3QPI0 Link 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
-		.driver_data = IVT_PCI_UNCORE_R3QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0),
 	},
 	{ /* R3QPI0 Link 1 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
-		.driver_data = IVT_PCI_UNCORE_R3QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1),
 	},
 	{ /* R3QPI1 Link 2 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
-		.driver_data = IVT_PCI_UNCORE_R3QPI,
+		.driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2),
 	},
 	{ /* end: all zeroes */ }
 };
@@ -3167,16 +3169,24 @@ static bool pcidrv_registered;
 /*
  * add a pci uncore device
  */
-static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, phys_id;
+	struct intel_uncore_type *type;
+	int phys_id;
 
 	phys_id = pcibus_to_physid[pdev->bus->number];
 	if (phys_id < 0)
 		return -ENODEV;
 
+	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+		extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev;
+		pci_set_drvdata(pdev, NULL);
+		return 0;
+	}
+
+	type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
 	box = uncore_alloc_box(type, 0);
 	if (!box)
 		return -ENOMEM;
@@ -3185,21 +3195,11 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
 	 * for performance monitoring unit with multiple boxes,
 	 * each box has a different function id.
 	 */
-	for (i = 0; i < type->num_boxes; i++) {
-		pmu = &type->pmus[i];
-		if (pmu->func_id == pdev->devfn)
-			break;
-		if (pmu->func_id < 0) {
-			pmu->func_id = pdev->devfn;
-			break;
-		}
-		pmu = NULL;
-	}
-
-	if (!pmu) {
-		kfree(box);
-		return -EINVAL;
-	}
+	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+	if (pmu->func_id < 0)
+		pmu->func_id = pdev->devfn;
+	else
+		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
 
 	box->phys_id = phys_id;
 	box->pci_dev = pdev;
@@ -3217,9 +3217,22 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
 static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
-	struct intel_uncore_pmu *pmu = box->pmu;
-	int cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+	struct intel_uncore_pmu *pmu;
+	int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number];
 
+	box = pci_get_drvdata(pdev);
+	if (!box) {
+		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
+			if (extra_pci_dev[phys_id][i] == pdev) {
+				extra_pci_dev[phys_id][i] = NULL;
+				break;
+			}
+		}
+		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
+		return;
+	}
+
+	pmu = box->pmu;
 	if (WARN_ON_ONCE(phys_id != box->phys_id))
 		return;
 
@@ -3240,12 +3253,6 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 	kfree(box);
 }
 
-static int uncore_pci_probe(struct pci_dev *pdev,
-			    const struct pci_device_id *id)
-{
-	return uncore_pci_add(pci_uncores[id->driver_data], pdev);
-}
-
 static int __init uncore_pci_init(void)
 {
 	int ret;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 47b3d00..ede5a8c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -12,6 +12,15 @@
 #define UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC
 #define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1)
 
+#define UNCORE_PCI_DEV_DATA(type, idx)	((type << 8) | idx)
+#define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
+#define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)
+#define UNCORE_EXTRA_PCI_DEV		0xff
+#define UNCORE_EXTRA_PCI_DEV_MAX	0
+
+/* support up to 8 sockets */
+#define UNCORE_SOCKET_MAX		8
+
 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
 
 /* SNB event control */
-- 
cgit v0.10.2


From fd1ec259ba814f0ef35dc8ae2cbd0844541b917d Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 7 Aug 2013 14:17:24 +0800
Subject: perf/x86/intel/uncore: Add filter support for QPI boxes

The QPI uncore boxes have two pairs of MATCH/MASK registers that are
used to filter packet traffic serviced by the QPI link layer. These
registers are in auxiliary PCI devices.

This patch adds the auxiliary PCI devices to snbep_uncore_pci_ids
and adds field definitions for the MATCH/MASK registers.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1375856245-10717-2-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 7ce9b35..6b8b9c9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -47,6 +47,24 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
 DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
 DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
 DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
 
 static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
 {
@@ -303,6 +321,24 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
 	&format_attr_edge.attr,
 	&format_attr_inv.attr,
 	&format_attr_thresh8.attr,
+	&format_attr_match_rds.attr,
+	&format_attr_match_rnid30.attr,
+	&format_attr_match_rnid4.attr,
+	&format_attr_match_dnid.attr,
+	&format_attr_match_mc.attr,
+	&format_attr_match_opc.attr,
+	&format_attr_match_vnw.attr,
+	&format_attr_match0.attr,
+	&format_attr_match1.attr,
+	&format_attr_mask_rds.attr,
+	&format_attr_mask_rnid30.attr,
+	&format_attr_mask_rnid4.attr,
+	&format_attr_mask_dnid.attr,
+	&format_attr_mask_mc.attr,
+	&format_attr_mask_opc.attr,
+	&format_attr_mask_vnw.attr,
+	&format_attr_mask0.attr,
+	&format_attr_mask1.attr,
 	NULL,
 };
 
@@ -358,13 +394,16 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = {
 	SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
 };
 
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT()			\
+	.init_box	= snbep_uncore_pci_init_box,		\
+	.disable_box	= snbep_uncore_pci_disable_box,		\
+	.enable_box	= snbep_uncore_pci_enable_box,		\
+	.disable_event	= snbep_uncore_pci_disable_event,	\
+	.read_counter	= snbep_uncore_pci_read_counter
+
 static struct intel_uncore_ops snbep_uncore_pci_ops = {
-	.init_box	= snbep_uncore_pci_init_box,
-	.disable_box	= snbep_uncore_pci_disable_box,
-	.enable_box	= snbep_uncore_pci_enable_box,
-	.disable_event	= snbep_uncore_pci_disable_event,
-	.enable_event	= snbep_uncore_pci_enable_event,
-	.read_counter	= snbep_uncore_pci_read_counter,
+	SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+	.enable_event	= snbep_uncore_pci_enable_event,	\
 };
 
 static struct event_constraint snbep_uncore_cbox_constraints[] = {
@@ -728,6 +767,61 @@ static struct intel_uncore_type *snbep_msr_uncores[] = {
 	NULL,
 };
 
+enum {
+	SNBEP_PCI_QPI_PORT0_FILTER,
+	SNBEP_PCI_QPI_PORT1_FILTER,
+};
+
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+	if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+		reg1->idx = 0;
+		reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+		reg1->config = event->attr.config1;
+		reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+		reg2->config = event->attr.config2;
+	}
+	return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+	struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+	if (reg1->idx != EXTRA_REG_NONE) {
+		int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+		struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx];
+		WARN_ON_ONCE(!filter_pdev);
+		if (filter_pdev) {
+			pci_write_config_dword(filter_pdev, reg1->reg,
+						(u32)reg1->config);
+			pci_write_config_dword(filter_pdev, reg1->reg + 4,
+						(u32)(reg1->config >> 32));
+			pci_write_config_dword(filter_pdev, reg2->reg,
+						(u32)reg2->config);
+			pci_write_config_dword(filter_pdev, reg2->reg + 4,
+						(u32)(reg2->config >> 32));
+		}
+	}
+
+	pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+	SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+	.enable_event		= snbep_qpi_enable_event,
+	.hw_config		= snbep_qpi_hw_config,
+	.get_constraint		= uncore_get_constraint,
+	.put_constraint		= uncore_put_constraint,
+};
+
 #define SNBEP_UNCORE_PCI_COMMON_INIT()				\
 	.perf_ctr	= SNBEP_PCI_PMON_CTR0,			\
 	.event_ctl	= SNBEP_PCI_PMON_CTL0,			\
@@ -757,17 +851,18 @@ static struct intel_uncore_type snbep_uncore_imc = {
 };
 
 static struct intel_uncore_type snbep_uncore_qpi = {
-	.name		= "qpi",
-	.num_counters   = 4,
-	.num_boxes	= 2,
-	.perf_ctr_bits	= 48,
-	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
-	.event_ctl	= SNBEP_PCI_PMON_CTL0,
-	.event_mask	= SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,
-	.ops		= &snbep_uncore_pci_ops,
-	.event_descs	= snbep_uncore_qpi_events,
-	.format_group	= &snbep_uncore_qpi_format_group,
+	.name			= "qpi",
+	.num_counters		= 4,
+	.num_boxes		= 2,
+	.perf_ctr_bits		= 48,
+	.perf_ctr		= SNBEP_PCI_PMON_CTR0,
+	.event_ctl		= SNBEP_PCI_PMON_CTL0,
+	.event_mask		= SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SNBEP_PCI_PMON_BOX_CTL,
+	.num_shared_regs	= 1,
+	.ops			= &snbep_uncore_qpi_ops,
+	.event_descs		= snbep_uncore_qpi_events,
+	.format_group		= &snbep_uncore_qpi_format_group,
 };
 
 
@@ -847,6 +942,16 @@ static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
 	},
+	{ /* QPI Port 0 filter  */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   SNBEP_PCI_QPI_PORT0_FILTER),
+	},
+	{ /* QPI Port 1 filter  */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   SNBEP_PCI_QPI_PORT1_FILTER),
+	},
 	{ /* end: all zeroes */ }
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index ede5a8c..628500e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -16,7 +16,7 @@
 #define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
 #define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)
 #define UNCORE_EXTRA_PCI_DEV		0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX	0
+#define UNCORE_EXTRA_PCI_DEV_MAX	2
 
 /* support up to 8 sockets */
 #define UNCORE_SOCKET_MAX		8
-- 
cgit v0.10.2


From 77b339bce3e21f7a069447fc25a414b18e36fa2e Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Tue, 13 Aug 2013 14:29:42 +0800
Subject: perf/x86/intel/uncore: Enable EV_SEL_EXT bit for PCU

This patch adds support for the SNB-EP PCU uncore PMU extra_sel_bit
(bit 21) which is missing from the documentation in Table-2.75 of
Intel Xeon Processor E5-2600 Product Family Uncore Performance
Monitoring Guide. It is referred to later in Table-2.81. Without
this selection bit explicitly enabled by the kernel, some events
such as COREx_TRANSITION_CYCLES do not count correctly.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1376375382-21350-4-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 6b8b9c9..e9696d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -301,7 +301,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = {
 };
 
 static struct attribute *snbep_uncore_pcu_formats_attr[] = {
-	&format_attr_event.attr,
+	&format_attr_event_ext.attr,
 	&format_attr_occ_sel.attr,
 	&format_attr_edge.attr,
 	&format_attr_inv.attr,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 628500e..a80ab71 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -117,6 +117,7 @@
 				(SNBEP_PMON_CTL_EV_SEL_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
 				 SNBEP_PMON_CTL_EDGE_DET | \
+				 SNBEP_PMON_CTL_EV_SEL_EXT | \
 				 SNBEP_PMON_CTL_INVERT | \
 				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-- 
cgit v0.10.2


From fc3b86d673e41ac66b4ba5b75a90c2fcafb90089 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 2 Aug 2013 18:29:54 +0200
Subject: perf: Roll back callchain buffer refcount under the callchain mutex

When we fail to allocate the callchain buffers, we roll back the refcount
we took and return from get_callchain_buffers().

However we take the refcount and allocate under the callchain lock
but the rollback is done outside the lock.

As a result, while we roll back, some concurrent callchain user may
call get_callchain_buffers(), see the non-zero refcount and give up
because the buffers are NULL without itself retrying the allocation.

The consequences aren't that bad, but the behaviour is odd enough that
it's better to give subsequent callchain users a chance to retry the
allocation where we failed.

Reported-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1375460996-16329-2-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 76a8bc5..97b67df 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -116,10 +116,11 @@ int get_callchain_buffers(void)
 
 	err = alloc_callchain_buffers();
 exit:
-	mutex_unlock(&callchain_mutex);
 	if (err)
 		atomic_dec(&nr_callchain_events);
 
+	mutex_unlock(&callchain_mutex);
+
 	return err;
 }
 
-- 
cgit v0.10.2


From 948b26b6ddd08a57cb95ebb0dc96fde2edd5c383 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 2 Aug 2013 18:29:55 +0200
Subject: perf: Account freq events globally

Freq events may not always be affine to a particular CPU. As such,
account_event_cpu() may crash if we account per cpu a freq event
that has event->cpu == -1.

To solve this, let's account freq events globally. In practice
this doesn't change the picture much because perf tools create
per-task perf events with one event per CPU by default. Profiling a
single CPU is usually a corner case so there is not much point in
optimizing things that way.

Reported-by: Jiri Olsa <jolsa@redhat.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1375460996-16329-3-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index e82e700..2e675e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -141,11 +141,11 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
-static DEFINE_PER_CPU(atomic_t, perf_freq_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
+static atomic_t nr_freq_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -1871,9 +1871,6 @@ static int  __perf_install_in_context(void *info)
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, task_ctx);
 
-	if (atomic_read(&__get_cpu_var(perf_freq_events)))
-		tick_nohz_full_kick();
-
 	return 0;
 }
 
@@ -2811,7 +2808,7 @@ done:
 #ifdef CONFIG_NO_HZ_FULL
 bool perf_event_can_stop_tick(void)
 {
-	if (atomic_read(&__get_cpu_var(perf_freq_events)) ||
+	if (atomic_read(&nr_freq_events) ||
 	    __this_cpu_read(perf_throttled_count))
 		return false;
 	else
@@ -3140,9 +3137,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 	}
 	if (is_cgroup_event(event))
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
-
-	if (event->attr.freq)
-		atomic_dec(&per_cpu(perf_freq_events, cpu));
 }
 
 static void unaccount_event(struct perf_event *event)
@@ -3158,6 +3152,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_comm_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
+	if (event->attr.freq)
+		atomic_dec(&nr_freq_events);
 	if (is_cgroup_event(event))
 		static_key_slow_dec_deferred(&perf_sched_events);
 	if (has_branch_stack(event))
@@ -6489,9 +6485,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 	}
 	if (is_cgroup_event(event))
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
-
-	if (event->attr.freq)
-		atomic_inc(&per_cpu(perf_freq_events, cpu));
 }
 
 static void account_event(struct perf_event *event)
@@ -6507,6 +6500,10 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
+	if (event->attr.freq) {
+		if (atomic_inc_return(&nr_freq_events) == 1)
+			tick_nohz_full_kick_all();
+	}
 	if (has_branch_stack(event))
 		static_key_slow_inc(&perf_sched_events.key);
 	if (is_cgroup_event(event))
-- 
cgit v0.10.2


From 5ec4c599a52362896c3e7c6a31ba6145dca9c6f5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 2 Aug 2013 21:16:30 +0200
Subject: perf: Do not compute time values unnecessarily

We should not be calling calc_timer_values() for events that do not actually
have an mmap()'ed userpage.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20130802191630.GT27162@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2e675e8..928fae7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3670,6 +3670,10 @@ void perf_event_update_userpage(struct perf_event *event)
 	u64 enabled, running, now;
 
 	rcu_read_lock();
+	rb = rcu_dereference(event->rb);
+	if (!rb)
+		goto unlock;
+
 	/*
 	 * compute total_time_enabled, total_time_running
 	 * based on snapshot values taken when the event
@@ -3680,12 +3684,8 @@ void perf_event_update_userpage(struct perf_event *event)
 	 * NMI context
 	 */
 	calc_timer_values(event, &now, &enabled, &running);
-	rb = rcu_dereference(event->rb);
-	if (!rb)
-		goto unlock;
 
 	userpg = rb->user_page;
-
 	/*
 	 * Disable preemption so as to not let the corresponding user-space
 	 * spin too long if we get preempted.
-- 
cgit v0.10.2


From fe32ee0fc1c43f780529da79329d8c87ebc51464 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 14 Aug 2013 14:36:13 -0600
Subject: perf kvm: Remove force option to cmd_record

4a4d371a missed builtin-kvm in the cleanup to remove the force option
to cmd_record.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1376512573-85012-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index fa2f3d7..6cd4de5 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1305,7 +1305,6 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	const char * const record_args[] = {
 		"record",
 		"-R",
-		"-f",
 		"-m", "1024",
 		"-c", "1",
 	};
-- 
cgit v0.10.2


From bec196720431db2fd6a9b03cbd77eb336e6f52de Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sun, 4 Aug 2013 19:41:26 -0700
Subject: perf tools: Try to increase the file descriptor limits on EMFILE

perf stat -a needs 10 open file descriptors per logical CPU
perf stat -a -dddd needs 20 open fds for each.

This implies that stat -a doesn't work on any system with the default
ulimit -n 1024 which has more than ~100 CPUs and stat -a -dddd doesn't
work on anything with more than 46 CPUs.

Longer term there probably needs to be some way to lower the file
descriptor requirements. This would need some changes in the kernel/user
interface.

But short term this patch just tries to increase the file descriptor
limit in perf itself, when it runs into an EMFILE.

It first raises the soft limit to the hard limit, and then tries to
increase the hard limit.

On Fedora systems the default seems to be soft limit 1024 and hard limit
4*1024. So even non root can support 409 or 186 CPUs respectively. root
can go far higher.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1375670486-15480-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 960394e..a29c8d0 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -13,6 +13,7 @@
 #include <traceevent/event-parse.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/perf_event.h>
+#include <sys/resource.h>
 #include "asm/bug.h"
 #include "evsel.h"
 #include "evlist.h"
@@ -867,6 +868,7 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 	int cpu, thread;
 	unsigned long flags = 0;
 	int pid = -1, err;
+	enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
 
 	if (evsel->fd == NULL &&
 	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
@@ -894,6 +896,7 @@ retry_sample_id:
 
 			group_fd = get_group_fd(evsel, cpu, thread);
 
+retry_open:
 			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
 								     pid,
 								     cpus->map[cpu],
@@ -902,12 +905,37 @@ retry_sample_id:
 				err = -errno;
 				goto try_fallback;
 			}
+			set_rlimit = NO_CHANGE;
 		}
 	}
 
 	return 0;
 
 try_fallback:
+	/*
+	 * perf stat needs between 5 and 22 fds per CPU. When we run out
+	 * of them try to increase the limits.
+	 */
+	if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
+		struct rlimit l;
+		int old_errno = errno;
+
+		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+			if (set_rlimit == NO_CHANGE)
+				l.rlim_cur = l.rlim_max;
+			else {
+				l.rlim_cur = l.rlim_max + 1000;
+				l.rlim_max = l.rlim_cur;
+			}
+			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+				set_rlimit++;
+				errno = old_errno;
+				goto retry_open;
+			}
+		}
+		errno = old_errno;
+	}
+
 	if (err != -EINVAL || cpu > 0 || thread > 0)
 		goto out_close;
 
-- 
cgit v0.10.2


From 8afb4c018e21c882c8fad196772ef74d494185e2 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 14 Aug 2013 15:48:23 +0300
Subject: perf tools: Re-implement debug print function for linking
 python/perf.so

The python/perf.so python binding links a subset of objects.

Re-implement 'verbose' and 'eprintf' so they (and consequently
'pr_debug') can be used in objects linked into python/perf.so.

Note 'eprintf' must be re-implemented because the full version links the
browser ui.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1376484517-5339-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 925e0c3..381f4fd 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -8,6 +8,26 @@
 #include "cpumap.h"
 #include "thread_map.h"
 
+/*
+ * Support debug printing even though util/debug.c is not linked.  That means
+ * implementing 'verbose' and 'eprintf'.
+ */
+int verbose;
+
+int eprintf(int level, const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;
+
+	if (verbose >= level) {
+		va_start(args, fmt);
+		ret = vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+
+	return ret;
+}
+
 /* Define PyVarObject_HEAD_INIT for python 2.5 */
 #ifndef PyVarObject_HEAD_INIT
 # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
-- 
cgit v0.10.2


From e3e1a54fce81ee045dd152deb5435b136cb0b75f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 14 Aug 2013 15:48:24 +0300
Subject: perf tools: Add debug prints

It is useful to see the arguments to perf_event_open and whether the
perf events ring buffer was mmapped per-cpu or per-thread.

That information will now be displayed when verbose is at least 2, i.e. with option -vv.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1376484517-5339-3-git-send-email-adrian.hunter@intel.com
[ fixup trivial conflict with fcb14f7 ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c7d111f..1f5105a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -14,6 +14,7 @@
 #include "target.h"
 #include "evlist.h"
 #include "evsel.h"
+#include "debug.h"
 #include <unistd.h>
 
 #include "parse-events.h"
@@ -486,6 +487,7 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m
 	int nr_cpus = cpu_map__nr(evlist->cpus);
 	int nr_threads = thread_map__nr(evlist->threads);
 
+	pr_debug2("perf event ring buffer mmapped per cpu\n");
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		int output = -1;
 
@@ -524,6 +526,7 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in
 	int thread;
 	int nr_threads = thread_map__nr(evlist->threads);
 
+	pr_debug2("perf event ring buffer mmapped per thread\n");
 	for (thread = 0; thread < nr_threads; thread++) {
 		int output = -1;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a29c8d0..47cbe1e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -22,6 +22,7 @@
 #include "thread_map.h"
 #include "target.h"
 #include "perf_regs.h"
+#include "debug.h"
 
 static struct {
 	bool sample_id_all;
@@ -862,6 +863,65 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
 	return fd;
 }
 
+#define __PRINT_ATTR(fmt, cast, field)  \
+	fprintf(fp, "  %-19s "fmt"\n", #field, cast attr->field)
+
+#define PRINT_ATTR_U32(field)  __PRINT_ATTR("%u" , , field)
+#define PRINT_ATTR_X32(field)  __PRINT_ATTR("%#x", , field)
+#define PRINT_ATTR_U64(field)  __PRINT_ATTR("%" PRIu64, (uint64_t), field)
+#define PRINT_ATTR_X64(field)  __PRINT_ATTR("%#"PRIx64, (uint64_t), field)
+
+#define PRINT_ATTR2N(name1, field1, name2, field2)	\
+	fprintf(fp, "  %-19s %u    %-19s %u\n",		\
+	name1, attr->field1, name2, attr->field2)
+
+#define PRINT_ATTR2(field1, field2) \
+	PRINT_ATTR2N(#field1, field1, #field2, field2)
+
+static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp)
+{
+	size_t ret = 0;
+
+	ret += fprintf(fp, "%.60s\n", graph_dotted_line);
+	ret += fprintf(fp, "perf_event_attr:\n");
+
+	ret += PRINT_ATTR_U32(type);
+	ret += PRINT_ATTR_U32(size);
+	ret += PRINT_ATTR_X64(config);
+	ret += PRINT_ATTR_U64(sample_period);
+	ret += PRINT_ATTR_U64(sample_freq);
+	ret += PRINT_ATTR_X64(sample_type);
+	ret += PRINT_ATTR_X64(read_format);
+
+	ret += PRINT_ATTR2(disabled, inherit);
+	ret += PRINT_ATTR2(pinned, exclusive);
+	ret += PRINT_ATTR2(exclude_user, exclude_kernel);
+	ret += PRINT_ATTR2(exclude_hv, exclude_idle);
+	ret += PRINT_ATTR2(mmap, comm);
+	ret += PRINT_ATTR2(freq, inherit_stat);
+	ret += PRINT_ATTR2(enable_on_exec, task);
+	ret += PRINT_ATTR2(watermark, precise_ip);
+	ret += PRINT_ATTR2(mmap_data, sample_id_all);
+	ret += PRINT_ATTR2(exclude_host, exclude_guest);
+	ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel,
+			    "excl.callchain_user", exclude_callchain_user);
+
+	ret += PRINT_ATTR_U32(wakeup_events);
+	ret += PRINT_ATTR_U32(wakeup_watermark);
+	ret += PRINT_ATTR_X32(bp_type);
+	ret += PRINT_ATTR_X64(bp_addr);
+	ret += PRINT_ATTR_X64(config1);
+	ret += PRINT_ATTR_U64(bp_len);
+	ret += PRINT_ATTR_X64(config2);
+	ret += PRINT_ATTR_X64(branch_sample_type);
+	ret += PRINT_ATTR_X64(sample_regs_user);
+	ret += PRINT_ATTR_U32(sample_stack_user);
+
+	ret += fprintf(fp, "%.60s\n", graph_dotted_line);
+
+	return ret;
+}
+
 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 			      struct thread_map *threads)
 {
@@ -886,6 +946,9 @@ retry_sample_id:
 	if (perf_missing_features.sample_id_all)
 		evsel->attr.sample_id_all = 0;
 
+	if (verbose >= 2)
+		perf_event_attr__fprintf(&evsel->attr, stderr);
+
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
 
 		for (thread = 0; thread < threads->nr; thread++) {
@@ -895,8 +958,10 @@ retry_sample_id:
 				pid = threads->map[thread];
 
 			group_fd = get_group_fd(evsel, cpu, thread);
-
 retry_open:
+			pr_debug2("perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx\n",
+				  pid, cpus->map[cpu], group_fd, flags);
+
 			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
 								     pid,
 								     cpus->map[cpu],
-- 
cgit v0.10.2


From c24ff998fc420891f17d73acab6766823d492175 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Aug 2013 12:01:10 -0300
Subject: perf trace: Implement -o/--output filename

Send all 'trace' output to a file, just like 'strace -ofile'.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-6q1homkwoayhmoq64y5vhel6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 3b3552a..2794efc 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -30,6 +30,10 @@ OPTIONS
 --expr::
 	List of events to show, currently only syscall names.
 
+-o::
+--output=::
+	Output file name.
+
 -p::
 --pid=::
 	Record events on existing process ID (comma separated list).
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 120fdfb..4235316 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -63,7 +63,7 @@ static size_t fprintf_duration(unsigned long t, FILE *fp)
 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
 	else
 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
-	return printed + fprintf(stdout, "): ");
+	return printed + fprintf(fp, "): ");
 }
 
 struct thread_trace {
@@ -80,7 +80,7 @@ static struct thread_trace *thread_trace__new(void)
 	return zalloc(sizeof(struct thread_trace));
 }
 
-static struct thread_trace *thread__trace(struct thread *thread)
+static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 {
 	struct thread_trace *ttrace;
 
@@ -98,12 +98,13 @@ static struct thread_trace *thread__trace(struct thread *thread)
 
 	return ttrace;
 fail:
-	color_fprintf(stdout, PERF_COLOR_RED,
+	color_fprintf(fp, PERF_COLOR_RED,
 		      "WARNING: not enough memory, dropping samples!\n");
 	return NULL;
 }
 
 struct trace {
+	struct perf_tool	tool;
 	int			audit_machine;
 	struct {
 		int		max;
@@ -112,6 +113,7 @@ struct trace {
 	struct perf_record_opts opts;
 	struct machine		host;
 	u64			base_time;
+	FILE			*output;
 	struct strlist		*ev_qualifier;
 	unsigned long		nr_events;
 	bool			sched;
@@ -151,13 +153,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre
 	return printed;
 }
 
-static int trace__process_event(struct machine *machine, union perf_event *event)
+static int trace__process_event(struct trace *trace, struct machine *machine,
+				union perf_event *event)
 {
 	int ret = 0;
 
 	switch (event->header.type) {
 	case PERF_RECORD_LOST:
-		color_fprintf(stdout, PERF_COLOR_RED,
+		color_fprintf(trace->output, PERF_COLOR_RED,
 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
 		ret = machine__process_lost_event(machine, event);
 	default:
@@ -168,12 +171,13 @@ static int trace__process_event(struct machine *machine, union perf_event *event
 	return ret;
 }
 
-static int trace__tool_process(struct perf_tool *tool __maybe_unused,
+static int trace__tool_process(struct perf_tool *tool,
 			       union perf_event *event,
 			       struct perf_sample *sample __maybe_unused,
 			       struct machine *machine)
 {
-	return trace__process_event(machine, event);
+	struct trace *trace = container_of(tool, struct trace, tool);
+	return trace__process_event(trace, machine, event);
 }
 
 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
@@ -187,11 +191,11 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	machine__create_kernel_maps(&trace->host);
 
 	if (perf_target__has_task(&trace->opts.target)) {
-		err = perf_event__synthesize_thread_map(NULL, evlist->threads,
+		err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads,
 							trace__tool_process,
 							&trace->host);
 	} else {
-		err = perf_event__synthesize_threads(NULL, trace__tool_process,
+		err = perf_event__synthesize_threads(&trace->tool, trace__tool_process,
 						     &trace->host);
 	}
 
@@ -288,7 +292,7 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	int id = perf_evsel__intval(evsel, sample, "id");
 
 	if (id < 0) {
-		printf("Invalid syscall %d id, skipping...\n", id);
+		fprintf(trace->output, "Invalid syscall %d id, skipping...\n", id);
 		return NULL;
 	}
 
@@ -302,10 +306,10 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	return &trace->syscalls.table[id];
 
 out_cant_read:
-	printf("Problems reading syscall %d", id);
+	fprintf(trace->output, "Problems reading syscall %d", id);
 	if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
-		printf("(%s)", trace->syscalls.table[id].name);
-	puts(" information");
+		fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
+	fputs(" information", trace->output);
 	return NULL;
 }
 
@@ -326,13 +330,13 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 		return 0;
 
 	thread = machine__findnew_thread(&trace->host, sample->tid);
-	ttrace = thread__trace(thread);
+	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		return -1;
 
 	args = perf_evsel__rawptr(evsel, sample, "args");
 	if (args == NULL) {
-		printf("Problems reading syscall arguments\n");
+		fprintf(trace->output, "Problems reading syscall arguments\n");
 		return -1;
 	}
 
@@ -352,8 +356,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
 	if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
 		if (!trace->duration_filter) {
-			trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout);
-			printf("%-70s\n", ttrace->entry_str);
+			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
+			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
 		}
 	} else
 		ttrace->entry_pending = true;
@@ -377,7 +381,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 		return 0;
 
 	thread = machine__findnew_thread(&trace->host, sample->tid);
-	ttrace = thread__trace(thread);
+	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		return -1;
 
@@ -394,14 +398,14 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	} else if (trace->duration_filter)
 		goto out;
 
-	trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout);
+	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
 
 	if (ttrace->entry_pending) {
-		printf("%-70s", ttrace->entry_str);
+		fprintf(trace->output, "%-70s", ttrace->entry_str);
 	} else {
-		printf(" ... [");
-		color_fprintf(stdout, PERF_COLOR_YELLOW, "continued");
-		printf("]: %s()", sc->name);
+		fprintf(trace->output, " ... [");
+		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
+		fprintf(trace->output, "]: %s()", sc->name);
 	}
 
 	if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
@@ -409,13 +413,13 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
 			   *e = audit_errno_to_name(-ret);
 
-		printf(") = -1 %s %s", e, emsg);
+		fprintf(trace->output, ") = -1 %s %s", e, emsg);
 	} else if (ret == 0 && sc->fmt && sc->fmt->timeout)
-		printf(") = 0 Timeout");
+		fprintf(trace->output, ") = 0 Timeout");
 	else
-		printf(") = %d", ret);
+		fprintf(trace->output, ") = %d", ret);
 
-	putchar('\n');
+	fputc('\n', trace->output);
 out:
 	ttrace->entry_pending = false;
 
@@ -428,7 +432,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
 	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
-	struct thread_trace *ttrace = thread__trace(thread);
+	struct thread_trace *ttrace = thread__trace(thread, trace->output);
 
 	if (ttrace == NULL)
 		goto out_dump;
@@ -438,7 +442,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
 	return 0;
 
 out_dump:
-	printf("%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
+	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
 	       evsel->name,
 	       perf_evsel__strval(evsel, sample, "comm"),
 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
@@ -456,32 +460,32 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	const bool forks = argc > 0;
 
 	if (evlist == NULL) {
-		printf("Not enough memory to run!\n");
+		fprintf(trace->output, "Not enough memory to run!\n");
 		goto out;
 	}
 
 	if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
 	    perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
-		printf("Couldn't read the raw_syscalls tracepoints information!\n");
+		fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n");
 		goto out_delete_evlist;
 	}
 
 	if (trace->sched &&
 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
 				   trace__sched_stat_runtime)) {
-		printf("Couldn't read the sched_stat_runtime tracepoint information!\n");
+		fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n");
 		goto out_delete_evlist;
 	}
 
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
-		printf("Problems parsing the target to trace, check your options!\n");
+		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
 		goto out_delete_evlist;
 	}
 
 	err = trace__symbols_init(trace, evlist);
 	if (err < 0) {
-		printf("Problems initializing symbol libraries!\n");
+		fprintf(trace->output, "Problems initializing symbol libraries!\n");
 		goto out_delete_maps;
 	}
 
@@ -494,20 +498,20 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
 						    argv, false, false);
 		if (err < 0) {
-			printf("Couldn't run the workload!\n");
+			fprintf(trace->output, "Couldn't run the workload!\n");
 			goto out_delete_maps;
 		}
 	}
 
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
-		printf("Couldn't create the events: %s\n", strerror(errno));
+		fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno));
 		goto out_delete_maps;
 	}
 
 	err = perf_evlist__mmap(evlist, UINT_MAX, false);
 	if (err < 0) {
-		printf("Couldn't mmap the events: %s\n", strerror(errno));
+		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
 		goto out_close_evlist;
 	}
 
@@ -532,7 +536,7 @@ again:
 
 			err = perf_evlist__parse_sample(evlist, event, &sample);
 			if (err) {
-				printf("Can't parse sample, err = %d, skipping...\n", err);
+				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
 				continue;
 			}
 
@@ -540,18 +544,18 @@ again:
 				trace->base_time = sample.time;
 
 			if (type != PERF_RECORD_SAMPLE) {
-				trace__process_event(&trace->host, event);
+				trace__process_event(trace, &trace->host, event);
 				continue;
 			}
 
 			evsel = perf_evlist__id2evsel(evlist, sample.id);
 			if (evsel == NULL) {
-				printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
+				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
 				continue;
 			}
 
 			if (sample.raw_data == NULL) {
-				printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
+				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 				       perf_evsel__name(evsel), sample.tid,
 				       sample.cpu, sample.raw_size);
 				continue;
@@ -640,6 +644,23 @@ static int trace__set_duration(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int trace__open_output(struct trace *trace, const char *filename)
+{
+	struct stat st;
+
+	if (!stat(filename, &st) && st.st_size) {
+		char oldname[PATH_MAX];
+
+		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
+		unlink(oldname);
+		rename(filename, oldname);
+	}
+
+	trace->output = fopen(filename, "w");
+
+	return trace->output == NULL ? -errno : 0;
+}
+
 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const trace_usage[] = {
@@ -662,11 +683,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 			.no_delay      = true,
 			.mmap_pages    = 1024,
 		},
+		.output = stdout,
 	};
+	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
 	const struct option trace_options[] = {
 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
 		    "list of events to trace"),
+	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
 		    "trace events on existing process id"),
 	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
@@ -692,26 +716,36 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
 
+	if (output_name != NULL) {
+		err = trace__open_output(&trace, output_name);
+		if (err < 0) {
+			perror("failed to create output file");
+			goto out;
+		}
+	}
+
 	if (ev_qualifier_str != NULL) {
 		trace.ev_qualifier = strlist__new(true, ev_qualifier_str);
 		if (trace.ev_qualifier == NULL) {
-			puts("Not enough memory to parse event qualifier");
-			return -ENOMEM;
+			fputs("Not enough memory to parse event qualifier",
+			      trace.output);
+			err = -ENOMEM;
+			goto out_close;
 		}
 	}
 
 	err = perf_target__validate(&trace.opts.target);
 	if (err) {
 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
-		printf("%s", bf);
-		return err;
+		fprintf(trace.output, "%s", bf);
+		goto out_close;
 	}
 
 	err = perf_target__parse_uid(&trace.opts.target);
 	if (err) {
 		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
-		printf("%s", bf);
-		return err;
+		fprintf(trace.output, "%s", bf);
+		goto out_close;
 	}
 
 	if (!argc && perf_target__none(&trace.opts.target))
@@ -720,7 +754,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	err = trace__run(&trace, argc, argv);
 
 	if (trace.sched && !err)
-		trace__fprintf_thread_summary(&trace, stdout);
+		trace__fprintf_thread_summary(&trace, trace.output);
 
+out_close:
+	if (output_name != NULL)
+		fclose(trace.output);
+out:
 	return err;
 }
-- 
cgit v0.10.2


From ac9be8ee4ecdeae73c78d84ebfe37009e11cf99d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 20 Aug 2013 11:15:45 -0600
Subject: perf trace: Make command line arguments consistent with perf-record

Common arguments like thread id, CPU list, mmap pages, etc should be
consistent across perf commands.

v3: Updated man page
v2: rebased to latest core branch

Signed-off-by: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1377018945-21940-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 2794efc..cb3371e 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -23,6 +23,7 @@ analysis phases.
 OPTIONS
 -------
 
+-a::
 --all-cpus::
         System-wide collection from all CPUs.
 
@@ -38,18 +39,23 @@ OPTIONS
 --pid=::
 	Record events on existing process ID (comma separated list).
 
+-t::
 --tid=::
         Record events on existing thread ID (comma separated list).
 
+-u::
 --uid=::
         Record events in threads owned by uid. Name or number.
 
+-i::
 --no-inherit::
 	Child tasks do not inherit counters.
 
+-m::
 --mmap-pages=::
 	Number of mmap data pages. Must be a power of two.
 
+-C::
 --cpu::
 Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
 comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4235316..9891d8c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -693,17 +693,17 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
 		    "trace events on existing process id"),
-	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
+	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
 		    "trace events on existing thread id"),
-	OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide,
+	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
 		    "system-wide collection from all CPUs"),
-	OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu",
+	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
-	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
+	OPT_BOOLEAN('i', "no-inherit", &trace.opts.no_inherit,
 		    "child tasks do not inherit counters"),
-	OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages,
+	OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
 		     "number of mmap data pages"),
-	OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
+	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
 		   "user to profile"),
 	OPT_CALLBACK(0, "duration", &trace, "float",
 		     "show only events with duration > N.M ms",
-- 
cgit v0.10.2


From 236a3bbd5cb51edbf9550f5a7df885665d18a271 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 14 Aug 2013 08:49:27 -0600
Subject: perf tools: Sample after exit loses thread correlation

Occasionally events (e.g., context-switch, sched tracepoints) are losing
the conversion of sample data associated with a thread. For example:

$ perf record -e sched:sched_switch -c 1 -a -- sleep 5
$ perf script
<selected events shown>
    ls 30482 [000] 1379727.583037: sched:sched_switch: prev_comm=ls prev_pid=30482 ...
    ls 30482 [000] 1379727.586339: sched:sched_switch: prev_comm=ls prev_pid=30482 ...
:30482 30482 [000] 1379727.589462: sched:sched_switch: prev_comm=ls prev_pid=30482 ...

The last line lost the conversion from tid to comm. If you look at the events
(perf script -D) you see why - a SAMPLE event is generated after the EXIT:

0 1379727589449774 0x1540b0 [0x38]: PERF_RECORD_EXIT(30482:30482):(30482:30482)
0 1379727589462497 0x1540e8 [0x80]: PERF_RECORD_SAMPLE(IP, 1): 30482/30482: 0xffffffff816416f1 period: 1 addr: 0
... thread: :30482:30482

When perf processes the EXIT event the thread is moved to the dead_threads
list. When the SAMPLE event is processed no thread exists for the pid so a new
one is created by machine__findnew_thread.

This patch addresses the problem by delaying the move to the dead_threads list
until the tid is re-used (per Adrian's suggestion).

With this patch the previous example shows:

  ls 30482 [000] 1379727.583037: sched:sched_switch: prev_comm=ls prev_pid=30482 ...
  ls 30482 [000] 1379727.586339: sched:sched_switch: prev_comm=ls prev_pid=30482 ...
  ls 30482 [000] 1379727.589462: sched:sched_switch: prev_comm=ls prev_pid=30482 ...

and

  0 1379727589449774 0x1540b0 [0x38]: PERF_RECORD_EXIT(30482:30482):(30482:30482)
  0 1379727589462497 0x1540e8 [0x80]: PERF_RECORD_SAMPLE(IP, 1): 30482/30482: 0xffffffff816416f1 period: 1 addr: 0
  ... thread: ls:30482

v4: per Arnaldo's request add dead flag to thread struct and set when task exits

v3: re-do from a time based check to a delayed move to dead_threads list

v2: Rebased to latest perf/core branch. Changed time comparison to use
    a macro which explicitly shows the time basis

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1376491767-84171-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 4514e7e..574feb7 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1031,11 +1031,27 @@ out_problem:
 	return 0;
 }
 
+static void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+	machine->last_match = NULL;
+	rb_erase(&th->rb_node, &machine->threads);
+	/*
+	 * We may have references to this thread, for instance in some hist_entry
+	 * instances, so just move them to a separate list.
+	 */
+	list_add_tail(&th->node, &machine->dead_threads);
+}
+
 int machine__process_fork_event(struct machine *machine, union perf_event *event)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
+	struct thread *thread = machine__find_thread(machine, event->fork.tid);
 	struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
 
+	/* if a thread currently exists for the thread id remove it */
+	if (thread != NULL)
+		machine__remove_thread(machine, thread);
+
+	thread = machine__findnew_thread(machine, event->fork.tid);
 	if (dump_trace)
 		perf_event__fprintf_task(event, stdout);
 
@@ -1048,18 +1064,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	return 0;
 }
 
-static void machine__remove_thread(struct machine *machine, struct thread *th)
-{
-	machine->last_match = NULL;
-	rb_erase(&th->rb_node, &machine->threads);
-	/*
-	 * We may have references to this thread, for instance in some hist_entry
-	 * instances, so just move them to a separate list.
-	 */
-	list_add_tail(&th->node, &machine->dead_threads);
-}
-
-int machine__process_exit_event(struct machine *machine, union perf_event *event)
+int machine__process_exit_event(struct machine *machine __maybe_unused,
+				union perf_event *event)
 {
 	struct thread *thread = machine__find_thread(machine, event->fork.tid);
 
@@ -1067,7 +1073,7 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
 		perf_event__fprintf_task(event, stdout);
 
 	if (thread != NULL)
-		machine__remove_thread(machine, thread);
+		thread__exited(thread);
 
 	return 0;
 }
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 13c62c9..32d0601 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -16,6 +16,7 @@ struct thread {
 	pid_t			ppid;
 	char			shortname[3];
 	bool			comm_set;
+	bool			dead; /* if set thread has exited */
 	char			*comm;
 	int			comm_len;
 
@@ -26,6 +27,10 @@ struct machine;
 
 struct thread *thread__new(pid_t tid);
 void thread__delete(struct thread *self);
+static inline void thread__exited(struct thread *thread)
+{
+	thread->dead = true;
+}
 
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
-- 
cgit v0.10.2


From 8c9dc52850d686f614aa44c59eee682d58a6656c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Aug 2013 16:15:06 -0300
Subject: perf tools: Don't install scripting files when disabled

No need to install perl or python files when the respective
NO_LIBP{YTHON,ERL} define is set.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-c69d4jz08gb1zm2vpervva2q@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index e0d3d9f..7d30a7d 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -766,17 +766,21 @@ check: $(OUTPUT)common-cmds.h
 install-bin: all
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
 	$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
+	$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBPERL
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
-	$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
 	$(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
 	$(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
 	$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
 	$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'
 	$(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
 	$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
-- 
cgit v0.10.2


From b059efdf52a27819b78aa30f171f1e8e439152b6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 21 Aug 2013 12:56:21 -0300
Subject: perf trace: Support ! in -e expressions

So that we can ask for all but a set of syscalls to be traced.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-9j6hvap23qanyl96wx4mrj9k@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index cb3371e..4754f11 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -30,6 +30,8 @@ OPTIONS
 -e::
 --expr::
 	List of events to show, currently only syscall names.
+	Prefixing with ! shows all syscalls but the ones specified.  You may
+	need to escape it.
 
 -o::
 --output=::
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 9891d8c..6ab7a7a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -114,8 +114,9 @@ struct trace {
 	struct machine		host;
 	u64			base_time;
 	FILE			*output;
-	struct strlist		*ev_qualifier;
 	unsigned long		nr_events;
+	struct strlist		*ev_qualifier;
+	bool			not_ev_qualifier;
 	bool			sched;
 	bool			multiple_threads;
 	double			duration_filter;
@@ -234,13 +235,17 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	sc = trace->syscalls.table + id;
 	sc->name = name;
 
-	if (trace->ev_qualifier && !strlist__find(trace->ev_qualifier, name)) {
-		sc->filtered = true;
-		/*
- 		 * No need to do read tracepoint information since this will be
- 		 * filtered out.
- 		 */
-		return 0;
+	if (trace->ev_qualifier) {
+		bool in = strlist__find(trace->ev_qualifier, name) != NULL;
+
+		if (!(in ^ trace->not_ev_qualifier)) {
+			sc->filtered = true;
+			/*
+			 * No need to do read tracepoint information since this will be
+			 * filtered out.
+			 */
+			return 0;
+		}
 	}
 
 	sc->fmt  = syscall_fmt__find(sc->name);
@@ -725,7 +730,12 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	}
 
 	if (ev_qualifier_str != NULL) {
-		trace.ev_qualifier = strlist__new(true, ev_qualifier_str);
+		const char *s = ev_qualifier_str;
+
+		trace.not_ev_qualifier = *s == '!';
+		if (trace.not_ev_qualifier)
+			++s;
+		trace.ev_qualifier = strlist__new(true, s);
 		if (trace.ev_qualifier == NULL) {
 			fputs("Not enough memory to parse event qualifier",
 			      trace.output);
-- 
cgit v0.10.2


From 7c304ee0fc66b4c21282e1cce32631c263f8c481 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 22 Aug 2013 16:49:54 -0300
Subject: perf trace: Add --verbose option

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-ain6q4u8g3bpnh18yhw24v2x@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 4754f11..fe19811 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -49,6 +49,10 @@ OPTIONS
 --uid=::
         Record events in threads owned by uid. Name or number.
 
+-v::
+--verbose=::
+        Verbosity level.
+
 -i::
 --no-inherit::
 	Child tasks do not inherit counters.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 6ab7a7a..c907e7e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1,6 +1,7 @@
 #include <traceevent/event-parse.h>
 #include "builtin.h"
 #include "util/color.h"
+#include "util/debug.h"
 #include "util/evlist.h"
 #include "util/machine.h"
 #include "util/thread.h"
@@ -311,10 +312,12 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	return &trace->syscalls.table[id];
 
 out_cant_read:
-	fprintf(trace->output, "Problems reading syscall %d", id);
-	if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
-		fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
-	fputs(" information", trace->output);
+	if (verbose) {
+		fprintf(trace->output, "Problems reading syscall %d", id);
+		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
+		fputs(" information\n", trace->output);
+	}
 	return NULL;
 }
 
@@ -714,6 +717,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "show only events with duration > N.M ms",
 		     trace__set_duration),
 	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
+	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
 	OPT_END()
 	};
 	int err;
-- 
cgit v0.10.2


From adaa18bf5d9128c4a34f5350b1d46555a949ebc4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 22 Aug 2013 17:55:25 -0300
Subject: perf trace: Hide sys_exit messages about syscall id = -1

That was reproduced via ftrace as described in this cset comment log,
need to investigate further.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-n1i3m0vo6mgq3ddjj95sls2s@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c907e7e..c3caabb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -298,7 +298,22 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	int id = perf_evsel__intval(evsel, sample, "id");
 
 	if (id < 0) {
-		fprintf(trace->output, "Invalid syscall %d id, skipping...\n", id);
+
+		/*
+		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
+		 * before that, leaving at a higher verbosity level till that is
+		 * explained. Reproduced with plain ftrace with:
+		 *
+		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
+		 * grep "NR -1 " /t/trace_pipe
+		 *
+		 * After generating some load on the machine.
+ 		 */
+		if (verbose > 1) {
+			static u64 n;
+			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
+				id, perf_evsel__name(evsel), ++n);
+		}
 		return NULL;
 	}
 
-- 
cgit v0.10.2


From 13d4ff3eb36474728be2acfa773b31ff39f3ea4d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 23 Aug 2013 18:14:48 -0300
Subject: perf trace: Introduce syscall arg formatters

Starting with one for printing pointers in hexadecimal, using the
information in the syscall tracepoint format.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-c4y4jy7qqkn8wsd8q6j1g7zh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c3caabb..86568ed 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -12,6 +12,11 @@
 #include <libaudit.h>
 #include <stdlib.h>
 
+static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg)
+{
+	return scnprintf(bf, size, "%#lx", arg);
+}
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
@@ -51,6 +56,7 @@ struct syscall {
 	const char	    *name;
 	bool		    filtered;
 	struct syscall_fmt  *fmt;
+	size_t		    (**arg_scnprintf)(char *bf, size_t size, unsigned long arg);
 };
 
 static size_t fprintf_duration(unsigned long t, FILE *fp)
@@ -207,6 +213,24 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	return err;
 }
 
+static int syscall__set_arg_fmts(struct syscall *sc)
+{
+	struct format_field *field;
+	int idx = 0;
+
+	sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
+	if (sc->arg_scnprintf == NULL)
+		return -1;
+
+	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
+		if (field->flags & FIELD_IS_POINTER)
+			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
+		++idx;
+	}
+
+	return 0;
+}
+
 static int trace__read_syscall_info(struct trace *trace, int id)
 {
 	char tp_name[128];
@@ -259,7 +283,10 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 		sc->tp_format = event_format__new("syscalls", tp_name);
 	}
 
-	return sc->tp_format != NULL ? 0 : -1;
+	if (sc->tp_format == NULL)
+		return -1;
+
+	return syscall__set_arg_fmts(sc);
 }
 
 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
@@ -273,8 +300,14 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
 		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
 			printed += scnprintf(bf + printed, size - printed,
-					     "%s%s: %ld", printed ? ", " : "",
-					     field->name, args[i++]);
+					     "%s%s: ", printed ? ", " : "", field->name);
+
+			if (sc->arg_scnprintf && sc->arg_scnprintf[i])
+				printed += sc->arg_scnprintf[i](bf + printed, size - printed, args[i]);
+			else
+				printed += scnprintf(bf + printed, size - printed,
+						     "%ld", args[i]);
+                       ++i;
 		}
 	} else {
 		while (i < 6) {
-- 
cgit v0.10.2


From da3c9a448af7ab2beab62cfff42bdea9590d9bea Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 26 Aug 2013 11:28:34 -0300
Subject: perf trace: Simplify sys_exit return printing

Avoiding multiple sc->fmt != NULL tests.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-w28d1o3uslden0k57653kda7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 86568ed..9e23660 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -464,16 +464,19 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 		fprintf(trace->output, "]: %s()", sc->name);
 	}
 
-	if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
+	if (sc->fmt == NULL) {
+signed_print:
+		fprintf(trace->output, ") = %d", ret);
+	} else if (ret < 0 && sc->fmt->errmsg) {
 		char bf[256];
 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
 			   *e = audit_errno_to_name(-ret);
 
 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
-	} else if (ret == 0 && sc->fmt && sc->fmt->timeout)
+	} else if (ret == 0 && sc->fmt->timeout)
 		fprintf(trace->output, ") = 0 Timeout");
 	else
-		fprintf(trace->output, ") = %d", ret);
+		goto signed_print;
 
 	fputc('\n', trace->output);
 out:
-- 
cgit v0.10.2


From 04b34729e4584adfcb2e8ea908ff9478b1563001 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 26 Aug 2013 11:36:30 -0300
Subject: perf trace: Allow printing syscall return values in hex

event_format->flags has a FIELD_IS_POINTER, but it is not set for
the sys_exit 'ret' field in syscalls like mmap, so we need a way to
ask for hex printing for pointer returns and keep things like 'read'
returns printing in decimal.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-lfuveegw4od1t08n7bsmonrm@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 9e23660..b2038fd 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -22,13 +22,18 @@ static struct syscall_fmt {
 	const char *alias;
 	bool	   errmsg;
 	bool	   timeout;
+	bool	   hexret;
 } syscall_fmts[] = {
 	{ .name	    = "access",	    .errmsg = true, },
 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
+	{ .name	    = "brk",	    .hexret = true, },
+	{ .name	    = "mmap",	    .hexret = true, },
 	{ .name	    = "connect",    .errmsg = true, },
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 	{ .name	    = "futex",	    .errmsg = true, },
+	{ .name	    = "mmap",	    .hexret = true, },
+	{ .name	    = "mremap",	    .hexret = true, },
 	{ .name	    = "open",	    .errmsg = true, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
@@ -475,6 +480,8 @@ signed_print:
 		fprintf(trace->output, ") = -1 %s %s", e, emsg);
 	} else if (ret == 0 && sc->fmt->timeout)
 		fprintf(trace->output, ") = 0 Timeout");
+	else if (sc->fmt->hexret)
+		fprintf(trace->output, ") = %#x", ret);
 	else
 		goto signed_print;
 
-- 
cgit v0.10.2


From e5959683adb6b15cc2aebfbed5aa1315e24a53db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 26 Aug 2013 12:21:41 -0300
Subject: perf trace: Add aliases to remaining syscalls of the sys_enter_newfoo

Before:

  2392.918 ( 0.008 ms): 21581 lstat(arg0: 140734915488448, arg1: 140734915488240, arg2: 140734915488240, arg3: 3, arg4: 24426352, arg5: 98) = 0

After:

  7408.087 ( 0.013 ms): 21969 lstat(filename: 0x7fff44b4bf20, statbuf: 0x7fff44b4be50               ) = 0

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-8nxaole8mb7zyopk47tdellj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b2038fd..2d759f5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -32,16 +32,20 @@ static struct syscall_fmt {
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 	{ .name	    = "futex",	    .errmsg = true, },
+	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 	{ .name	    = "mmap",	    .hexret = true, },
 	{ .name	    = "mremap",	    .hexret = true, },
 	{ .name	    = "open",	    .errmsg = true, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
+	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
+	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
 	{ .name	    = "read",	    .errmsg = true, },
 	{ .name	    = "recvfrom",   .errmsg = true, },
 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "socket",	    .errmsg = true, },
 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
+	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
 };
 
 static int syscall_fmt__cmp(const void *name, const void *fmtp)
-- 
cgit v0.10.2


From beccb2b54a39ca5e49a7aa2912faa33617a45cc1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 26 Aug 2013 12:29:38 -0300
Subject: perf trace: Allow overiding the formatting of syscall fields

The mmap syscalls, for instance, don't have FIELD_IS_POINTER set for
their pointer arguments, so allow the syscall_fmt table to override it.

This also paves the way for more specialized argument beautifiers, like
for mmap's prot and flags arguments.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-mm864hvhrpt39muxmmbtjasz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 2d759f5..60ee811 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -17,24 +17,35 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long ar
 	return scnprintf(bf, size, "%#lx", arg);
 }
 
+#define SCA_HEX syscall_arg__scnprintf_hex
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
+	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg);
 	bool	   errmsg;
 	bool	   timeout;
 	bool	   hexret;
 } syscall_fmts[] = {
 	{ .name	    = "access",	    .errmsg = true, },
 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
-	{ .name	    = "brk",	    .hexret = true, },
+	{ .name	    = "brk",	    .hexret = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
 	{ .name	    = "mmap",	    .hexret = true, },
 	{ .name	    = "connect",    .errmsg = true, },
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 	{ .name	    = "futex",	    .errmsg = true, },
+	{ .name	    = "ioctl",	    .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
-	{ .name	    = "mmap",	    .hexret = true, },
+	{ .name	    = "mmap",	    .hexret = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
+	{ .name	    = "mprotect",   .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "mremap",	    .hexret = true, },
+	{ .name	    = "munmap",	    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "open",	    .errmsg = true, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
@@ -232,7 +243,9 @@ static int syscall__set_arg_fmts(struct syscall *sc)
 		return -1;
 
 	for (field = sc->tp_format->format.fields->next; field; field = field->next) {
-		if (field->flags & FIELD_IS_POINTER)
+		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
+			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
+		else if (field->flags & FIELD_IS_POINTER)
 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
 		++idx;
 	}
-- 
cgit v0.10.2


From ae685380b9a1ffe5e1935852ec11f0aa38b1d77b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Aug 2013 17:44:42 -0300
Subject: perf trace: Add beautifier for mmap prot parm

[root@zoo ~]# perf trace -e mmap,mprotect sleep 1
     0.984 ( 0.015 ms): mmap(addr: 0, len: 4096, prot: READ|WRITE, flags: 34, fd: 4294967295, off: 0) = 0xd62ae000
     1.114 ( 0.016 ms): mmap(addr: 0, len: 125100, prot: READ, flags: 2, fd: 3, off: 0        ) = 0xd628f000
     1.252 ( 0.020 ms): mmap(addr: 0x33c1600000, len: 3896312, prot: EXEC|READ, flags: 2050, fd: 3, off: 0) = 0xc1600000
     1.282 ( 0.024 ms): mprotect(start: 0x33c17ad000, len: 2097152, prot: NONE                ) = 0
     1.315 ( 0.026 ms): mmap(addr: 0x33c19ad000, len: 24576, prot: READ|WRITE, flags: 2066, fd: 3, off: 1757184) = 0xc19ad000
     1.352 ( 0.017 ms): mmap(addr: 0x33c19b3000, len: 17400, prot: READ|WRITE, flags: 50, fd: 4294967295, off: 0) = 0xc19b3000
     1.415 ( 0.011 ms): mmap(addr: 0, len: 4096, prot: READ|WRITE, flags: 34, fd: 4294967295, off: 0) = 0xd628e000
     1.440 ( 0.011 ms): mmap(addr: 0, len: 8192, prot: READ|WRITE, flags: 34, fd: 4294967295, off: 0) = 0xd628c000
     1.569 ( 0.019 ms): mprotect(start: 0x606000, len: 4096, prot: READ                       ) = 0
     1.591 ( 0.017 ms): mprotect(start: 0x33c19ad000, len: 16384, prot: READ                  ) = 0
     1.616 ( 0.016 ms): mprotect(start: 0x33c1420000, len: 4096, prot: READ                   ) = 0
     2.105 ( 0.018 ms): mmap(addr: 0, len: 104789808, prot: READ, flags: 2, fd: 3, off: 0     ) = 0xcfe9c000
[root@zoo ~]#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-q1ubhdd9wigxneam616ggdsn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 60ee811..81c20a6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -11,6 +11,7 @@
 
 #include <libaudit.h>
 #include <stdlib.h>
+#include <sys/mman.h>
 
 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg)
 {
@@ -19,6 +20,36 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long ar
 
 #define SCA_HEX syscall_arg__scnprintf_hex
 
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned long arg)
+{
+	int printed = 0, prot = arg;
+
+	if (prot == PROT_NONE)
+		return scnprintf(bf, size, "NONE");
+#define	P_MMAP_PROT(n) \
+	if (prot & PROT_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		prot &= ~PROT_##n; \
+	}
+
+	P_MMAP_PROT(EXEC);
+	P_MMAP_PROT(READ);
+	P_MMAP_PROT(WRITE);
+#ifdef PROT_SEM
+	P_MMAP_PROT(SEM);
+#endif
+	P_MMAP_PROT(GROWSDOWN);
+	P_MMAP_PROT(GROWSUP);
+#undef P_MMAP_PROT
+
+	if (prot)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
+
+	return printed;
+}
+
+#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
@@ -40,10 +71,14 @@ static struct syscall_fmt {
 	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 	{ .name	    = "mmap",	    .hexret = true,
-	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
+	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
+			     [2] = SCA_MMAP_PROT, /* prot */ }, },
 	{ .name	    = "mprotect",   .errmsg = true,
-	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
-	{ .name	    = "mremap",	    .hexret = true, },
+	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
+			     [2] = SCA_MMAP_PROT, /* prot */ }, },
+	{ .name	    = "mremap",	    .hexret = true,
+	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
+			     [4] = SCA_HEX, /* new_addr */ }, },
 	{ .name	    = "munmap",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "open",	    .errmsg = true, },
-- 
cgit v0.10.2


From 941557e0e4e90bbf970f4241c26055a4683e9c0d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 23 Aug 2013 10:48:33 -0300
Subject: perf trace: Add beautifier for mmap flags parm

[root@zoo ~]# perf trace -e mmap,mprotect sleep 1
     0.992 ( 0.015 ms): mmap(addr: 0, len: 4096, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS, fd: 4294967295, off: 0) = 0xa60be000
     1.108 ( 0.012 ms): mmap(addr: 0, len: 125100, prot: READ, flags: PRIVATE, fd: 3, off: 0  ) = 0xa609f000
     1.209 ( 0.014 ms): mmap(addr: 0x33c1600000, len: 3896312, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3, off: 0) = 0xc1600000
     1.232 ( 0.018 ms): mprotect(start: 0x33c17ad000, len: 2097152, prot: NONE                ) = 0
     1.255 ( 0.018 ms): mmap(addr: 0x33c19ad000, len: 24576, prot: READ|WRITE, flags: PRIVATE|DENYWRITE|FIXED, fd: 3, off: 1757184) = 0xc19ad000
     1.281 ( 0.011 ms): mmap(addr: 0x33c19b3000, len: 17400, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS|FIXED, fd: 4294967295, off: 0) = 0xc19b3000
     1.328 ( 0.008 ms): mmap(addr: 0, len: 4096, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS, fd: 4294967295, off: 0) = 0xa609e000
     1.346 ( 0.008 ms): mmap(addr: 0, len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS, fd: 4294967295, off: 0) = 0xa609c000
     1.443 ( 0.013 ms): mprotect(start: 0x606000, len: 4096, prot: READ                       ) = 0
     1.459 ( 0.011 ms): mprotect(start: 0x33c19ad000, len: 16384, prot: READ                  ) = 0
     1.477 ( 0.011 ms): mprotect(start: 0x33c1420000, len: 4096, prot: READ                   ) = 0
     1.855 ( 0.013 ms): mmap(addr: 0, len: 104789808, prot: READ, flags: PRIVATE, fd: 3, off: 0) = 0x9fcac000
[root@zoo ~]#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-q1ubhdd9wigxneam616ggdsn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 81c20a6..034152c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -50,6 +50,44 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned l
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned long arg)
+{
+	int printed = 0, flags = arg;
+
+#define	P_MMAP_FLAG(n) \
+	if (flags & MAP_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MAP_##n; \
+	}
+
+	P_MMAP_FLAG(SHARED);
+	P_MMAP_FLAG(PRIVATE);
+	P_MMAP_FLAG(32BIT);
+	P_MMAP_FLAG(ANONYMOUS);
+	P_MMAP_FLAG(DENYWRITE);
+	P_MMAP_FLAG(EXECUTABLE);
+	P_MMAP_FLAG(FILE);
+	P_MMAP_FLAG(FIXED);
+	P_MMAP_FLAG(GROWSDOWN);
+	P_MMAP_FLAG(HUGETLB);
+	P_MMAP_FLAG(LOCKED);
+	P_MMAP_FLAG(NONBLOCK);
+	P_MMAP_FLAG(NORESERVE);
+	P_MMAP_FLAG(POPULATE);
+	P_MMAP_FLAG(STACK);
+#ifdef MAP_UNINITIALIZED
+	P_MMAP_FLAG(UNINITIALIZED);
+#endif
+#undef P_MMAP_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
@@ -72,7 +110,8 @@ static struct syscall_fmt {
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 	{ .name	    = "mmap",	    .hexret = true,
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
-			     [2] = SCA_MMAP_PROT, /* prot */ }, },
+			     [2] = SCA_MMAP_PROT, /* prot */
+			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
 	{ .name	    = "mprotect",   .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
-- 
cgit v0.10.2


From 9e9716d1b929ddb6955a5954fe1d9a74b233df0d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 23 Aug 2013 10:06:41 -0300
Subject: perf trace: Add beautifier for madvise behaviour/advice parm

  [root@zoo ~]# perf trace -e madvise -a
    35299.631 ( 0.019 ms): 19553 madvise(start: 0x7f5b101d4000, len_in: 4063232, behavior: DONTNEED    ) = 0

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-a3twa1ia5sxt0hsxqika4efq@git.kernel.org
[ ifdef DO(NT)?DUMP to fix build on f16, from David Ahern ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 034152c..b72afc7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -88,6 +88,43 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, unsigned long arg)
+{
+	int behavior = arg;
+
+	switch (behavior) {
+#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
+	P_MADV_BHV(NORMAL);
+	P_MADV_BHV(RANDOM);
+	P_MADV_BHV(SEQUENTIAL);
+	P_MADV_BHV(WILLNEED);
+	P_MADV_BHV(DONTNEED);
+	P_MADV_BHV(REMOVE);
+	P_MADV_BHV(DONTFORK);
+	P_MADV_BHV(DOFORK);
+	P_MADV_BHV(HWPOISON);
+#ifdef MADV_SOFT_OFFLINE
+	P_MADV_BHV(SOFT_OFFLINE);
+#endif
+	P_MADV_BHV(MERGEABLE);
+	P_MADV_BHV(UNMERGEABLE);
+	P_MADV_BHV(HUGEPAGE);
+	P_MADV_BHV(NOHUGEPAGE);
+#ifdef MADV_DONTDUMP
+	P_MADV_BHV(DONTDUMP);
+#endif
+#ifdef MADV_DODUMP
+	P_MADV_BHV(DODUMP);
+#endif
+#undef P_MADV_PHV
+	default: break;
+	}
+
+	return scnprintf(bf, size, "%#x", behavior);
+}
+
+#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
@@ -108,6 +145,9 @@ static struct syscall_fmt {
 	{ .name	    = "ioctl",	    .errmsg = true,
 	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
+	{ .name     = "madvise",    .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
+			     [2] = SCA_MADV_BHV, /* behavior */ }, },
 	{ .name	    = "mmap",	    .hexret = true,
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 			     [2] = SCA_MMAP_PROT, /* prot */
-- 
cgit v0.10.2


From 99d725fc65563a85d4290342c81b00a673c6be66 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 26 Aug 2013 16:00:19 +0300
Subject: perf tools: Add pid to struct thread

Record pid on struct thread.  The member is named 'pid_' to avoid
confusion with the 'tid' member which was previously named 'pid'.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377522030-27870-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 574feb7..59486c1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -253,7 +253,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
 	return;
 }
 
-static struct thread *__machine__findnew_thread(struct machine *machine, pid_t tid,
+static struct thread *__machine__findnew_thread(struct machine *machine,
+						pid_t pid, pid_t tid,
 						bool create)
 {
 	struct rb_node **p = &machine->threads.rb_node;
@@ -265,8 +266,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
 	 * so most of the time we dont have to look up
 	 * the full rbtree:
 	 */
-	if (machine->last_match && machine->last_match->tid == tid)
+	if (machine->last_match && machine->last_match->tid == tid) {
+		if (pid && pid != machine->last_match->pid_)
+			machine->last_match->pid_ = pid;
 		return machine->last_match;
+	}
 
 	while (*p != NULL) {
 		parent = *p;
@@ -274,6 +278,8 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
 
 		if (th->tid == tid) {
 			machine->last_match = th;
+			if (pid && pid != th->pid_)
+				th->pid_ = pid;
 			return th;
 		}
 
@@ -286,7 +292,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
 	if (!create)
 		return NULL;
 
-	th = thread__new(tid);
+	th = thread__new(pid, tid);
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
 		rb_insert_color(&th->rb_node, &machine->threads);
@@ -298,12 +304,12 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t
 
 struct thread *machine__findnew_thread(struct machine *machine, pid_t tid)
 {
-	return __machine__findnew_thread(machine, tid, true);
+	return __machine__findnew_thread(machine, 0, tid, true);
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t tid)
 {
-	return __machine__findnew_thread(machine, tid, false);
+	return __machine__findnew_thread(machine, 0, tid, false);
 }
 
 int machine__process_comm_event(struct machine *machine, union perf_event *event)
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 6feeb88..e3d4a55 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,12 +7,13 @@
 #include "util.h"
 #include "debug.h"
 
-struct thread *thread__new(pid_t tid)
+struct thread *thread__new(pid_t pid, pid_t tid)
 {
 	struct thread *self = zalloc(sizeof(*self));
 
 	if (self != NULL) {
 		map_groups__init(&self->mg);
+		self->pid_ = pid;
 		self->tid = tid;
 		self->ppid = -1;
 		self->comm = malloc(32);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 32d0601..4ebbb40 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -12,6 +12,7 @@ struct thread {
 		struct list_head node;
 	};
 	struct map_groups	mg;
+	pid_t			pid_; /* Not all tools update this */
 	pid_t			tid;
 	pid_t			ppid;
 	char			shortname[3];
@@ -25,7 +26,7 @@ struct thread {
 
 struct machine;
 
-struct thread *thread__new(pid_t tid);
+struct thread *thread__new(pid_t pid, pid_t tid);
 void thread__delete(struct thread *self);
 static inline void thread__exited(struct thread *thread)
 {
-- 
cgit v0.10.2


From 456da532a5fb04f8a79622df7dd49e84e04f31a8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 26 Aug 2013 13:04:04 -0300
Subject: tools lib traceevent: Fixup jobserver setup

Getting rid of:

make[1]: Entering directory `/home/git/linux/tools/lib/traceevent'
make[2]: warning: jobserver unavailable: using -j1.  Add `+' to parent
make rule.
make[1]: Entering directory `/home/git/linux/tools/lib/lk

When running:

 make -j4 O=/tmp/build/perf -C tools/perf install-bin

Acked-by: Borislav Petkov <bp@suse.de>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-pvr7uppe329gw9onchgdu0m6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 0794acc..ca6cb77 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -60,7 +60,7 @@ ifeq ($(BUILD_SRC),)
 ifneq ($(BUILD_OUTPUT),)
 
 define build_output
-	$(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) 	\
+	$(if $(VERBOSE:1=),@)+$(MAKE) -C $(BUILD_OUTPUT) 	\
 	BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1
 endef
 
-- 
cgit v0.10.2


From 314add6b1f045b59ca39683bd0cbc5310cd203f2 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:03 +0300
Subject: perf tools: change machine__findnew_thread() to set thread pid

Add a new parameter for 'pid' to machine__findnew_thread().
Change callers to pass 'pid' when it is known.

Note that callers sometimes want to find the main thread
which has the memory maps.  The main thread has tid == pid
so the usage in that case is:

	machine__findnew_thread(machine, pid, pid)

whereas the usage to find the specific thread is:

	machine__findnew_thread(machine, pid, tid)

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 1d8de2e..0d4ae1d 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -198,7 +198,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	thread = machine__findnew_thread(machine, event->ip.pid);
+	thread = machine__findnew_thread(machine, event->ip.pid, event->ip.pid);
 	if (thread == NULL) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index b49f5c5..c324778 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -305,7 +305,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
+							event->ip.pid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 6cd4de5..47b3540 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -815,7 +815,7 @@ static int process_sample_event(struct perf_tool *tool,
 	if (skip_sample(kvm, sample))
 		return 0;
 
-	thread = machine__findnew_thread(machine, sample->tid);
+	thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			event->header.type);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 76543a4..ee33ba2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -805,7 +805,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, sample->tid);
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index f809cc7..d8c51b2 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -724,8 +724,10 @@ static int replay_fork_event(struct perf_sched *sched,
 {
 	struct thread *child, *parent;
 
-	child = machine__findnew_thread(machine, event->fork.tid);
-	parent = machine__findnew_thread(machine, event->fork.ptid);
+	child = machine__findnew_thread(machine, event->fork.pid,
+					event->fork.tid);
+	parent = machine__findnew_thread(machine, event->fork.ppid,
+					 event->fork.ptid);
 
 	if (child == NULL || parent == NULL) {
 		pr_debug("thread does not exist on fork event: child %p, parent %p\n",
@@ -934,8 +936,8 @@ static int latency_switch_event(struct perf_sched *sched,
 		return -1;
 	}
 
-	sched_out = machine__findnew_thread(machine, prev_pid);
-	sched_in = machine__findnew_thread(machine, next_pid);
+	sched_out = machine__findnew_thread(machine, 0, prev_pid);
+	sched_in = machine__findnew_thread(machine, 0, next_pid);
 
 	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
 	if (!out_events) {
@@ -978,7 +980,7 @@ static int latency_runtime_event(struct perf_sched *sched,
 {
 	const u32 pid	   = perf_evsel__intval(evsel, sample, "pid");
 	const u64 runtime  = perf_evsel__intval(evsel, sample, "runtime");
-	struct thread *thread = machine__findnew_thread(machine, pid);
+	struct thread *thread = machine__findnew_thread(machine, 0, pid);
 	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
 	u64 timestamp = sample->time;
 	int cpu = sample->cpu;
@@ -1016,7 +1018,7 @@ static int latency_wakeup_event(struct perf_sched *sched,
 	if (!success)
 		return 0;
 
-	wakee = machine__findnew_thread(machine, pid);
+	wakee = machine__findnew_thread(machine, 0, pid);
 	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
 	if (!atoms) {
 		if (thread_atoms_insert(sched, wakee))
@@ -1070,7 +1072,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 	if (sched->profile_cpu == -1)
 		return 0;
 
-	migrant = machine__findnew_thread(machine, pid);
+	migrant = machine__findnew_thread(machine, 0, pid);
 	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 	if (!atoms) {
 		if (thread_atoms_insert(sched, migrant))
@@ -1289,8 +1291,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 		return -1;
 	}
 
-	sched_out = machine__findnew_thread(machine, prev_pid);
-	sched_in = machine__findnew_thread(machine, next_pid);
+	sched_out = machine__findnew_thread(machine, 0, prev_pid);
+	sched_in = machine__findnew_thread(machine, 0, next_pid);
 
 	sched->curr_thread[this_cpu] = sched_in;
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 2ad9d5b..d82712f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -501,7 +501,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct machine *machine)
 {
 	struct addr_location al;
-	struct thread *thread = machine__findnew_thread(machine, event->ip.tid);
+	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
+							event->ip.tid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b72afc7..88387c5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -521,7 +521,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	if (sc->filtered)
 		return 0;
 
-	thread = machine__findnew_thread(&trace->host, sample->tid);
+	thread = machine__findnew_thread(&trace->host, sample->pid,
+					 sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		return -1;
@@ -572,7 +573,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	if (sc->filtered)
 		return 0;
 
-	thread = machine__findnew_thread(&trace->host, sample->tid);
+	thread = machine__findnew_thread(&trace->host, sample->pid,
+					 sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		return -1;
@@ -628,7 +630,9 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
 {
         u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
 	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
-	struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
+	struct thread *thread = machine__findnew_thread(&trace->host,
+							sample->pid,
+							sample->tid);
 	struct thread_trace *ttrace = thread__trace(thread, trace->output);
 
 	if (ttrace == NULL)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index df9afd9..6fb781d 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -257,7 +257,7 @@ static int process_sample_event(struct machine *machine,
 		return -1;
 	}
 
-	thread = machine__findnew_thread(machine, sample.pid);
+	thread = machine__findnew_thread(machine, sample.pid, sample.pid);
 	if (!thread) {
 		pr_debug("machine__findnew_thread failed\n");
 		return -1;
@@ -446,7 +446,7 @@ static int do_test_code_reading(bool try_kcore)
 		goto out_err;
 	}
 
-	thread = machine__findnew_thread(machine, pid);
+	thread = machine__findnew_thread(machine, pid, pid);
 	if (!thread) {
 		pr_debug("machine__findnew_thread failed\n");
 		goto out_err;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 50bfb01..87f9f72 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -88,7 +88,8 @@ static struct machine *setup_fake_machine(struct machines *machines)
 	for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
 		struct thread *thread;
 
-		thread = machine__findnew_thread(machine, fake_threads[i].pid);
+		thread = machine__findnew_thread(machine, fake_threads[i].pid,
+						 fake_threads[i].pid);
 		if (thread == NULL)
 			goto out;
 
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 5295625..0f9d27a 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -24,7 +24,8 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 {
 	struct addr_location al;
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
+							event->ip.pid);
 
 	if (thread == NULL) {
 		pr_err("problem processing %d event, skipping it.\n",
@@ -47,7 +48,9 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
 				       __maybe_unused,
 				       struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
+	struct thread *thread = machine__findnew_thread(machine,
+							event->fork.pid,
+							event->fork.tid);
 
 	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
 		    event->fork.ppid, event->fork.ptid);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 49713ae..61cecf9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -686,7 +686,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
 				  struct perf_sample *sample)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
+							event->ip.pid);
 
 	if (thread == NULL)
 		return -1;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 59486c1..1dca61f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -32,7 +32,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 		return -ENOMEM;
 
 	if (pid != HOST_KERNEL_ID) {
-		struct thread *thread = machine__findnew_thread(machine, pid);
+		struct thread *thread = machine__findnew_thread(machine, 0,
+								pid);
 		char comm[64];
 
 		if (thread == NULL)
@@ -302,9 +303,10 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
 	return th;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t tid)
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
+				       pid_t tid)
 {
-	return __machine__findnew_thread(machine, 0, tid, true);
+	return __machine__findnew_thread(machine, pid, tid, true);
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t tid)
@@ -314,7 +316,9 @@ struct thread *machine__find_thread(struct machine *machine, pid_t tid)
 
 int machine__process_comm_event(struct machine *machine, union perf_event *event)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->comm.tid);
+	struct thread *thread = machine__findnew_thread(machine,
+							event->comm.pid,
+							event->comm.tid);
 
 	if (dump_trace)
 		perf_event__fprintf_comm(event, stdout);
@@ -1012,7 +1016,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 		return 0;
 	}
 
-	thread = machine__findnew_thread(machine, event->mmap.pid);
+	thread = machine__findnew_thread(machine, event->mmap.pid,
+					 event->mmap.pid);
 	if (thread == NULL)
 		goto out_problem;
 
@@ -1051,13 +1056,16 @@ static void machine__remove_thread(struct machine *machine, struct thread *th)
 int machine__process_fork_event(struct machine *machine, union perf_event *event)
 {
 	struct thread *thread = machine__find_thread(machine, event->fork.tid);
-	struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
+	struct thread *parent = machine__findnew_thread(machine,
+							event->fork.ppid,
+							event->fork.ptid);
 
 	/* if a thread currently exists for the thread id remove it */
 	if (thread != NULL)
 		machine__remove_thread(machine, thread);
 
-	thread = machine__findnew_thread(machine, event->fork.tid);
+	thread = machine__findnew_thread(machine, event->fork.pid,
+					 event->fork.tid);
 	if (dump_trace)
 		perf_event__fprintf_task(event, stdout);
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 603ffba..0df925b 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -106,7 +106,8 @@ static inline bool machine__is_host(struct machine *machine)
 	return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
+				       pid_t tid);
 
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index de16a77..57b6f38 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1099,7 +1099,7 @@ void perf_event_header__bswap(struct perf_event_header *self)
 
 struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
 {
-	return machine__findnew_thread(&session->machines.host, pid);
+	return machine__findnew_thread(&session->machines.host, 0, pid);
 }
 
 static struct thread *perf_session__register_idle_thread(struct perf_session *self)
-- 
cgit v0.10.2


From 03b6ea9b91e0914caa847a1ade759af549555298 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:04 +0300
Subject: perf evsel: Tidy up sample parsing overflow checking

The size of data retrieved from a sample event must be validated to
ensure it does not go past the end of the event.  That was being done
sporadically and without considering integer overflows.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 47cbe1e..9a5fb23 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1131,24 +1131,30 @@ static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
 	return 0;
 }
 
-static bool sample_overlap(const union perf_event *event,
-			   const void *offset, u64 size)
+static inline bool overflow(const void *endp, u16 max_size, const void *offset,
+			    u64 size)
 {
-	const void *base = event;
+	return size > max_size || offset + size > endp;
+}
 
-	if (offset + size > base + event->header.size)
-		return true;
+#define OVERFLOW_CHECK(offset, size, max_size)				\
+	do {								\
+		if (overflow(endp, (max_size), (offset), (size)))	\
+			return -EFAULT;					\
+	} while (0)
 
-	return false;
-}
+#define OVERFLOW_CHECK_u64(offset) \
+	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
 
 int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 			     struct perf_sample *data)
 {
 	u64 type = evsel->attr.sample_type;
-	u64 regs_user = evsel->attr.sample_regs_user;
 	bool swapped = evsel->needs_swap;
 	const u64 *array;
+	u16 max_size = event->header.size;
+	const void *endp = (void *)event + max_size;
+	u64 sz;
 
 	/*
 	 * used for cross-endian analysis. See git commit 65014ab3
@@ -1170,6 +1176,11 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 
 	array = event->sample.array;
 
+	/*
+	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
+	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
+	 * check the format does not go past the end of the event.
+	 */
 	if (evsel->sample_size + sizeof(event->header) > event->header.size)
 		return -EFAULT;
 
@@ -1235,6 +1246,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	if (type & PERF_SAMPLE_READ) {
 		u64 read_format = evsel->attr.read_format;
 
+		OVERFLOW_CHECK_u64(array);
 		if (read_format & PERF_FORMAT_GROUP)
 			data->read.group.nr = *array;
 		else
@@ -1243,41 +1255,51 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		array++;
 
 		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			OVERFLOW_CHECK_u64(array);
 			data->read.time_enabled = *array;
 			array++;
 		}
 
 		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			OVERFLOW_CHECK_u64(array);
 			data->read.time_running = *array;
 			array++;
 		}
 
 		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
 		if (read_format & PERF_FORMAT_GROUP) {
-			data->read.group.values = (struct sample_read_value *) array;
-			array = (void *) array + data->read.group.nr *
-				sizeof(struct sample_read_value);
+			const u64 max_group_nr = UINT64_MAX /
+					sizeof(struct sample_read_value);
+
+			if (data->read.group.nr > max_group_nr)
+				return -EFAULT;
+			sz = data->read.group.nr *
+			     sizeof(struct sample_read_value);
+			OVERFLOW_CHECK(array, sz, max_size);
+			data->read.group.values =
+					(struct sample_read_value *)array;
+			array = (void *)array + sz;
 		} else {
+			OVERFLOW_CHECK_u64(array);
 			data->read.one.id = *array;
 			array++;
 		}
 	}
 
 	if (type & PERF_SAMPLE_CALLCHAIN) {
-		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
-			return -EFAULT;
-
-		data->callchain = (struct ip_callchain *)array;
+		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
 
-		if (sample_overlap(event, array, data->callchain->nr))
+		OVERFLOW_CHECK_u64(array);
+		data->callchain = (struct ip_callchain *)array++;
+		if (data->callchain->nr > max_callchain_nr)
 			return -EFAULT;
-
-		array += 1 + data->callchain->nr;
+		sz = data->callchain->nr * sizeof(u64);
+		OVERFLOW_CHECK(array, sz, max_size);
+		array = (void *)array + sz;
 	}
 
 	if (type & PERF_SAMPLE_RAW) {
-		const u64 *pdata;
-
+		OVERFLOW_CHECK_u64(array);
 		u.val64 = *array;
 		if (WARN_ONCE(swapped,
 			      "Endianness of raw data not corrected!\n")) {
@@ -1286,65 +1308,73 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 			u.val32[0] = bswap_32(u.val32[0]);
 			u.val32[1] = bswap_32(u.val32[1]);
 		}
-
-		if (sample_overlap(event, array, sizeof(u32)))
-			return -EFAULT;
-
 		data->raw_size = u.val32[0];
-		pdata = (void *) array + sizeof(u32);
-
-		if (sample_overlap(event, pdata, data->raw_size))
-			return -EFAULT;
+		array = (void *)array + sizeof(u32);
 
-		data->raw_data = (void *) pdata;
-
-		array = (void *)array + data->raw_size + sizeof(u32);
+		OVERFLOW_CHECK(array, data->raw_size, max_size);
+		data->raw_data = (void *)array;
+		array = (void *)array + data->raw_size;
 	}
 
 	if (type & PERF_SAMPLE_BRANCH_STACK) {
-		u64 sz;
+		const u64 max_branch_nr = UINT64_MAX /
+					  sizeof(struct branch_entry);
 
-		data->branch_stack = (struct branch_stack *)array;
-		array++; /* nr */
+		OVERFLOW_CHECK_u64(array);
+		data->branch_stack = (struct branch_stack *)array++;
 
+		if (data->branch_stack->nr > max_branch_nr)
+			return -EFAULT;
 		sz = data->branch_stack->nr * sizeof(struct branch_entry);
-		sz /= sizeof(u64);
-		array += sz;
+		OVERFLOW_CHECK(array, sz, max_size);
+		array = (void *)array + sz;
 	}
 
 	if (type & PERF_SAMPLE_REGS_USER) {
+		u64 avail;
+
 		/* First u64 tells us if we have any regs in sample. */
-		u64 avail = *array++;
+		OVERFLOW_CHECK_u64(array);
+		avail = *array++;
 
 		if (avail) {
+			u64 regs_user = evsel->attr.sample_regs_user;
+
+			sz = hweight_long(regs_user) * sizeof(u64);
+			OVERFLOW_CHECK(array, sz, max_size);
 			data->user_regs.regs = (u64 *)array;
-			array += hweight_long(regs_user);
+			array = (void *)array + sz;
 		}
 	}
 
 	if (type & PERF_SAMPLE_STACK_USER) {
-		u64 size = *array++;
+		OVERFLOW_CHECK_u64(array);
+		sz = *array++;
 
 		data->user_stack.offset = ((char *)(array - 1)
 					  - (char *) event);
 
-		if (!size) {
+		if (!sz) {
 			data->user_stack.size = 0;
 		} else {
+			OVERFLOW_CHECK(array, sz, max_size);
 			data->user_stack.data = (char *)array;
-			array += size / sizeof(*array);
+			array = (void *)array + sz;
+			OVERFLOW_CHECK_u64(array);
 			data->user_stack.size = *array++;
 		}
 	}
 
 	data->weight = 0;
 	if (type & PERF_SAMPLE_WEIGHT) {
+		OVERFLOW_CHECK_u64(array);
 		data->weight = *array;
 		array++;
 	}
 
 	data->data_src = PERF_MEM_DATA_SRC_NONE;
 	if (type & PERF_SAMPLE_DATA_SRC) {
+		OVERFLOW_CHECK_u64(array);
 		data->data_src = *array;
 		array++;
 	}
-- 
cgit v0.10.2


From 07940293ba7a43070cdebda952b0e6025d80a383 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:05 +0300
Subject: perf callchain: Remove unnecessary validation

Now that the sample parsing correctly checks data sizes there is no
reason for it to be done again for callchains.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 4fee33b..482f680 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,14 +21,6 @@
 
 __thread struct callchain_cursor callchain_cursor;
 
-bool ip_callchain__valid(struct ip_callchain *chain,
-			 const union perf_event *event)
-{
-	unsigned int chain_size = event->header.size;
-	chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
-	return chain->nr * sizeof(u64) <= chain_size;
-}
-
 #define chain_for_each_child(child, parent)	\
 	list_for_each_entry(child, &parent->children, siblings)
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 812d5a0..2b585bc 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -109,11 +109,6 @@ int callchain_append(struct callchain_root *root,
 int callchain_merge(struct callchain_cursor *cursor,
 		    struct callchain_root *dst, struct callchain_root *src);
 
-struct ip_callchain;
-union perf_event;
-
-bool ip_callchain__valid(struct ip_callchain *chain,
-			 const union perf_event *event);
 /*
  * Initialize a cursor before adding entries inside, but keep
  * the previously allocated entries as a cache.
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 57b6f38..07590c3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -997,22 +997,6 @@ static int perf_session_deliver_event(struct perf_session *session,
 	}
 }
 
-static int perf_session__preprocess_sample(struct perf_session *session,
-					   union perf_event *event, struct perf_sample *sample)
-{
-	if (event->header.type != PERF_RECORD_SAMPLE ||
-	    !(perf_evlist__sample_type(session->evlist) & PERF_SAMPLE_CALLCHAIN))
-		return 0;
-
-	if (!ip_callchain__valid(sample->callchain, event)) {
-		pr_debug("call-chain problem with event, skipping it.\n");
-		++session->stats.nr_invalid_chains;
-		session->stats.total_invalid_chains += sample->period;
-		return -EINVAL;
-	}
-	return 0;
-}
-
 static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
 					    struct perf_tool *tool, u64 file_offset)
 {
@@ -1075,10 +1059,6 @@ static int perf_session__process_event(struct perf_session *session,
 	if (ret)
 		return ret;
 
-	/* Preprocess sample records - precheck callchains */
-	if (perf_session__preprocess_sample(session, event, &sample))
-		return 0;
-
 	if (tool->ordered_samples) {
 		ret = perf_session_queue_event(session, event, &sample,
 					       file_offset);
-- 
cgit v0.10.2


From ef89325f773bc9f2f4e6019bd7f3d968ba85df9a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:06 +0300
Subject: perf tools: Remove references to struct ip_event

The ip_event struct assumes fixed positions for ip, pid and tid.  That
is no longer true with the addition of PERF_SAMPLE_IDENTIFIER.  The same
information is already available in struct perf_sample, so use that
instead.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0d4ae1d..ffacd46 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -198,7 +198,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	thread = machine__findnew_thread(machine, event->ip.pid, event->ip.pid);
+	thread = machine__findnew_thread(machine, sample->pid, sample->pid);
 	if (thread == NULL) {
 		pr_err("problem processing %d event, skipping it.\n",
 		       event->header.type);
@@ -206,7 +206,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
+			      sample->ip, &al);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index c324778..c2dff9c 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -305,8 +305,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
-							event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->pid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 706a1fa..791b432 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -94,7 +94,7 @@ dump_raw_samples(struct perf_tool *tool,
 		symbol_conf.field_sep,
 		sample->tid,
 		symbol_conf.field_sep,
-		event->ip.ip,
+		sample->ip,
 		symbol_conf.field_sep,
 		sample->addr,
 		symbol_conf.field_sep,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d82712f..93a34ce 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -501,8 +501,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct machine *machine)
 {
 	struct addr_location al;
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
-							event->ip.tid);
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->tid);
 
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e37521f..2122141 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -689,7 +689,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 {
 	struct perf_top *top = container_of(tool, struct perf_top, tool);
 	struct symbol *parent = NULL;
-	u64 ip = event->ip.ip;
+	u64 ip = sample->ip;
 	struct addr_location al;
 	int err;
 
@@ -699,10 +699,10 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		if (!seen)
 			seen = intlist__new(NULL);
 
-		if (!intlist__has_entry(seen, event->ip.pid)) {
+		if (!intlist__has_entry(seen, sample->pid)) {
 			pr_err("Can't find guest [%d]'s kernel information\n",
-				event->ip.pid);
-			intlist__add(seen, event->ip.pid);
+				sample->pid);
+			intlist__add(seen, sample->pid);
 		}
 		return;
 	}
@@ -836,7 +836,8 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 			break;
 		case PERF_RECORD_MISC_GUEST_KERNEL:
 			++top->guest_kernel_samples;
-			machine = perf_session__find_machine(session, event->ip.pid);
+			machine = perf_session__find_machine(session,
+							     sample.pid);
 			break;
 		case PERF_RECORD_MISC_GUEST_USER:
 			++top->guest_us_samples;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 87f9f72..4228ffc 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -211,15 +211,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 	list_for_each_entry(evsel, &evlist->entries, node) {
 		for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
 			const union perf_event event = {
-				.ip = {
-					.header = {
-						.misc = PERF_RECORD_MISC_USER,
-					},
-					.pid = fake_common_samples[k].pid,
-					.ip  = fake_common_samples[k].ip,
+				.header = {
+					.misc = PERF_RECORD_MISC_USER,
 				},
 			};
 
+			sample.pid = fake_common_samples[k].pid;
+			sample.ip = fake_common_samples[k].ip;
 			if (perf_event__preprocess_sample(&event, machine, &al,
 							  &sample) < 0)
 				goto out;
@@ -235,15 +233,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 
 		for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
 			const union perf_event event = {
-				.ip = {
-					.header = {
-						.misc = PERF_RECORD_MISC_USER,
-					},
-					.pid = fake_samples[i][k].pid,
-					.ip  = fake_samples[i][k].ip,
+				.header = {
+					.misc = PERF_RECORD_MISC_USER,
 				},
 			};
 
+			sample.pid = fake_samples[i][k].pid;
+			sample.ip = fake_samples[i][k].ip;
 			if (perf_event__preprocess_sample(&event, machine, &al,
 							  &sample) < 0)
 				goto out;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 0f9d27a..fb58409 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -18,14 +18,14 @@
 
 int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 			   union perf_event *event,
-			   struct perf_sample *sample __maybe_unused,
+			   struct perf_sample *sample,
 			   struct perf_evsel *evsel __maybe_unused,
 			   struct machine *machine)
 {
 	struct addr_location al;
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
-							event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->pid);
 
 	if (thread == NULL) {
 		pr_err("problem processing %d event, skipping it.\n",
@@ -34,7 +34,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
+			      sample->ip, &al);
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 61cecf9..8d51f21 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -686,8 +686,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
 				  struct perf_sample *sample)
 {
 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread = machine__findnew_thread(machine, event->ip.pid,
-							event->ip.pid);
+	struct thread *thread = machine__findnew_thread(machine, sample->pid,
+							sample->pid);
 
 	if (thread == NULL)
 		return -1;
@@ -709,7 +709,7 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		machine__create_kernel_maps(machine);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, al);
+			      sample->ip, al);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 15db071..19d911c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -8,16 +8,6 @@
 #include "map.h"
 #include "build-id.h"
 
-/*
- * PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
- */
-struct ip_event {
-	struct perf_event_header header;
-	u64 ip;
-	u32 pid, tid;
-	unsigned char __more_data[];
-};
-
 struct mmap_event {
 	struct perf_event_header header;
 	u32 pid, tid;
@@ -166,7 +156,6 @@ struct tracing_data_event {
 
 union perf_event {
 	struct perf_event_header	header;
-	struct ip_event			ip;
 	struct mmap_event		mmap;
 	struct comm_event		comm;
 	struct fork_event		fork;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9a5fb23..7e328c4 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1185,7 +1185,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		return -EFAULT;
 
 	if (type & PERF_SAMPLE_IP) {
-		data->ip = event->ip.ip;
+		data->ip = *array;
 		array++;
 	}
 
@@ -1397,7 +1397,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 	array = event->sample.array;
 
 	if (type & PERF_SAMPLE_IP) {
-		event->ip.ip = sample->ip;
+		*array = sample->ip;
 		array++;
 	}
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 07590c3..c3ac483 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -840,7 +840,8 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 
 static struct machine *
 	perf_session__find_machine_for_cpumode(struct perf_session *session,
-					       union perf_event *event)
+					       union perf_event *event,
+					       struct perf_sample *sample)
 {
 	const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
@@ -852,7 +853,7 @@ static struct machine *
 		if (event->header.type == PERF_RECORD_MMAP)
 			pid = event->mmap.pid;
 		else
-			pid = event->ip.pid;
+			pid = sample->pid;
 
 		return perf_session__findnew_machine(session, pid);
 	}
@@ -958,7 +959,8 @@ static int perf_session_deliver_event(struct perf_session *session,
 		hists__inc_nr_events(&evsel->hists, event->header.type);
 	}
 
-	machine = perf_session__find_machine_for_cpumode(session, event);
+	machine = perf_session__find_machine_for_cpumode(session, event,
+							 sample);
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
-- 
cgit v0.10.2


From ff3d527cebc1fa3707c617bfe9e74f53fcfb0955 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:07 +0300
Subject: perf: make events stream always parsable

The event stream is not always parsable because the format of a sample
is dependent on the sample_type of the selected event.  When there is
more than one selected event and the sample_types are not the same then
parsing becomes problematic.  A sample can be matched to its selected
event using the ID that is allocated when the event is opened.
Unfortunately, to get the ID from the sample means first parsing it.

This patch adds a new sample format bit PERF_SAMPLE_IDENTIFIER that puts
the ID at a fixed position so that the ID can be retrieved without
parsing the sample.  For sample events, that is the first position
immediately after the header.  For non-sample events, that is the last
position.

Parsing samples therefore requires that the sample_type and ID
values are recorded.  For example, perf tools records struct
perf_event_attr and the IDs within the perf.data file.  Those must be
read first before it is possible to parse samples found later in the
perf.data file.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-6-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 62c25a2..42cb7b6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -134,8 +134,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_STACK_USER			= 1U << 13,
 	PERF_SAMPLE_WEIGHT			= 1U << 14,
 	PERF_SAMPLE_DATA_SRC			= 1U << 15,
+	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 
-	PERF_SAMPLE_MAX = 1U << 16,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 17,		/* non-ABI */
 };
 
 /*
@@ -492,12 +493,12 @@ enum perf_event_type {
 	/*
 	 * If perf_event_attr.sample_id_all is set then all event types will
 	 * have the sample_type selected fields related to where/when
-	 * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
-	 * described in PERF_RECORD_SAMPLE below, it will be stashed just after
-	 * the perf_event_header and the fields already present for the existing
-	 * fields, i.e. at the end of the payload. That way a newer perf.data
-	 * file will be supported by older perf tools, with these new optional
-	 * fields being ignored.
+	 * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
+	 * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed
+	 * just after the perf_event_header and the fields already present for
+	 * the existing fields, i.e. at the end of the payload. That way a newer
+	 * perf.data file will be supported by older perf tools, with these new
+	 * optional fields being ignored.
 	 *
 	 * struct sample_id {
 	 * 	{ u32			pid, tid; } && PERF_SAMPLE_TID
@@ -505,7 +506,12 @@ enum perf_event_type {
 	 * 	{ u64			id;       } && PERF_SAMPLE_ID
 	 * 	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
 	 * 	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
 	 * } && perf_event_attr::sample_id_all
+	 *
+	 * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.  The
+	 * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
+	 * relative to header.size.
 	 */
 
 	/*
@@ -594,6 +600,13 @@ enum perf_event_type {
 	 * struct {
 	 *	struct perf_event_header	header;
 	 *
+	 *	#
+	 *	# Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+	 *	# The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+	 *	# is fixed relative to header.
+	 *	#
+	 *
+	 *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
 	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
 	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
 	 *	{ u64			time;     } && PERF_SAMPLE_TIME
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 928fae7..15d0f24 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1213,6 +1213,9 @@ static void perf_event__id_header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_TIME)
 		size += sizeof(data->time);
 
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		size += sizeof(data->id);
+
 	if (sample_type & PERF_SAMPLE_ID)
 		size += sizeof(data->id);
 
@@ -4280,7 +4283,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_TIME)
 		data->time = perf_clock();
 
-	if (sample_type & PERF_SAMPLE_ID)
+	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
 		data->id = primary_event_id(event);
 
 	if (sample_type & PERF_SAMPLE_STREAM_ID)
@@ -4319,6 +4322,9 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,
 
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(handle, data->cpu_entry);
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		perf_output_put(handle, data->id);
 }
 
 void perf_event__output_id_sample(struct perf_event *event,
@@ -4432,6 +4438,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 
 	perf_output_put(handle, *header);
 
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		perf_output_put(handle, data->id);
+
 	if (sample_type & PERF_SAMPLE_IP)
 		perf_output_put(handle, data->ip);
 
-- 
cgit v0.10.2


From faf967068e8c4d8df52f01f9361241101b3065a0 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:08 +0300
Subject: perf evlist: Move perf_evlist__config() to a new source file

perf_evlist__config() must be moved to a separate source file to avoid
Python link errors when adding support for PERF_SAMPLE_IDENTIFIER.

It is appropriate to do this because perf_evlist__config() is a helper
function for event recording.  It is used by tools to apply recording
options to perf_evlist.  It is not used by the Python API.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7d30a7d..845cc76 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -360,6 +360,7 @@ LIB_OBJS += $(OUTPUT)util/rblist.o
 LIB_OBJS += $(OUTPUT)util/intlist.o
 LIB_OBJS += $(OUTPUT)util/vdso.o
 LIB_OBJS += $(OUTPUT)util/stat.o
+LIB_OBJS += $(OUTPUT)util/record.o
 
 LIB_OBJS += $(OUTPUT)ui/setup.o
 LIB_OBJS += $(OUTPUT)ui/helpline.o
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1f5105a..9d682e5 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -49,28 +49,6 @@ struct perf_evlist *perf_evlist__new(void)
 	return evlist;
 }
 
-void perf_evlist__config(struct perf_evlist *evlist,
-			struct perf_record_opts *opts)
-{
-	struct perf_evsel *evsel;
-	/*
-	 * Set the evsel leader links before we configure attributes,
-	 * since some might depend on this info.
-	 */
-	if (opts->group)
-		perf_evlist__set_leader(evlist);
-
-	if (evlist->cpus->map[0] < 0)
-		opts->no_inherit = true;
-
-	list_for_each_entry(evsel, &evlist->entries, node) {
-		perf_evsel__config(evsel, opts);
-
-		if (evlist->nr_entries > 1)
-			perf_evsel__set_sample_id(evsel);
-	}
-}
-
 static void perf_evlist__purge(struct perf_evlist *evlist)
 {
 	struct perf_evsel *pos, *n;
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
new file mode 100644
index 0000000..9b5ef79
--- /dev/null
+++ b/tools/perf/util/record.c
@@ -0,0 +1,25 @@
+#include "evlist.h"
+#include "evsel.h"
+#include "cpumap.h"
+
+void perf_evlist__config(struct perf_evlist *evlist,
+			struct perf_record_opts *opts)
+{
+	struct perf_evsel *evsel;
+	/*
+	 * Set the evsel leader links before we configure attributes,
+	 * since some might depend on this info.
+	 */
+	if (opts->group)
+		perf_evlist__set_leader(evlist);
+
+	if (evlist->cpus->map[0] < 0)
+		opts->no_inherit = true;
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		perf_evsel__config(evsel, opts);
+
+		if (evlist->nr_entries > 1)
+			perf_evsel__set_sample_id(evsel);
+	}
+}
-- 
cgit v0.10.2


From 75562573bab35b129cfd342fc2bcf89da84a6644 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:09 +0300
Subject: perf tools: Add support for PERF_SAMPLE_IDENTIFIER

Enable parsing of samples with sample format bit PERF_SAMPLE_IDENTIFIER.
In addition, if the kernel supports it, select PERF_SAMPLE_IDENTIFIER in
preference to PERF_SAMPLE_ID, thereby allowing the events in an evlist to
have non-matching sample types.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 958a56a..9725aa3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -365,7 +365,7 @@ static int process_read_event(struct perf_tool *tool,
 static int perf_report__setup_sample_type(struct perf_report *rep)
 {
 	struct perf_session *self = rep->session;
-	u64 sample_type = perf_evlist__sample_type(self->evlist);
+	u64 sample_type = perf_evlist__combined_sample_type(self->evlist);
 
 	if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		if (sort__has_parent) {
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 5b1b5ab..c4185b9 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -72,7 +72,7 @@ int test__basic_mmap(void)
 		}
 
 		evsels[i]->attr.wakeup_events = 1;
-		perf_evsel__set_sample_id(evsels[i]);
+		perf_evsel__set_sample_id(evsels[i], false);
 
 		perf_evlist__add(evlist, evsels[i]);
 
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 19d911c..4913339 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -53,7 +53,8 @@ struct read_event {
 	(PERF_SAMPLE_IP | PERF_SAMPLE_TID |		\
 	 PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR |		\
 	PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID |	\
-	 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
+	 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD |		\
+	 PERF_SAMPLE_IDENTIFIER)
 
 struct sample_event {
 	struct perf_event_header        header;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 9d682e5..6a629af 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -49,6 +49,21 @@ struct perf_evlist *perf_evlist__new(void)
 	return evlist;
 }
 
+/**
+ * perf_evlist__set_id_pos - set the positions of event ids.
+ * @evlist: selected event list
+ *
+ * Events with compatible sample types all have the same id_pos
+ * and is_pos.  For convenience, put a copy on evlist.
+ */
+void perf_evlist__set_id_pos(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+
+	evlist->id_pos = first->id_pos;
+	evlist->is_pos = first->is_pos;
+}
+
 static void perf_evlist__purge(struct perf_evlist *evlist)
 {
 	struct perf_evsel *pos, *n;
@@ -79,15 +94,20 @@ void perf_evlist__delete(struct perf_evlist *evlist)
 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
 {
 	list_add_tail(&entry->node, &evlist->entries);
-	++evlist->nr_entries;
+	if (!evlist->nr_entries++)
+		perf_evlist__set_id_pos(evlist);
 }
 
 void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
 				   struct list_head *list,
 				   int nr_entries)
 {
+	bool set_id_pos = !evlist->nr_entries;
+
 	list_splice_tail(list, &evlist->entries);
 	evlist->nr_entries += nr_entries;
+	if (set_id_pos)
+		perf_evlist__set_id_pos(evlist);
 }
 
 void __perf_evlist__set_leader(struct list_head *list)
@@ -349,6 +369,55 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 	return NULL;
 }
 
+static int perf_evlist__event2id(struct perf_evlist *evlist,
+				 union perf_event *event, u64 *id)
+{
+	const u64 *array = event->sample.array;
+	ssize_t n;
+
+	n = (event->header.size - sizeof(event->header)) >> 3;
+
+	if (event->header.type == PERF_RECORD_SAMPLE) {
+		if (evlist->id_pos >= n)
+			return -1;
+		*id = array[evlist->id_pos];
+	} else {
+		if (evlist->is_pos > n)
+			return -1;
+		n -= evlist->is_pos;
+		*id = array[n];
+	}
+	return 0;
+}
+
+static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
+						   union perf_event *event)
+{
+	struct hlist_head *head;
+	struct perf_sample_id *sid;
+	int hash;
+	u64 id;
+
+	if (evlist->nr_entries == 1)
+		return perf_evlist__first(evlist);
+
+	if (perf_evlist__event2id(evlist, event, &id))
+		return NULL;
+
+	/* Synthesized events have an id of zero */
+	if (!id)
+		return perf_evlist__first(evlist);
+
+	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+	head = &evlist->heads[hash];
+
+	hlist_for_each_entry(sid, head, node) {
+		if (sid->id == id)
+			return sid->evsel;
+	}
+	return NULL;
+}
+
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	struct perf_mmap *md = &evlist->mmap[idx];
@@ -659,20 +728,40 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
 
 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
 {
-	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
+	struct perf_evsel *pos;
 
-	list_for_each_entry_continue(pos, &evlist->entries, node) {
-		if (first->attr.sample_type != pos->attr.sample_type)
+	if (evlist->nr_entries == 1)
+		return true;
+
+	if (evlist->id_pos < 0 || evlist->is_pos < 0)
+		return false;
+
+	list_for_each_entry(pos, &evlist->entries, node) {
+		if (pos->id_pos != evlist->id_pos ||
+		    pos->is_pos != evlist->is_pos)
 			return false;
 	}
 
 	return true;
 }
 
-u64 perf_evlist__sample_type(struct perf_evlist *evlist)
+u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
 {
-	struct perf_evsel *first = perf_evlist__first(evlist);
-	return first->attr.sample_type;
+	struct perf_evsel *evsel;
+
+	if (evlist->combined_sample_type)
+		return evlist->combined_sample_type;
+
+	list_for_each_entry(evsel, &evlist->entries, node)
+		evlist->combined_sample_type |= evsel->attr.sample_type;
+
+	return evlist->combined_sample_type;
+}
+
+u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
+{
+	evlist->combined_sample_type = 0;
+	return __perf_evlist__combined_sample_type(evlist);
 }
 
 bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
@@ -727,6 +816,9 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
 
 	if (sample_type & PERF_SAMPLE_CPU)
 		size += sizeof(data->cpu) * 2;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		size += sizeof(data->id);
 out:
 	return size;
 }
@@ -885,7 +977,10 @@ int perf_evlist__start_workload(struct perf_evlist *evlist)
 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
 			      struct perf_sample *sample)
 {
-	struct perf_evsel *evsel = perf_evlist__first(evlist);
+	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+	if (!evsel)
+		return -EFAULT;
 	return perf_evsel__parse_sample(evsel, event, sample);
 }
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 327abab..ab95d72 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -32,6 +32,9 @@ struct perf_evlist {
 	int		 nr_fds;
 	int		 nr_mmaps;
 	int		 mmap_len;
+	int		 id_pos;
+	int		 is_pos;
+	u64		 combined_sample_type;
 	struct {
 		int	cork_fd;
 		pid_t	pid;
@@ -85,6 +88,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
 int perf_evlist__open(struct perf_evlist *evlist);
 void perf_evlist__close(struct perf_evlist *evlist);
 
+void perf_evlist__set_id_pos(struct perf_evlist *evlist);
+bool perf_can_sample_identifier(void);
 void perf_evlist__config(struct perf_evlist *evlist,
 			 struct perf_record_opts *opts);
 
@@ -121,7 +126,8 @@ void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct perf_evlist *evlist);
 
 u64 perf_evlist__read_format(struct perf_evlist *evlist);
-u64 perf_evlist__sample_type(struct perf_evlist *evlist);
+u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
 bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7e328c4..db4e431 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -31,7 +31,7 @@ static struct {
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 
-static int __perf_evsel__sample_size(u64 sample_type)
+int __perf_evsel__sample_size(u64 sample_type)
 {
 	u64 mask = sample_type & PERF_SAMPLE_MASK;
 	int size = 0;
@@ -47,6 +47,72 @@ static int __perf_evsel__sample_size(u64 sample_type)
 	return size;
 }
 
+/**
+ * __perf_evsel__calc_id_pos - calculate id_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
+ * sample_event.
+ */
+static int __perf_evsel__calc_id_pos(u64 sample_type)
+{
+	int idx = 0;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		return 0;
+
+	if (!(sample_type & PERF_SAMPLE_ID))
+		return -1;
+
+	if (sample_type & PERF_SAMPLE_IP)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_TID)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_TIME)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_ADDR)
+		idx += 1;
+
+	return idx;
+}
+
+/**
+ * __perf_evsel__calc_is_pos - calculate is_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position (counting backwards) of the event id
+ * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
+ * sample_id_all is used there is an id sample appended to non-sample events.
+ */
+static int __perf_evsel__calc_is_pos(u64 sample_type)
+{
+	int idx = 1;
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		return 1;
+
+	if (!(sample_type & PERF_SAMPLE_ID))
+		return -1;
+
+	if (sample_type & PERF_SAMPLE_CPU)
+		idx += 1;
+
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		idx += 1;
+
+	return idx;
+}
+
+void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
+{
+	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
+	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
+}
+
 void hists__init(struct hists *hists)
 {
 	memset(hists, 0, sizeof(*hists));
@@ -63,6 +129,7 @@ void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
 	if (!(evsel->attr.sample_type & bit)) {
 		evsel->attr.sample_type |= bit;
 		evsel->sample_size += sizeof(u64);
+		perf_evsel__calc_id_pos(evsel);
 	}
 }
 
@@ -72,12 +139,19 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
 	if (evsel->attr.sample_type & bit) {
 		evsel->attr.sample_type &= ~bit;
 		evsel->sample_size -= sizeof(u64);
+		perf_evsel__calc_id_pos(evsel);
 	}
 }
 
-void perf_evsel__set_sample_id(struct perf_evsel *evsel)
+void perf_evsel__set_sample_id(struct perf_evsel *evsel,
+			       bool can_sample_identifier)
 {
-	perf_evsel__set_sample_bit(evsel, ID);
+	if (can_sample_identifier) {
+		perf_evsel__reset_sample_bit(evsel, ID);
+		perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+	} else {
+		perf_evsel__set_sample_bit(evsel, ID);
+	}
 	evsel->attr.read_format |= PERF_FORMAT_ID;
 }
 
@@ -90,6 +164,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	INIT_LIST_HEAD(&evsel->node);
 	hists__init(&evsel->hists);
 	evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
+	perf_evsel__calc_id_pos(evsel);
 }
 
 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
@@ -509,7 +584,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		 * We need ID even in case of single event, because
 		 * PERF_SAMPLE_READ process ID specific data.
 		 */
-		perf_evsel__set_sample_id(evsel);
+		perf_evsel__set_sample_id(evsel, false);
 
 		/*
 		 * Apply group format only if we belong to group
@@ -1088,6 +1163,11 @@ static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
 	array += ((event->header.size -
 		   sizeof(event->header)) / sizeof(u64)) - 1;
 
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		sample->id = *array;
+		array--;
+	}
+
 	if (type & PERF_SAMPLE_CPU) {
 		u.val64 = *array;
 		if (swapped) {
@@ -1184,6 +1264,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	if (evsel->sample_size + sizeof(event->header) > event->header.size)
 		return -EFAULT;
 
+	data->id = -1ULL;
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		data->id = *array;
+		array++;
+	}
+
 	if (type & PERF_SAMPLE_IP) {
 		data->ip = *array;
 		array++;
@@ -1214,7 +1300,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		array++;
 	}
 
-	data->id = -1ULL;
 	if (type & PERF_SAMPLE_ID) {
 		data->id = *array;
 		array++;
@@ -1396,6 +1481,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 
 	array = event->sample.array;
 
+	if (type & PERF_SAMPLE_IDENTIFIER) {
+		*array = sample->id;
+		array++;
+	}
+
 	if (type & PERF_SAMPLE_IP) {
 		*array = sample->ip;
 		array++;
@@ -1584,6 +1674,7 @@ static int sample_type__fprintf(FILE *fp, bool *first, u64 value)
 		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
 		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
 		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+		bit_name(IDENTIFIER),
 		{ .name = NULL, }
 	};
 #undef bit_name
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 532a5f9..4a7bdc7 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -48,6 +48,12 @@ struct perf_sample_id {
  * @name - Can be set to retain the original event name passed by the user,
  *         so that when showing results in tools such as 'perf stat', we
  *         show the name used, not some alias.
+ * @id_pos: the position of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of
+ *          struct sample_event
+ * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
+ *          is used there is an id sample appended to non-sample events
  */
 struct perf_evsel {
 	struct list_head	node;
@@ -74,6 +80,8 @@ struct perf_evsel {
 	} handler;
 	struct cpu_map		*cpus;
 	unsigned int		sample_size;
+	int			id_pos;
+	int			is_pos;
 	bool 			supported;
 	bool 			needs_swap;
 	/* parse modifier helper */
@@ -104,6 +112,9 @@ void perf_evsel__delete(struct perf_evsel *evsel);
 void perf_evsel__config(struct perf_evsel *evsel,
 			struct perf_record_opts *opts);
 
+int __perf_evsel__sample_size(u64 sample_type);
+void perf_evsel__calc_id_pos(struct perf_evsel *evsel);
+
 bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
 
 #define PERF_EVSEL__MAX_ALIASES 8
@@ -142,7 +153,8 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
 #define perf_evsel__reset_sample_bit(evsel, bit) \
 	__perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
 
-void perf_evsel__set_sample_id(struct perf_evsel *evsel);
+void perf_evsel__set_sample_id(struct perf_evsel *evsel,
+			       bool use_sample_identifier);
 
 int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 			   const char *filter);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 9b5ef79..18d73aa 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -1,11 +1,83 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "cpumap.h"
+#include "parse-events.h"
+
+typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+{
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel;
+	int err = -EAGAIN, fd;
+
+	evlist = perf_evlist__new();
+	if (!evlist)
+		return -ENOMEM;
+
+	if (parse_events(evlist, str))
+		goto out_delete;
+
+	evsel = perf_evlist__first(evlist);
+
+	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0);
+	if (fd < 0)
+		goto out_delete;
+	close(fd);
+
+	fn(evsel);
+
+	fd = sys_perf_event_open(&evsel->attr, -1, cpu, -1, 0);
+	if (fd < 0) {
+		if (errno == EINVAL)
+			err = -EINVAL;
+		goto out_delete;
+	}
+	close(fd);
+	err = 0;
+
+out_delete:
+	perf_evlist__delete(evlist);
+	return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+	const char *try[] = {"cycles:u", "instructions:u", "cpu-clock", NULL};
+	struct cpu_map *cpus;
+	int cpu, ret, i = 0;
+
+	cpus = cpu_map__new(NULL);
+	if (!cpus)
+		return false;
+	cpu = cpus->map[0];
+	cpu_map__delete(cpus);
+
+	do {
+		ret = perf_do_probe_api(fn, cpu, try[i++]);
+		if (!ret)
+			return true;
+	} while (ret == -EAGAIN && try[i]);
+
+	return false;
+}
+
+static void perf_probe_sample_identifier(struct perf_evsel *evsel)
+{
+	evsel->attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+bool perf_can_sample_identifier(void)
+{
+	return perf_probe_api(perf_probe_sample_identifier);
+}
 
 void perf_evlist__config(struct perf_evlist *evlist,
 			struct perf_record_opts *opts)
 {
 	struct perf_evsel *evsel;
+	bool use_sample_identifier = false;
+
 	/*
 	 * Set the evsel leader links before we configure attributes,
 	 * since some might depend on this info.
@@ -16,10 +88,21 @@ void perf_evlist__config(struct perf_evlist *evlist,
 	if (evlist->cpus->map[0] < 0)
 		opts->no_inherit = true;
 
-	list_for_each_entry(evsel, &evlist->entries, node) {
+	list_for_each_entry(evsel, &evlist->entries, node)
 		perf_evsel__config(evsel, opts);
 
-		if (evlist->nr_entries > 1)
-			perf_evsel__set_sample_id(evsel);
+	if (evlist->nr_entries > 1) {
+		struct perf_evsel *first = perf_evlist__first(evlist);
+
+		list_for_each_entry(evsel, &evlist->entries, node) {
+			if (evsel->attr.sample_type == first->attr.sample_type)
+				continue;
+			use_sample_identifier = perf_can_sample_identifier();
+			break;
+		}
+		list_for_each_entry(evsel, &evlist->entries, node)
+			perf_evsel__set_sample_id(evsel, use_sample_identifier);
 	}
+
+	perf_evlist__set_id_pos(evlist);
 }
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c3ac483..07642a7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -739,7 +739,7 @@ static void perf_session__print_tstamp(struct perf_session *session,
 				       union perf_event *event,
 				       struct perf_sample *sample)
 {
-	u64 sample_type = perf_evlist__sample_type(session->evlist);
+	u64 sample_type = __perf_evlist__combined_sample_type(session->evlist);
 
 	if (event->header.type != PERF_RECORD_SAMPLE &&
 	    !perf_evlist__sample_id_all(session->evlist)) {
-- 
cgit v0.10.2


From 5b95a4a3b52a1de84022dfaf580df4b5251f4a15 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:10 +0300
Subject: perf tools: Add missing 'abi' member to 'struct regs_dump'

Add the missing 'abi' member to 'struct regs_dump' and store the parsed
value there.  Note that the 'abi' is 0 (no registers), 1 (32-bit
registers) or 2 (64-bit registers), but in every case the registers are
copied one-by-one as 64-bit values onto the event; see
'perf_output_sample_regs()'.

Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 4913339..f959801 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -62,6 +62,7 @@ struct sample_event {
 };
 
 struct regs_dump {
+	u64 abi;
 	u64 *regs;
 };
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index db4e431..1184188 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1416,13 +1416,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	}
 
 	if (type & PERF_SAMPLE_REGS_USER) {
-		u64 avail;
-
 		/* First u64 tells us if we have any regs in sample. */
 		OVERFLOW_CHECK_u64(array);
-		avail = *array++;
+		data->user_regs.abi = *array;
+		array++;
 
-		if (avail) {
+		if (data->user_regs.abi) {
 			u64 regs_user = evsel->attr.sample_regs_user;
 
 			sz = hweight_long(regs_user) * sizeof(u64);
-- 
cgit v0.10.2


From d03f2170546d2f0c236a42706d211e15ffb64184 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:11 +0300
Subject: perf tools: Expand perf_event__synthesize_sample()

Expand perf_event__synthesize_sample() to handle all sample format bits.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-10-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index ffacd46..9b336fd 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -301,7 +301,9 @@ found:
 	sample_sw.period = sample->period;
 	sample_sw.time	 = sample->time;
 	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
-				      &sample_sw, false);
+				      evsel->attr.sample_regs_user,
+				      evsel->attr.read_format, &sample_sw,
+				      false);
 	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
 	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
 }
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index f959801..1c80e13 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -230,6 +230,7 @@ int perf_event__preprocess_sample(const union perf_event *self,
 const char *perf_event__name(unsigned int id);
 
 int perf_event__synthesize_sample(union perf_event *event, u64 type,
+				  u64 sample_regs_user, u64 read_format,
 				  const struct perf_sample *sample,
 				  bool swapped);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1184188..7d62373 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1416,7 +1416,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	}
 
 	if (type & PERF_SAMPLE_REGS_USER) {
-		/* First u64 tells us if we have any regs in sample. */
 		OVERFLOW_CHECK_u64(array);
 		data->user_regs.abi = *array;
 		array++;
@@ -1467,11 +1466,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 }
 
 int perf_event__synthesize_sample(union perf_event *event, u64 type,
+				  u64 sample_regs_user, u64 read_format,
 				  const struct perf_sample *sample,
 				  bool swapped)
 {
 	u64 *array;
-
+	size_t sz;
 	/*
 	 * used for cross-endian analysis. See git commit 65014ab3
 	 * for why this goofiness is needed.
@@ -1544,6 +1544,97 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 		array++;
 	}
 
+	if (type & PERF_SAMPLE_READ) {
+		if (read_format & PERF_FORMAT_GROUP)
+			*array = sample->read.group.nr;
+		else
+			*array = sample->read.one.value;
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			*array = sample->read.time_enabled;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			*array = sample->read.time_running;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			sz = sample->read.group.nr *
+			     sizeof(struct sample_read_value);
+			memcpy(array, sample->read.group.values, sz);
+			array = (void *)array + sz;
+		} else {
+			*array = sample->read.one.id;
+			array++;
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		sz = (sample->callchain->nr + 1) * sizeof(u64);
+		memcpy(array, sample->callchain, sz);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		u.val32[0] = sample->raw_size;
+		if (WARN_ONCE(swapped,
+			      "Endianness of raw data not corrected!\n")) {
+			/*
+			 * Inverse of what is done in perf_evsel__parse_sample
+			 */
+			u.val32[0] = bswap_32(u.val32[0]);
+			u.val32[1] = bswap_32(u.val32[1]);
+			u.val64 = bswap_64(u.val64);
+		}
+		*array = u.val64;
+		array = (void *)array + sizeof(u32);
+
+		memcpy(array, sample->raw_data, sample->raw_size);
+		array = (void *)array + sample->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+		sz += sizeof(u64);
+		memcpy(array, sample->branch_stack, sz);
+		array = (void *)array + sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		if (sample->user_regs.abi) {
+			*array++ = sample->user_regs.abi;
+			sz = hweight_long(sample_regs_user) * sizeof(u64);
+			memcpy(array, sample->user_regs.regs, sz);
+			array = (void *)array + sz;
+		} else {
+			*array++ = 0;
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		sz = sample->user_stack.size;
+		*array++ = sz;
+		if (sz) {
+			memcpy(array, sample->user_stack.data, sz);
+			array = (void *)array + sz;
+			*array++ = sz;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT) {
+		*array = sample->weight;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_DATA_SRC) {
+		*array = sample->data_src;
+		array++;
+	}
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From b1cf6f65aa7096984836addab7cec6b5b6d4393a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:12 +0300
Subject: perf tools: Add a function to calculate sample event size

Add perf_event__sample_event_size() which can be used when synthesizing
sample events to determine how big the resulting event will be, and
therefore how much memory to allocate.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-11-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1c80e13..93130d8 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -229,6 +229,8 @@ int perf_event__preprocess_sample(const union perf_event *self,
 
 const char *perf_event__name(unsigned int id);
 
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+				     u64 sample_regs_user, u64 read_format);
 int perf_event__synthesize_sample(union perf_event *event, u64 type,
 				  u64 sample_regs_user, u64 read_format,
 				  const struct perf_sample *sample,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7d62373..e8745fb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1465,6 +1465,98 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	return 0;
 }
 
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+				     u64 sample_regs_user, u64 read_format)
+{
+	size_t sz, result = sizeof(struct sample_event);
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_IP)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_TID)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_TIME)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_ADDR)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_ID)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_CPU)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_PERIOD)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_READ) {
+		result += sizeof(u64);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+			result += sizeof(u64);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+			result += sizeof(u64);
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			sz = sample->read.group.nr *
+			     sizeof(struct sample_read_value);
+			result += sz;
+		} else {
+			result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		sz = (sample->callchain->nr + 1) * sizeof(u64);
+		result += sz;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		result += sizeof(u32);
+		result += sample->raw_size;
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+		sz += sizeof(u64);
+		result += sz;
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		if (sample->user_regs.abi) {
+			result += sizeof(u64);
+			sz = hweight_long(sample_regs_user) * sizeof(u64);
+			result += sz;
+		} else {
+			result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		sz = sample->user_stack.size;
+		result += sizeof(u64);
+		if (sz) {
+			result += sz;
+			result += sizeof(u64);
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		result += sizeof(u64);
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		result += sizeof(u64);
+
+	return result;
+}
+
 int perf_event__synthesize_sample(union perf_event *event, u64 type,
 				  u64 sample_regs_user, u64 read_format,
 				  const struct perf_sample *sample,
-- 
cgit v0.10.2


From 045f8cd8542d2fcd424a32fa10fcd3dd29c6d374 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Aug 2013 11:23:13 +0300
Subject: perf tests: Add a sample parsing test

Add a test that checks that sample parsing is correctly implemented.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377591794-30553-12-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 845cc76..ecebfd0 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -439,6 +439,7 @@ PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
 ifneq ($(OUTPUT),)
   CFLAGS += -I$(OUTPUT)
 endif
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
 
 ifdef NO_LIBELF
 EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index f5af192..8ad9415 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -104,6 +104,10 @@ static struct test {
 		.func = test__code_reading,
 	},
 	{
+		.desc = "Test sample parsing",
+		.func = test__sample_parsing,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
new file mode 100644
index 0000000..77f598d
--- /dev/null
+++ b/tools/perf/tests/sample-parsing.c
@@ -0,0 +1,316 @@
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "util.h"
+#include "event.h"
+#include "evsel.h"
+
+#include "tests.h"
+
+#define COMP(m) do {					\
+	if (s1->m != s2->m) {				\
+		pr_debug("Samples differ at '"#m"'\n");	\
+		return false;				\
+	}						\
+} while (0)
+
+#define MCOMP(m) do {					\
+	if (memcmp(&s1->m, &s2->m, sizeof(s1->m))) {	\
+		pr_debug("Samples differ at '"#m"'\n");	\
+		return false;				\
+	}						\
+} while (0)
+
+static bool samples_same(const struct perf_sample *s1,
+			 const struct perf_sample *s2, u64 type, u64 regs_user,
+			 u64 read_format)
+{
+	size_t i;
+
+	if (type & PERF_SAMPLE_IDENTIFIER)
+		COMP(id);
+
+	if (type & PERF_SAMPLE_IP)
+		COMP(ip);
+
+	if (type & PERF_SAMPLE_TID) {
+		COMP(pid);
+		COMP(tid);
+	}
+
+	if (type & PERF_SAMPLE_TIME)
+		COMP(time);
+
+	if (type & PERF_SAMPLE_ADDR)
+		COMP(addr);
+
+	if (type & PERF_SAMPLE_ID)
+		COMP(id);
+
+	if (type & PERF_SAMPLE_STREAM_ID)
+		COMP(stream_id);
+
+	if (type & PERF_SAMPLE_CPU)
+		COMP(cpu);
+
+	if (type & PERF_SAMPLE_PERIOD)
+		COMP(period);
+
+	if (type & PERF_SAMPLE_READ) {
+		if (read_format & PERF_FORMAT_GROUP)
+			COMP(read.group.nr);
+		else
+			COMP(read.one.value);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+			COMP(read.time_enabled);
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+			COMP(read.time_running);
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			for (i = 0; i < s1->read.group.nr; i++)
+				MCOMP(read.group.values[i]);
+		} else {
+			COMP(read.one.id);
+		}
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		COMP(callchain->nr);
+		for (i = 0; i < s1->callchain->nr; i++)
+			COMP(callchain->ips[i]);
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		COMP(raw_size);
+		if (memcmp(s1->raw_data, s2->raw_data, s1->raw_size)) {
+			pr_debug("Samples differ at 'raw_data'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_BRANCH_STACK) {
+		COMP(branch_stack->nr);
+		for (i = 0; i < s1->branch_stack->nr; i++)
+			MCOMP(branch_stack->entries[i]);
+	}
+
+	if (type & PERF_SAMPLE_REGS_USER) {
+		size_t sz = hweight_long(regs_user) * sizeof(u64);
+
+		COMP(user_regs.abi);
+		if (s1->user_regs.abi &&
+		    (!s1->user_regs.regs || !s2->user_regs.regs ||
+		     memcmp(s1->user_regs.regs, s2->user_regs.regs, sz))) {
+			pr_debug("Samples differ at 'user_regs'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_STACK_USER) {
+		COMP(user_stack.size);
+		if (memcmp(s1->user_stack.data, s1->user_stack.data,
+			   s1->user_stack.size)) {
+			pr_debug("Samples differ at 'user_stack'\n");
+			return false;
+		}
+	}
+
+	if (type & PERF_SAMPLE_WEIGHT)
+		COMP(weight);
+
+	if (type & PERF_SAMPLE_DATA_SRC)
+		COMP(data_src);
+
+	return true;
+}
+
+static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format)
+{
+	struct perf_evsel evsel = {
+		.needs_swap = false,
+		.attr = {
+			.sample_type = sample_type,
+			.sample_regs_user = sample_regs_user,
+			.read_format = read_format,
+		},
+	};
+	union perf_event *event;
+	union {
+		struct ip_callchain callchain;
+		u64 data[64];
+	} callchain = {
+		/* 3 ips */
+		.data = {3, 201, 202, 203},
+	};
+	union {
+		struct branch_stack branch_stack;
+		u64 data[64];
+	} branch_stack = {
+		/* 1 branch_entry */
+		.data = {1, 211, 212, 213},
+	};
+	u64 user_regs[64];
+	const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
+	const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL};
+	struct perf_sample sample = {
+		.ip		= 101,
+		.pid		= 102,
+		.tid		= 103,
+		.time		= 104,
+		.addr		= 105,
+		.id		= 106,
+		.stream_id	= 107,
+		.period		= 108,
+		.weight		= 109,
+		.cpu		= 110,
+		.raw_size	= sizeof(raw_data),
+		.data_src	= 111,
+		.raw_data	= (void *)raw_data,
+		.callchain	= &callchain.callchain,
+		.branch_stack	= &branch_stack.branch_stack,
+		.user_regs	= {
+			.abi	= PERF_SAMPLE_REGS_ABI_64,
+			.regs	= user_regs,
+		},
+		.user_stack	= {
+			.size	= sizeof(data),
+			.data	= (void *)data,
+		},
+		.read		= {
+			.time_enabled = 0x030a59d664fca7deULL,
+			.time_running = 0x011b6ae553eb98edULL,
+		},
+	};
+	struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
+	struct perf_sample sample_out;
+	size_t i, sz, bufsz;
+	int err, ret = -1;
+
+	for (i = 0; i < sizeof(user_regs); i++)
+		*(i + (u8 *)user_regs) = i & 0xfe;
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		sample.read.group.nr     = 4;
+		sample.read.group.values = values;
+	} else {
+		sample.read.one.value = 0x08789faeb786aa87ULL;
+		sample.read.one.id    = 99;
+	}
+
+	sz = perf_event__sample_event_size(&sample, sample_type,
+					   sample_regs_user, read_format);
+	bufsz = sz + 4096; /* Add a bit for overrun checking */
+	event = malloc(bufsz);
+	if (!event) {
+		pr_debug("malloc failed\n");
+		return -1;
+	}
+
+	memset(event, 0xff, bufsz);
+	event->header.type = PERF_RECORD_SAMPLE;
+	event->header.misc = 0;
+	event->header.size = sz;
+
+	err = perf_event__synthesize_sample(event, sample_type,
+					    sample_regs_user, read_format,
+					    &sample, false);
+	if (err) {
+		pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+			 "perf_event__synthesize_sample", sample_type, err);
+		goto out_free;
+	}
+
+	/* The data does not contain 0xff so we use that to check the size */
+	for (i = bufsz; i > 0; i--) {
+		if (*(i - 1 + (u8 *)event) != 0xff)
+			break;
+	}
+	if (i != sz) {
+		pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+			 i, sz);
+		goto out_free;
+	}
+
+	evsel.sample_size = __perf_evsel__sample_size(sample_type);
+
+	err = perf_evsel__parse_sample(&evsel, event, &sample_out);
+	if (err) {
+		pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+			 "perf_evsel__parse_sample", sample_type, err);
+		goto out_free;
+	}
+
+	if (!samples_same(&sample, &sample_out, sample_type,
+			  sample_regs_user, read_format)) {
+		pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+			 sample_type);
+		goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	free(event);
+	if (ret && read_format)
+		pr_debug("read_format %#"PRIx64"\n", read_format);
+	return ret;
+}
+
+/**
+ * test__sample_parsing - test sample parsing.
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample.  The test
+ * checks sample format bits separately and together.  If the test passes %0 is
+ * returned, otherwise %-1 is returned.
+ */
+int test__sample_parsing(void)
+{
+	const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
+	u64 sample_type;
+	u64 sample_regs_user;
+	size_t i;
+	int err;
+
+	/*
+	 * Fail the test if it has not been updated when new sample format bits
+	 * were added.
+	 */
+	if (PERF_SAMPLE_MAX > PERF_SAMPLE_IDENTIFIER << 1) {
+		pr_debug("sample format has changed - test needs updating\n");
+		return -1;
+	}
+
+	/* Test each sample format bit separately */
+	for (sample_type = 1; sample_type != PERF_SAMPLE_MAX;
+	     sample_type <<= 1) {
+		/* Test read_format variations */
+		if (sample_type == PERF_SAMPLE_READ) {
+			for (i = 0; i < ARRAY_SIZE(rf); i++) {
+				err = do_test(sample_type, 0, rf[i]);
+				if (err)
+					return err;
+			}
+			continue;
+		}
+
+		if (sample_type == PERF_SAMPLE_REGS_USER)
+			sample_regs_user = 0x3fff;
+		else
+			sample_regs_user = 0;
+
+		err = do_test(sample_type, sample_regs_user, 0);
+		if (err)
+			return err;
+	}
+
+	/* Test all sample format bits together */
+	sample_type = PERF_SAMPLE_MAX - 1;
+	sample_regs_user = 0x3fff;
+	for (i = 0; i < ARRAY_SIZE(rf); i++) {
+		err = do_test(sample_type, sample_regs_user, rf[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index c748f53..83d5b71 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -37,5 +37,6 @@ int test__task_exit(void);
 int test__sw_clock_freq(void);
 int test__perf_time_to_tsc(void);
 int test__code_reading(void);
+int test__sample_parsing(void);
 
 #endif /* TESTS_H */
-- 
cgit v0.10.2


From a2f2804a7142b043dafd39f21b86777840e1a78c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 28 Aug 2013 22:29:51 -0600
Subject: perf evlist: Add tracepoint lookup by name

Will be used by upcoming perf-trace replay option.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377750593-48046-2-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6a629af..5df4ca9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -208,6 +208,21 @@ perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
 	return NULL;
 }
 
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
+				     const char *name)
+{
+	struct perf_evsel *evsel;
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
+		    (strcmp(evsel->name, name) == 0))
+			return evsel;
+	}
+
+	return NULL;
+}
+
 int perf_evlist__add_newtp(struct perf_evlist *evlist,
 			   const char *sys, const char *name, void *handler)
 {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index ab95d72..841a394 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -74,6 +74,10 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
 struct perf_evsel *
 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
 
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
+				     const char *name);
+
 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 			 int cpu, int thread, u64 id);
 
-- 
cgit v0.10.2


From 6810fc915f7a89d8134edb3996dbbf8eac386c26 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 28 Aug 2013 22:29:52 -0600
Subject: perf trace: Add option to analyze events in a file versus live

Allows raw_syscalls:* events to be captured and then analyzed at a later time.

v2: change -i option from inherit to input name for consistency with
    other perf commands

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377750593-48046-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index fe19811..daccd2c 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -74,6 +74,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --sched:
 	Accrue thread runtime and provide a summary at the end of the session.
 
+-i
+--input
+	Process events from a given perf data file.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 88387c5..2a6ebe1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4,6 +4,7 @@
 #include "util/debug.h"
 #include "util/evlist.h"
 #include "util/machine.h"
+#include "util/session.h"
 #include "util/thread.h"
 #include "util/parse-options.h"
 #include "util/strlist.h"
@@ -652,6 +653,36 @@ out_dump:
 	return 0;
 }
 
+static int trace__process_sample(struct perf_tool *tool,
+				 union perf_event *event __maybe_unused,
+				 struct perf_sample *sample,
+				 struct perf_evsel *evsel,
+				 struct machine *machine __maybe_unused)
+{
+	struct trace *trace = container_of(tool, struct trace, tool);
+	int err = 0;
+
+	tracepoint_handler handler = evsel->handler.func;
+
+	if (trace->base_time == 0)
+		trace->base_time = sample->time;
+
+	if (handler)
+		handler(trace, evsel, sample);
+
+	return err;
+}
+
+static bool
+perf_session__has_tp(struct perf_session *session, const char *name)
+{
+	struct perf_evsel *evsel;
+
+	evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
+
+	return evsel != NULL;
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = perf_evlist__new();
@@ -791,6 +822,65 @@ out:
 	return err;
 }
 
+static int trace__replay(struct trace *trace)
+{
+	const struct perf_evsel_str_handler handlers[] = {
+		{ "raw_syscalls:sys_enter",  trace__sys_enter, },
+		{ "raw_syscalls:sys_exit",   trace__sys_exit, },
+	};
+
+	struct perf_session *session;
+	int err = -1;
+
+	trace->tool.sample	  = trace__process_sample;
+	trace->tool.mmap	  = perf_event__process_mmap;
+	trace->tool.comm	  = perf_event__process_comm;
+	trace->tool.exit	  = perf_event__process_exit;
+	trace->tool.fork	  = perf_event__process_fork;
+	trace->tool.attr	  = perf_event__process_attr;
+	trace->tool.tracing_data = perf_event__process_tracing_data;
+	trace->tool.build_id	  = perf_event__process_build_id;
+
+	trace->tool.ordered_samples = true;
+	trace->tool.ordering_requires_timestamps = true;
+
+	/* add tid to output */
+	trace->multiple_threads = true;
+
+	if (symbol__init() < 0)
+		return -1;
+
+	session = perf_session__new(input_name, O_RDONLY, 0, false,
+				    &trace->tool);
+	if (session == NULL)
+		return -ENOMEM;
+
+	err = perf_session__set_tracepoints_handlers(session, handlers);
+	if (err)
+		goto out;
+
+	if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
+		pr_err("Data file does not have raw_syscalls:sys_enter events\n");
+		goto out;
+	}
+
+	if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
+		pr_err("Data file does not have raw_syscalls:sys_exit events\n");
+		goto out;
+	}
+
+	setup_pager();
+
+	err = perf_session__process_events(session, &trace->tool);
+	if (err)
+		pr_err("Failed to process events, error %d", err);
+
+out:
+	perf_session__delete(session);
+
+	return err;
+}
+
 static size_t trace__fprintf_threads_header(FILE *fp)
 {
 	size_t printed;
@@ -892,6 +982,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
 		    "list of events to trace"),
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
+	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
 		    "trace events on existing process id"),
 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
@@ -900,7 +991,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
 		    "list of cpus to monitor"),
-	OPT_BOOLEAN('i', "no-inherit", &trace.opts.no_inherit,
+	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages,
 		     "number of mmap data pages"),
@@ -958,7 +1049,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (!argc && perf_target__none(&trace.opts.target))
 		trace.opts.target.system_wide = true;
 
-	err = trace__run(&trace, argc, argv);
+	if (input_name)
+		err = trace__replay(&trace);
+	else
+		err = trace__run(&trace, argc, argv);
 
 	if (trace.sched && !err)
 		trace__fprintf_thread_summary(&trace, trace.output);
-- 
cgit v0.10.2


From bdc896617b4fcaa9c89da9a9c5b72660f6741d46 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 28 Aug 2013 22:29:53 -0600
Subject: perf trace: Honor target pid / tid options when analyzing a file

Allows capturing raw_syscalls events for all processes, or for all threads
in a task, and then analyzing only specific ones.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377750593-48046-4-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 2a6ebe1..845facc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -8,6 +8,7 @@
 #include "util/thread.h"
 #include "util/parse-options.h"
 #include "util/strlist.h"
+#include "util/intlist.h"
 #include "util/thread_map.h"
 
 #include <libaudit.h>
@@ -259,6 +260,8 @@ struct trace {
 	unsigned long		nr_events;
 	struct strlist		*ev_qualifier;
 	bool			not_ev_qualifier;
+	struct intlist		*tid_list;
+	struct intlist		*pid_list;
 	bool			sched;
 	bool			multiple_threads;
 	double			duration_filter;
@@ -653,6 +656,18 @@ out_dump:
 	return 0;
 }
 
+static bool skip_sample(struct trace *trace, struct perf_sample *sample)
+{
+	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
+	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
+		return false;
+
+	if (trace->pid_list || trace->tid_list)
+		return true;
+
+	return false;
+}
+
 static int trace__process_sample(struct perf_tool *tool,
 				 union perf_event *event __maybe_unused,
 				 struct perf_sample *sample,
@@ -664,6 +679,9 @@ static int trace__process_sample(struct perf_tool *tool,
 
 	tracepoint_handler handler = evsel->handler.func;
 
+	if (skip_sample(trace, sample))
+		return 0;
+
 	if (trace->base_time == 0)
 		trace->base_time = sample->time;
 
@@ -683,6 +701,27 @@ perf_session__has_tp(struct perf_session *session, const char *name)
 	return evsel != NULL;
 }
 
+static int parse_target_str(struct trace *trace)
+{
+	if (trace->opts.target.pid) {
+		trace->pid_list = intlist__new(trace->opts.target.pid);
+		if (trace->pid_list == NULL) {
+			pr_err("Error parsing process id string\n");
+			return -EINVAL;
+		}
+	}
+
+	if (trace->opts.target.tid) {
+		trace->tid_list = intlist__new(trace->opts.target.tid);
+		if (trace->tid_list == NULL) {
+			pr_err("Error parsing thread id string\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = perf_evlist__new();
@@ -869,6 +908,10 @@ static int trace__replay(struct trace *trace)
 		goto out;
 	}
 
+	err = parse_target_str(trace);
+	if (err != 0)
+		goto out;
+
 	setup_pager();
 
 	err = perf_session__process_events(session, &trace->tool);
-- 
cgit v0.10.2


From f2935f3e585226b8203ec3861907e1cb16ad3d6a Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 27 Aug 2013 10:50:40 -0600
Subject: perf trace: Handle missing HUGEPAGE defines

Needed for compile on Fedora 12 which goes back to the 2.6.32 kernel.
Might be needed for RHEL6. I use F12 to compile static binaries for
Wind River Linux 4.3.

Signed-off-by: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/n/tip-nd0d7rbajgm8k6tah3xv34v1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 845facc..69a065e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -71,7 +71,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned
 	P_MMAP_FLAG(FILE);
 	P_MMAP_FLAG(FIXED);
 	P_MMAP_FLAG(GROWSDOWN);
+#ifdef MAP_HUGETLB
 	P_MMAP_FLAG(HUGETLB);
+#endif
 	P_MMAP_FLAG(LOCKED);
 	P_MMAP_FLAG(NONBLOCK);
 	P_MMAP_FLAG(NORESERVE);
@@ -110,8 +112,12 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, uns
 #endif
 	P_MADV_BHV(MERGEABLE);
 	P_MADV_BHV(UNMERGEABLE);
+#ifdef MADV_HUGEPAGE
 	P_MADV_BHV(HUGEPAGE);
+#endif
+#ifdef MADV_NOHUGEPAGE
 	P_MADV_BHV(NOHUGEPAGE);
+#endif
 #ifdef MADV_DONTDUMP
 	P_MADV_BHV(DONTDUMP);
 #endif
-- 
cgit v0.10.2


From ae23bff1d71f8b416ed740bc458df67355c77c92 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Sat, 24 Aug 2013 16:45:54 +0200
Subject: perf: Prevent race in unthrottling code

The current throttling code triggers the WARN below with the following
workload (only hit on an AMD machine with 48 CPUs):

  # while [ 1 ]; do perf record perf bench sched messaging; done

  WARNING: at arch/x86/kernel/cpu/perf_event.c:1054 x86_pmu_start+0xc6/0x100()
  SNIP
  Call Trace:
   <IRQ>  [<ffffffff815f62d6>] dump_stack+0x19/0x1b
   [<ffffffff8105f531>] warn_slowpath_common+0x61/0x80
   [<ffffffff8105f60a>] warn_slowpath_null+0x1a/0x20
   [<ffffffff810213a6>] x86_pmu_start+0xc6/0x100
   [<ffffffff81129dd2>] perf_adjust_freq_unthr_context.part.75+0x182/0x1a0
   [<ffffffff8112a058>] perf_event_task_tick+0xc8/0xf0
   [<ffffffff81093221>] scheduler_tick+0xd1/0x140
   [<ffffffff81070176>] update_process_times+0x66/0x80
   [<ffffffff810b9565>] tick_sched_handle.isra.15+0x25/0x60
   [<ffffffff810b95e1>] tick_sched_timer+0x41/0x60
   [<ffffffff81087c24>] __run_hrtimer+0x74/0x1d0
   [<ffffffff810b95a0>] ? tick_sched_handle.isra.15+0x60/0x60
   [<ffffffff81088407>] hrtimer_interrupt+0xf7/0x240
   [<ffffffff81606829>] smp_apic_timer_interrupt+0x69/0x9c
   [<ffffffff8160569d>] apic_timer_interrupt+0x6d/0x80
   <EOI>  [<ffffffff81129f74>] ? __perf_event_task_sched_in+0x184/0x1a0
   [<ffffffff814dd937>] ? kfree_skbmem+0x37/0x90
   [<ffffffff815f2c47>] ? __slab_free+0x1ac/0x30f
   [<ffffffff8118143d>] ? kfree+0xfd/0x130
   [<ffffffff81181622>] kmem_cache_free+0x1b2/0x1d0
   [<ffffffff814dd937>] kfree_skbmem+0x37/0x90
   [<ffffffff814e03c4>] consume_skb+0x34/0x80
   [<ffffffff8158b057>] unix_stream_recvmsg+0x4e7/0x820
   [<ffffffff814d5546>] sock_aio_read.part.7+0x116/0x130
   [<ffffffff8112c10c>] ? __perf_sw_event+0x19c/0x1e0
   [<ffffffff814d5581>] sock_aio_read+0x21/0x30
   [<ffffffff8119a5d0>] do_sync_read+0x80/0xb0
   [<ffffffff8119ac85>] vfs_read+0x145/0x170
   [<ffffffff8119b699>] SyS_read+0x49/0xa0
   [<ffffffff810df516>] ? __audit_syscall_exit+0x1f6/0x2a0
   [<ffffffff81604a19>] system_call_fastpath+0x16/0x1b
  ---[ end trace 622b7e226c4a766a ]---

The reason is a race in perf_event_task_tick() throttling code.
The race flow (simplified code):

  - perf_throttled_count is a per-CPU variable and is the
    CPU throttling flag, here starting at 0

  - perf_throttled_seq is the sequence/domain for the allowed
    count of interrupts within the tick; it is increased
    on each tick

    on single CPU (CPU bounded event):

      ... workload

    perf_event_task_tick:
    |
    | T0    inc(perf_throttled_seq)
    | T1    needs_unthr = xchg(perf_throttled_count, 0) == 0
     tick gets interrupted:

            ... event gets throttled under new seq ...

      T2    last NMI comes, event is throttled - inc(perf_throttled_count)

     back to tick:
    | perf_adjust_freq_unthr_context:
    |
    | T3    unthrottling is skipped for event (needs_unthr == 0)
    | T4    event is stopped and started via freq adjustment
    |
    tick ends

      ... workload
      ... no sample is hit for event ...

    perf_event_task_tick:
    |
    | T5    needs_unthr = xchg(perf_throttled_count, 0) != 0 (from T2)
    | T6    unthrottling is done on event (interrupts == MAX_INTERRUPTS)
    |       event is already started (from T4) -> WARN

Fix this by no longer checking needs_unthr, so that all events
are checked for unthrottling.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1377355554-8934-1-git-send-email-jolsa@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f86599e..258eaaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2712,7 +2712,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
 
 		hwc = &event->hw;
 
-		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+		if (hwc->interrupts == MAX_INTERRUPTS) {
 			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
-- 
cgit v0.10.2


From 53ad0447208d3f5897f673ca0b16c776583eedba Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 18 Jul 2013 17:02:23 +0800
Subject: perf/x86: use INTEL_UEVENT_EXTRA_REG to define MSR_OFFCORE_RSP_X

Silvermont (22nm Atom) has two offcore response configuration MSRs.
Unlike other Intel CPUs, its event code for MSR_OFFCORE_RSP_1 is 0x02b7.

To avoid complicating intel_fixup_er(), use INTEL_UEVENT_EXTRA_REG to
define MSR_OFFCORE_RSP_X. So intel_fixup_er() can find the event code
for OFFCORE_RSP_N by x86_pmu.extra_regs[N].event.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374138144-17278-1-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a45d8d4..0d59a42 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -81,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
@@ -143,8 +144,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 	EVENT_EXTRA_END
 };
@@ -163,15 +165,17 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 };
 
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
 
 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	EVENT_EXTRA_END
 };
@@ -1301,11 +1305,11 @@ static void intel_fixup_er(struct perf_event *event, int idx)
 
 	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01b7;
+		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
 	} else if (idx == EXTRA_REG_RSP_1) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
+		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
 }
-- 
cgit v0.10.2


From 1fa64180fbf7a33b7a30636a2f174a5cad68d48f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 18 Jul 2013 17:02:24 +0800
Subject: perf/x86: Add Silvermont (22nm Atom) support

Compared to the old Atom, Silvermont has offcore response support and
has more events that support PEBS.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1374138144-17278-2-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 97e557b..cc16faa 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -641,6 +641,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[];
 
 extern struct event_constraint intel_atom_pebs_event_constraints[];
 
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0d59a42..0abf674 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -164,6 +164,15 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 	EVENT_CONSTRAINT_END
 };
 
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+	FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+	EVENT_CONSTRAINT_END
+};
+
 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
 	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
@@ -886,6 +895,140 @@ static __initconst const u64 atom_hw_cache_event_ids
  },
 };
 
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1),
+	EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ		SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE		SNB_DMND_RFO
+#define SLM_DMND_PREFETCH	(SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY		(SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS		SNB_RESP_ANY
+#define SLM_LLC_MISS		(SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+		[ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+	},
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_WRITE) ] = {
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_PREFETCH) ] = {
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
 {
 	/* user explicitly requested branch sampling */
@@ -2180,6 +2323,21 @@ __init int intel_pmu_init(void)
 		pr_cont("Atom events, ");
 		break;
 
+	case 55: /* Atom 22nm "Silvermont" */
+		memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+			sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_atom();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_slm_extra_regs;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		pr_cont("Silvermont events, ");
+		break;
+
 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
 	case 47: /* 32 nm Xeon E7 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3065c57..442fcc2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -517,6 +517,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+	INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
+	INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
+	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
+	INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
+	INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
+	INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
+	INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
+	INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
+	INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
+	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
+	INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
+	INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
+	INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
+	INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
-- 
cgit v0.10.2


From 13d7a2410fa637f450a29ecb515ac318ee40c741 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Wed, 21 Aug 2013 12:10:24 +0200
Subject: perf: Add attr->mmap2 attribute to an event

Adds a new PERF_RECORD_MMAP2 record type which is in essence
an expanded version of PERF_RECORD_MMAP.

Used to request mmap records with more information about
the mapping, including device major, minor and the inode
number and generation for mappings associated with files
or shared memory segments. Works for code and data
(with attr->mmap_data set).

Existing PERF_RECORD_MMAP record is unmodified by this patch.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Link: http://lkml.kernel.org/r/1377079825-19057-2-git-send-email-eranian@google.com
[ Added Al to the Cc:. Are the ino, maj/min exports of vma->vm_file OK? ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 42cb7b6..a77f43a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -276,8 +276,9 @@ struct perf_event_attr {
 
 				exclude_callchain_kernel : 1, /* exclude kernel callchains */
 				exclude_callchain_user   : 1, /* exclude user callchains */
+				mmap2          :  1, /* include mmap with inode data     */
 
-				__reserved_1   : 41;
+				__reserved_1   : 40;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -651,6 +652,27 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SAMPLE			= 9,
 
+	/*
+	 * The MMAP2 records are an augmented version of MMAP, they add
+	 * maj, min, ino numbers to be used to uniquely identify each mapping
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	u32				maj;
+	 *	u32				min;
+	 *	u64				ino;
+	 *	u64				ino_generation;
+	 *	char				filename[];
+	 * 	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_MMAP2			= 10,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 15d0f24..c7ee497 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4776,7 +4776,7 @@ next:
 /*
  * task tracking -- fork/exit
  *
- * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task
  */
 
 struct perf_task_event {
@@ -4796,8 +4796,9 @@ struct perf_task_event {
 
 static int perf_event_task_match(struct perf_event *event)
 {
-	return event->attr.comm || event->attr.mmap ||
-	       event->attr.mmap_data || event->attr.task;
+	return event->attr.comm  || event->attr.mmap ||
+	       event->attr.mmap2 || event->attr.mmap_data ||
+	       event->attr.task;
 }
 
 static void perf_event_task_output(struct perf_event *event,
@@ -4992,6 +4993,9 @@ struct perf_mmap_event {
 
 	const char		*file_name;
 	int			file_size;
+	int			maj, min;
+	u64			ino;
+	u64			ino_generation;
 
 	struct {
 		struct perf_event_header	header;
@@ -5012,7 +5016,7 @@ static int perf_event_mmap_match(struct perf_event *event,
 	int executable = vma->vm_flags & VM_EXEC;
 
 	return (!executable && event->attr.mmap_data) ||
-	       (executable && event->attr.mmap);
+	       (executable && (event->attr.mmap || event->attr.mmap2));
 }
 
 static void perf_event_mmap_output(struct perf_event *event,
@@ -5027,6 +5031,13 @@ static void perf_event_mmap_output(struct perf_event *event,
 	if (!perf_event_mmap_match(event, data))
 		return;
 
+	if (event->attr.mmap2) {
+		mmap_event->event_id.header.type = PERF_RECORD_MMAP2;
+		mmap_event->event_id.header.size += sizeof(mmap_event->maj);
+		mmap_event->event_id.header.size += sizeof(mmap_event->min);
+		mmap_event->event_id.header.size += sizeof(mmap_event->ino);
+	}
+
 	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
 	ret = perf_output_begin(&handle, event,
 				mmap_event->event_id.header.size);
@@ -5037,6 +5048,14 @@ static void perf_event_mmap_output(struct perf_event *event,
 	mmap_event->event_id.tid = perf_event_tid(event, current);
 
 	perf_output_put(&handle, mmap_event->event_id);
+
+	if (event->attr.mmap2) {
+		perf_output_put(&handle, mmap_event->maj);
+		perf_output_put(&handle, mmap_event->min);
+		perf_output_put(&handle, mmap_event->ino);
+		perf_output_put(&handle, mmap_event->ino_generation);
+	}
+
 	__output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
 
@@ -5051,6 +5070,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 {
 	struct vm_area_struct *vma = mmap_event->vma;
 	struct file *file = vma->vm_file;
+	int maj = 0, min = 0;
+	u64 ino = 0, gen = 0;
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
@@ -5059,6 +5080,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	memset(tmp, 0, sizeof(tmp));
 
 	if (file) {
+		struct inode *inode;
+		dev_t dev;
 		/*
 		 * d_path works from the end of the rb backwards, so we
 		 * need to add enough zero bytes after the string to handle
@@ -5074,6 +5097,13 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 			name = strncpy(tmp, "//toolong", sizeof(tmp));
 			goto got_name;
 		}
+		inode = file_inode(vma->vm_file);
+		dev = inode->i_sb->s_dev;
+		ino = inode->i_ino;
+		gen = inode->i_generation;
+		maj = MAJOR(dev);
+		min = MINOR(dev);
+
 	} else {
 		if (arch_vma_name(mmap_event->vma)) {
 			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
@@ -5104,6 +5134,10 @@ got_name:
 
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
+	mmap_event->maj = maj;
+	mmap_event->min = min;
+	mmap_event->ino = ino;
+	mmap_event->ino_generation = gen;
 
 	if (!(vma->vm_flags & VM_EXEC))
 		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5140,6 +5174,10 @@ void perf_event_mmap(struct vm_area_struct *vma)
 			.len    = vma->vm_end - vma->vm_start,
 			.pgoff  = (u64)vma->vm_pgoff << PAGE_SHIFT,
 		},
+		/* .maj (attr_mmap2 only) */
+		/* .min (attr_mmap2 only) */
+		/* .ino (attr_mmap2 only) */
+		/* .ino_generation (attr_mmap2 only) */
 	};
 
 	perf_event_mmap_event(&mmap_event);
-- 
cgit v0.10.2


From 274481de6cb69abdb49403ff32abb63c23743413 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Fri, 23 Aug 2013 15:51:03 -0400
Subject: perf: Export struct perf_branch_entry to userspace

If PERF_SAMPLE_BRANCH_STACK is enabled then samples are returned
with the format { u64 from, to, flags } but the flags layout
is not specified.

This field has the type struct perf_branch_entry; move this
definition into include/uapi/linux/perf_event.h so users can
access these fields.

This is similar to the existing inclusion of perf_mem_data_src in
the include/uapi/linux/perf_event.h file.

Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Acked-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1308231544420.1889@vincent-weaver-1.um.maine.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c43f6ea..4019d82 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -64,30 +64,6 @@ struct perf_raw_record {
 };
 
 /*
- * single taken branch record layout:
- *
- *      from: source instruction (may not always be a branch insn)
- *        to: branch target
- *   mispred: branch target was mispredicted
- * predicted: branch target was predicted
- *
- * support for mispred, predicted is optional. In case it
- * is not supported mispred = predicted = 0.
- *
- *     in_tx: running in a hardware transaction
- *     abort: aborting a hardware transaction
- */
-struct perf_branch_entry {
-	__u64	from;
-	__u64	to;
-	__u64	mispred:1,  /* target mispredicted */
-		predicted:1,/* target predicted */
-		in_tx:1,    /* in transaction */
-		abort:1,    /* transaction abort */
-		reserved:60;
-};
-
-/*
  * branch stack layout:
  *  nr: number of taken branches stored in entries[]
  *
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index a77f43a..408b8c7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -757,4 +757,28 @@ union perf_mem_data_src {
 #define PERF_MEM_S(a, s) \
 	(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
 
+/*
+ * single taken branch record layout:
+ *
+ *      from: source instruction (may not always be a branch insn)
+ *        to: branch target
+ *   mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * support for mispred, predicted is optional. In case it
+ * is not supported mispred = predicted = 0.
+ *
+ *     in_tx: running in a hardware transaction
+ *     abort: aborting a hardware transaction
+ */
+struct perf_branch_entry {
+	__u64	from;
+	__u64	to;
+	__u64	mispred:1,  /* target mispredicted */
+		predicted:1,/* target predicted */
+		in_tx:1,    /* in transaction */
+		abort:1,    /* transaction abort */
+		reserved:60;
+};
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
-- 
cgit v0.10.2


From 7bfb7e6bdd906f11ee9e751b3fec4f4fc728e818 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 29 Aug 2013 13:59:17 -0700
Subject: perf: Convert kmalloc_node(...GFP_ZERO...) to kzalloc_node()

Use the convenience function instead of __GFP_ZERO.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/f58599ae1a8d7b32d37e9cf283e95fba6452f7f6.1377809875.git.joe@perches.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4cbe032..beeb7cc 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -347,8 +347,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
 	struct amd_nb *nb;
 	int i;
 
-	nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
-			  cpu_to_node(cpu));
+	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
 	if (!nb)
 		return NULL;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 442fcc2..63438aa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -224,7 +224,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -262,7 +262,7 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -295,7 +295,7 @@ static int alloc_ds_buffer(int cpu)
 	int node = cpu_to_node(cpu);
 	struct debug_store *ds;
 
-	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
 	if (unlikely(!ds))
 		return -ENOMEM;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 34c11ae..fd8011e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2713,7 +2713,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
 
 	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
 
-	box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
+	box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
 	if (!box)
 		return NULL;
 
-- 
cgit v0.10.2


From 6e7eeb51106d2e9ef7975214747e76d23c5d01af Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 2 Sep 2013 10:39:21 -0300
Subject: perf trace: Allow syscall arg formatters to mask args

The futex syscall ignores some arguments according to the 'operation'
arg, so allow arg formatters to mask those.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-abqrg3oldgfsdnltfrvso9f7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 69a065e..c29692a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -15,14 +15,16 @@
 #include <stdlib.h>
 #include <sys/mman.h>
 
-static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg)
+static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
+					 unsigned long arg, u8 *arg_mask __maybe_unused)
 {
 	return scnprintf(bf, size, "%#lx", arg);
 }
 
 #define SCA_HEX syscall_arg__scnprintf_hex
 
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned long arg)
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
+					       unsigned long arg, u8 *arg_mask __maybe_unused)
 {
 	int printed = 0, prot = arg;
 
@@ -52,7 +54,8 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned l
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
-static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned long arg)
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
+						unsigned long arg, u8 *arg_mask __maybe_unused)
 {
 	int printed = 0, flags = arg;
 
@@ -92,7 +95,8 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 
-static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, unsigned long arg)
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
+						      unsigned long arg, u8 *arg_mask __maybe_unused)
 {
 	int behavior = arg;
 
@@ -136,7 +140,7 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, uns
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
-	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg);
+	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 *arg_mask);
 	bool	   errmsg;
 	bool	   timeout;
 	bool	   hexret;
@@ -198,7 +202,8 @@ struct syscall {
 	const char	    *name;
 	bool		    filtered;
 	struct syscall_fmt  *fmt;
-	size_t		    (**arg_scnprintf)(char *bf, size_t size, unsigned long arg);
+	size_t		    (**arg_scnprintf)(char *bf, size_t size,
+					      unsigned long arg, u8 *args_mask);
 };
 
 static size_t fprintf_duration(unsigned long t, FILE *fp)
@@ -443,17 +448,23 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
 	if (sc->tp_format != NULL) {
 		struct format_field *field;
+		u8 mask = 0, bit = 1;
+
+		for (field = sc->tp_format->format.fields->next; field;
+		     field = field->next, ++i, bit <<= 1) {
+			if (mask & bit)
+				continue;
 
-		for (field = sc->tp_format->format.fields->next; field; field = field->next) {
 			printed += scnprintf(bf + printed, size - printed,
 					     "%s%s: ", printed ? ", " : "", field->name);
 
-			if (sc->arg_scnprintf && sc->arg_scnprintf[i])
-				printed += sc->arg_scnprintf[i](bf + printed, size - printed, args[i]);
-			else
+			if (sc->arg_scnprintf && sc->arg_scnprintf[i]) {
+				printed += sc->arg_scnprintf[i](bf + printed, size - printed,
+								args[i], &mask);
+			} else {
 				printed += scnprintf(bf + printed, size - printed,
 						     "%ld", args[i]);
-                       ++i;
+			}
 		}
 	} else {
 		while (i < 6) {
-- 
cgit v0.10.2


From f9da0b0c74af25a68b9ac43a2c81f2eea970de5a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 2 Sep 2013 13:46:44 -0300
Subject: perf trace: Add beautifier for futex 'operation' parm

That uses the arg mask mechanism just introduced to suppress ignored
arguments according to the futex operation.

Based on an initial patch from David Ahern that showed the need for some
way to allow args to tell how many further args should be shown.

Initial-patch-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-0k30it46r4hv5eanefbdmj5t@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c29692a..1a6cb74 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -14,6 +14,7 @@
 #include <libaudit.h>
 #include <stdlib.h>
 #include <sys/mman.h>
+#include <linux/futex.h>
 
 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 					 unsigned long arg, u8 *arg_mask __maybe_unused)
@@ -137,6 +138,49 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 
 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg, u8 *arg_mask)
+{
+	enum syscall_futex_args {
+		SCF_UADDR   = (1 << 0),
+		SCF_OP	    = (1 << 1),
+		SCF_VAL	    = (1 << 2),
+		SCF_TIMEOUT = (1 << 3),
+		SCF_UADDR2  = (1 << 4),
+		SCF_VAL3    = (1 << 5),
+	};
+	int op = arg;
+	int cmd = op & FUTEX_CMD_MASK;
+	size_t printed = 0;
+
+	switch (cmd) {
+#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
+	P_FUTEX_OP(WAIT);	    *arg_mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAKE);	    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(FD);		    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(REQUEUE);	    *arg_mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
+	P_FUTEX_OP(CMP_REQUEUE);    *arg_mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(CMP_REQUEUE_PI); *arg_mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(WAKE_OP);							  break;
+	P_FUTEX_OP(LOCK_PI);	    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(UNLOCK_PI);	    *arg_mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(TRYLOCK_PI);	    *arg_mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAIT_BITSET);    *arg_mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAKE_BITSET);    *arg_mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
+	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
+	}
+
+	if (op & FUTEX_PRIVATE_FLAG)
+		printed += scnprintf(bf + printed, size - printed, "|PRIV");
+
+	if (op & FUTEX_CLOCK_REALTIME)
+		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
+
+	return printed;
+}
+
+#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
@@ -153,7 +197,8 @@ static struct syscall_fmt {
 	{ .name	    = "connect",    .errmsg = true, },
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
-	{ .name	    = "futex",	    .errmsg = true, },
+	{ .name	    = "futex",	    .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
 	{ .name	    = "ioctl",	    .errmsg = true,
 	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
-- 
cgit v0.10.2


From fa0097ee690693006ab1aea6c01ad3c851b65c77 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sat, 31 Aug 2013 21:50:51 +0300
Subject: perf: Add a dummy software event to keep tracking

When an event is disabled the "tracking" events selected by the 'mmap',
'comm' and 'task' bits of struct perf_event_attr, are also disabled.
However, the information those events provide is necessary to resolve
symbols for when the main event is re-enabled.

The "tracking" events can be kept enabled by putting them on another
event, but that requires an event that otherwise does nothing.  A new
software event PERF_COUNT_SW_DUMMY is added for that purpose.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377975053-3811-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 408b8c7..ca1d90b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -109,6 +109,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
 	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
+	PERF_COUNT_SW_DUMMY			= 9,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
-- 
cgit v0.10.2


From d22d1a2a2c224b3b378d873589ced27add7ebde4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sat, 31 Aug 2013 21:50:52 +0300
Subject: perf tools: Add support for PERF_COUNT_SW_DUMMY

Add support for the new dummy software event PERF_COUNT_SW_DUMMY.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377975053-3811-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index e8745fb..3612183 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -323,6 +323,7 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
 	"major-faults",
 	"alignment-faults",
 	"emulation-faults",
+	"dummy",
 };
 
 static const char *__perf_evsel__sw_name(u64 config)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9cba923..b2dd077 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -108,6 +108,10 @@ static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "emulation-faults",
 		.alias  = "",
 	},
+	[PERF_COUNT_SW_DUMMY] = {
+		.symbol = "dummy",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 0790452..91346b7 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -145,6 +145,7 @@ context-switches|cs				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW
 cpu-migrations|migrations			{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
 alignment-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
 emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 
 L1-dcache|l1-d|l1d|L1-data		|
 L1-icache|l1-i|l1i|L1-instruction	|
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 381f4fd..71b5412 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -987,6 +987,7 @@ static struct {
 	{ "COUNT_SW_PAGE_FAULTS_MAJ",  PERF_COUNT_SW_PAGE_FAULTS_MAJ },
 	{ "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS },
 	{ "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS },
+	{ "COUNT_SW_DUMMY",            PERF_COUNT_SW_DUMMY },
 
 	{ "SAMPLE_IP",	      PERF_SAMPLE_IP },
 	{ "SAMPLE_TID",	      PERF_SAMPLE_TID },
-- 
cgit v0.10.2


From 395c307089c9f5f0d82c63c11c79227b57bb7ac5 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sat, 31 Aug 2013 21:50:53 +0300
Subject: perf tests: Add 'keep tracking' test

Add a test for the newly added PERF_COUNT_SW_DUMMY event.  The test
checks that tracking events continue when an event is disabled but a
dummy software event is not disabled.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377975053-3811-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index ecebfd0..c5dc1ad 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -465,6 +465,7 @@ endif # NO_LIBELF
 ifndef NO_LIBUNWIND
   LIB_OBJS += $(OUTPUT)util/unwind.o
 endif
+LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
 
 ifndef NO_LIBAUDIT
   BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 8ad9415..8bbeba3 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -108,6 +108,10 @@ static struct test {
 		.func = test__sample_parsing,
 	},
 	{
+		.desc = "Test using a dummy software event to keep tracking",
+		.func = test__keep_tracking,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
new file mode 100644
index 0000000..d444ea2
--- /dev/null
+++ b/tools/perf/tests/keep-tracking.c
@@ -0,0 +1,154 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+#define CHECK__(x) {				\
+	while ((x) < 0) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+}
+
+#define CHECK_NOT_NULL__(x) {			\
+	while ((x) == NULL) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+}
+
+static int find_comm(struct perf_evlist *evlist, const char *comm)
+{
+	union perf_event *event;
+	int i, found;
+
+	found = 0;
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			if (event->header.type == PERF_RECORD_COMM &&
+			    (pid_t)event->comm.pid == getpid() &&
+			    (pid_t)event->comm.tid == getpid() &&
+			    strcmp(event->comm.comm, comm) == 0)
+				found += 1;
+		}
+	}
+	return found;
+}
+
+/**
+ * test__keep_tracking - test using a dummy software event to keep tracking.
+ *
+ * This function implements a test that checks that tracking events continue
+ * when an event is disabled but a dummy software event is not disabled.  If the
+ * test passes %0 is returned, otherwise %-1 is returned.
+ */
+int test__keep_tracking(void)
+{
+	struct perf_record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 4000,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int found, err = -1;
+	const char *comm;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	CHECK_NOT_NULL__(threads);
+
+	cpus = cpu_map__new(NULL);
+	CHECK_NOT_NULL__(cpus);
+
+	evlist = perf_evlist__new();
+	CHECK_NOT_NULL__(evlist);
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+
+	CHECK__(parse_events(evlist, "dummy:u"));
+	CHECK__(parse_events(evlist, "cycles:u"));
+
+	perf_evlist__config(evlist, &opts);
+
+	evsel = perf_evlist__first(evlist);
+
+	evsel->attr.comm = 1;
+	evsel->attr.disabled = 1;
+	evsel->attr.enable_on_exec = 0;
+
+	if (perf_evlist__open(evlist) < 0) {
+		fprintf(stderr, " (not supported)");
+		err = 0;
+		goto out_err;
+	}
+
+	CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false));
+
+	/*
+	 * First, test that a 'comm' event can be found when the event is
+	 * enabled.
+	 */
+
+	perf_evlist__enable(evlist);
+
+	comm = "Test COMM 1";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
+
+	perf_evlist__disable(evlist);
+
+	found = find_comm(evlist, comm);
+	if (found != 1) {
+		pr_debug("First time, failed to find tracking event.\n");
+		goto out_err;
+	}
+
+	/*
+	 * Secondly, test that a 'comm' event can be found when the event is
+	 * disabled with the dummy event still enabled.
+	 */
+
+	perf_evlist__enable(evlist);
+
+	evsel = perf_evlist__last(evlist);
+
+	CHECK__(perf_evlist__disable_event(evlist, evsel));
+
+	comm = "Test COMM 2";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
+
+	perf_evlist__disable(evlist);
+
+	found = find_comm(evlist, comm);
+	if (found != 1) {
+		pr_debug("Second time, failed to find tracking event.\n");
+		goto out_err;
+	}
+
+	err = 0;
+
+out_err:
+	if (evlist) {
+		perf_evlist__disable(evlist);
+		perf_evlist__munmap(evlist);
+		perf_evlist__close(evlist);
+		perf_evlist__delete(evlist);
+	}
+	if (cpus)
+		cpu_map__delete(cpus);
+	if (threads)
+		thread_map__delete(threads);
+
+	return err;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 83d5b71..c048b58 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -38,5 +38,6 @@ int test__sw_clock_freq(void);
 int test__perf_time_to_tsc(void);
 int test__code_reading(void);
 int test__sample_parsing(void);
+int test__keep_tracking(void);
 
 #endif /* TESTS_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5df4ca9..b8727ae 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -246,7 +246,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
-			if (!perf_evsel__is_group_leader(pos))
+			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
 				continue;
 			for (thread = 0; thread < nr_threads; thread++)
 				ioctl(FD(pos, cpu, thread),
@@ -264,7 +264,7 @@ void perf_evlist__enable(struct perf_evlist *evlist)
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		list_for_each_entry(pos, &evlist->entries, node) {
-			if (!perf_evsel__is_group_leader(pos))
+			if (!perf_evsel__is_group_leader(pos) || !pos->fd)
 				continue;
 			for (thread = 0; thread < nr_threads; thread++)
 				ioctl(FD(pos, cpu, thread),
@@ -273,6 +273,44 @@ void perf_evlist__enable(struct perf_evlist *evlist)
 	}
 }
 
+int perf_evlist__disable_event(struct perf_evlist *evlist,
+			       struct perf_evsel *evsel)
+{
+	int cpu, thread, err;
+
+	if (!evsel->fd)
+		return 0;
+
+	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+		for (thread = 0; thread < evlist->threads->nr; thread++) {
+			err = ioctl(FD(evsel, cpu, thread),
+				    PERF_EVENT_IOC_DISABLE, 0);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
+int perf_evlist__enable_event(struct perf_evlist *evlist,
+			      struct perf_evsel *evsel)
+{
+	int cpu, thread, err;
+
+	if (!evsel->fd)
+		return -EINVAL;
+
+	for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+		for (thread = 0; thread < evlist->threads->nr; thread++) {
+			err = ioctl(FD(evsel, cpu, thread),
+				    PERF_EVENT_IOC_ENABLE, 0);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
 static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
 {
 	int nr_cpus = cpu_map__nr(evlist->cpus);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 841a394..880d713 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -110,6 +110,11 @@ void perf_evlist__munmap(struct perf_evlist *evlist);
 void perf_evlist__disable(struct perf_evlist *evlist);
 void perf_evlist__enable(struct perf_evlist *evlist);
 
+int perf_evlist__disable_event(struct perf_evlist *evlist,
+			       struct perf_evsel *evsel);
+int perf_evlist__enable_event(struct perf_evlist *evlist,
+			      struct perf_evsel *evsel);
+
 void perf_evlist__set_selected(struct perf_evlist *evlist,
 			       struct perf_evsel *evsel);
 
-- 
cgit v0.10.2


From b41f1cec91c37eeea6fdb15effbfa24ea0a5536b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 27 Aug 2013 11:41:53 +0900
Subject: perf list: Skip unsupported events

Some hardware events might not be supported on a system.  Listing those
events seems meaningless and confusing to users.  Let's skip them.

Before:
  $ perf list cache | wc -l
  33

After:
  $ perf list cache | wc -l
  27

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1377571313-14722-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b2dd077..9812531 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -15,6 +15,7 @@
 #define YY_EXTRA_TYPE int
 #include "parse-events-flex.h"
 #include "pmu.h"
+#include "thread_map.h"
 
 #define MAX_NAME_LEN 100
 
@@ -1076,6 +1077,33 @@ int is_valid_tracepoint(const char *event_string)
 	return 0;
 }
 
+static bool is_event_supported(u8 type, unsigned config)
+{
+	bool ret = true;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr = {
+		.type = type,
+		.config = config,
+		.disabled = 1,
+		.exclude_kernel = 1,
+	};
+	struct {
+		struct thread_map map;
+		int threads[1];
+	} tmap = {
+		.map.nr	 = 1,
+		.threads = { 0 },
+	};
+
+	evsel = perf_evsel__new(&attr, 0);
+	if (evsel) {
+		ret = perf_evsel__open(evsel, NULL, &tmap.map) >= 0;
+		perf_evsel__delete(evsel);
+	}
+
+	return ret;
+}
+
 static void __print_events_type(u8 type, struct event_symbol *syms,
 				unsigned max)
 {
@@ -1083,14 +1111,16 @@ static void __print_events_type(u8 type, struct event_symbol *syms,
 	unsigned i;
 
 	for (i = 0; i < max ; i++, syms++) {
+		if (!is_event_supported(type, i))
+			continue;
+
 		if (strlen(syms->alias))
 			snprintf(name, sizeof(name),  "%s OR %s",
 				 syms->symbol, syms->alias);
 		else
 			snprintf(name, sizeof(name), "%s", syms->symbol);
 
-		printf("  %-50s [%s]\n", name,
-			event_type_descriptors[type]);
+		printf("  %-50s [%s]\n", name, event_type_descriptors[type]);
 	}
 }
 
@@ -1119,6 +1149,10 @@ int print_hwcache_events(const char *event_glob, bool name_only)
 				if (event_glob != NULL && !strglobmatch(name, event_glob))
 					continue;
 
+				if (!is_event_supported(PERF_TYPE_HW_CACHE,
+							type | (op << 8) | (i << 16)))
+					continue;
+
 				if (name_only)
 					printf("%s ", name);
 				else
@@ -1148,6 +1182,9 @@ static void print_symbol_events(const char *event_glob, unsigned type,
 		      (syms->alias && strglobmatch(syms->alias, event_glob))))
 			continue;
 
+		if (!is_event_supported(type, i))
+			continue;
+
 		if (name_only) {
 			printf("%s ", syms->symbol);
 			continue;
-- 
cgit v0.10.2


From 0b8c25d949e010274c6697c570b772797ebc7c27 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 28 Jul 2013 09:48:32 -0600
Subject: perf tools: Fix symbol offset computation for some dsos

For some dsos (e.g., libc, libpthread, kernel modules) the symbol offset
is huge. e.g.,

qemu-kvm 17238/17242 [007] 762235.640311:
    ffffffff816288a1 __schedule+0x451 ([kernel.kallsyms])
    ffffffff81629609 schedule+0x29 ([kernel.kallsyms])
    ffffffffa00a6ded kvm_vcpu_block+0xffffffffa00a106d (/lib/modules/3.11.0-rc1+/kernel/arch/x86/kvm/kvm.ko)
    ffffffffa00bae6b kvm_arch_vcpu_ioctl_run+0xffffffffa00a118b (/lib/modules/3.11.0-rc1+/kernel/arch/x86/kvm/kvm.ko)
    ffffffffa00a4d7a kvm_vcpu_ioctl+0xffffffffa00a141a (/lib/modules/3.11.0-rc1+/kernel/arch/x86/kvm/kvm.ko)
    ffffffff811a7bdb do_vfs_ioctl+0x8b ([kernel.kallsyms])
    ffffffff811a80c1 sys_ioctl+0x91 ([kernel.kallsyms])
    ffffffff81633182 system_call+0x72 ([kernel.kallsyms])
        7f882a97af27 __GI___ioctl+0x7f882a891007 (/lib64/libc-2.14.90.so)
           100000002 [unknown] ([unknown])

This seems to happen for maps with a non-zero start. Taking the map start
into account, the offsets are correct:

qemu-kvm 17238/17242 [007] 762235.640311:
    ffffffff816288a1 __schedule+0x451 ([kernel.kallsyms])
    ffffffff81629609 schedule+0x29 ([kernel.kallsyms])
    ffffffffa00a6ded kvm_vcpu_block+0x6d (/lib/modules/3.11.0-rc1+/kernel/arch/x86/kvm/kvm.ko)
    ffffffffa00bae6b kvm_arch_vcpu_ioctl_run+0x18b (/lib/modules/3.11.0-rc1+/kernel/arch/x86/kvm/kvm.ko)
    ffffffffa00a4d7a kvm_vcpu_ioctl+0x41a (/lib/modules/3.11.0-rc1+/kernel/arch/x86/kvm/kvm.ko)
    ffffffff811a7bdb do_vfs_ioctl+0x8b ([kernel.kallsyms])
    ffffffff811a80c1 sys_ioctl+0x91 ([kernel.kallsyms])
    ffffffff81633182 system_call+0x72 ([kernel.kallsyms])
        7f882a97af27 __GI___ioctl+0x7 (/lib64/libc-2.14.90.so)
           100000002 [unknown] ([unknown])

Signed-off-by: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1375026512-45826-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 07642a7..1fc0c62 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1513,6 +1513,7 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 				printf(" ");
 				if (print_symoffset) {
 					al.addr = node->ip;
+					al.map  = node->map;
 					symbol__fprintf_symname_offs(node->sym, &al, stdout);
 				} else
 					symbol__fprintf_symname(node->sym, stdout);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 77f3b95..7eb0362 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -259,7 +259,10 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym,
 	if (sym && sym->name) {
 		length = fprintf(fp, "%s", sym->name);
 		if (al) {
-			offset = al->addr - sym->start;
+			if (al->addr < sym->end)
+				offset = al->addr - sym->start;
+			else
+				offset = al->addr - al->map->start - sym->start;
 			length += fprintf(fp, "+0x%lx", offset);
 		}
 		return length;
-- 
cgit v0.10.2


From 579e7865b2d431bb7d380a1b4ea0aa8eb8a10fd4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 2 Sep 2013 15:37:32 -0300
Subject: perf trace: Add beautifier for lseek's whence arg

 [root@zoo ~]# perf trace -a -e lseek | head -1
    546.922 ( 0.004 ms): 1184 lseek(fd: 26, offset: 0, whence: CUR) = 2
 [root@zoo ~]#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-2eiuhwz9jbnhj80q6jaqeji4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1a6cb74..02aaea6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -24,6 +24,31 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 
 #define SCA_HEX syscall_arg__scnprintf_hex
 
+static size_t syscall_arg__scnprintf_whence(char *bf, size_t size,
+					    unsigned long arg, u8 *arg_mask __maybe_unused)
+{
+	int whence = arg;
+
+	switch (whence) {
+#define P_WHENCE(n) case SEEK_##n: return scnprintf(bf, size, #n)
+	P_WHENCE(SET);
+	P_WHENCE(CUR);
+	P_WHENCE(END);
+#ifdef SEEK_DATA
+	P_WHENCE(DATA);
+#endif
+#ifdef SEEK_HOLE
+	P_WHENCE(HOLE);
+#endif
+#undef P_WHENCE
+	default: break;
+	}
+
+	return scnprintf(bf, size, "%#x", whence);
+}
+
+#define SCA_WHENCE syscall_arg__scnprintf_whence
+
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 					       unsigned long arg, u8 *arg_mask __maybe_unused)
 {
@@ -201,6 +226,8 @@ static struct syscall_fmt {
 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
 	{ .name	    = "ioctl",	    .errmsg = true,
 	  .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, },
+	{ .name	    = "lseek",	    .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_WHENCE, /* whence */ }, },
 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
 	{ .name     = "madvise",    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
-- 
cgit v0.10.2


From be65a89a0b558cb5b6863be71861f29b36feb88e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 2 Sep 2013 16:22:31 -0300
Subject: perf trace: Add beautifier for open's flags arg

Suppress the mode when O_CREAT is not present. The arg masking mechanism
needs improvements before this can be reused in openat, open_by_handle_at
and mq_open:

[root@zoo ~]# perf trace -a -e open | grep -v 'flags: RDONLY' | head -5
   147.541 ( 0.028 ms): 1188 open(filename: 0x33c17782fb, flags: CLOEXEC   ) = 23
   229.898 ( 0.020 ms): 2071 open(filename: 0x3d93c80, flags: NOATIME      ) = -1 EPERM Operation not permitted

[root@zoo ~]# perf trace -a -e open | grep CREAT
  1406.697 ( 0.024 ms): 616 open(filename: 0x7fffc3a0f910, flags: CREAT|TRUNC|WRONLY, mode: 438 ) = -1 ENOENT No such file or directory
  2032.770 ( 0.804 ms): 4354 open(filename: 0x7f33ac814368, flags: CREAT|EXCL|RDWR, mode: 384   ) = 115
^C[root@zoo ~]#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-c7vm6klaf995qw1vqdih5t7q@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 02aaea6..5b6b287 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -206,6 +206,62 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned lo
 
 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 
+static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
+					       unsigned long arg, u8 *arg_mask)
+{
+	int printed = 0, flags = arg;
+
+	if (!(flags & O_CREAT))
+		*arg_mask |= 1 << 2; /* Mask the mode parm */
+
+	if (flags == 0)
+		return scnprintf(bf, size, "RDONLY");
+#define	P_FLAG(n) \
+	if (flags & O_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~O_##n; \
+	}
+
+	P_FLAG(APPEND);
+	P_FLAG(ASYNC);
+	P_FLAG(CLOEXEC);
+	P_FLAG(CREAT);
+	P_FLAG(DIRECT);
+	P_FLAG(DIRECTORY);
+	P_FLAG(EXCL);
+	P_FLAG(LARGEFILE);
+	P_FLAG(NOATIME);
+	P_FLAG(NOCTTY);
+#ifdef O_NONBLOCK
+	P_FLAG(NONBLOCK);
+#elif O_NDELAY
+	P_FLAG(NDELAY);
+#endif
+#ifdef O_PATH
+	P_FLAG(PATH);
+#endif
+	P_FLAG(RDWR);
+#ifdef O_DSYNC
+	if ((flags & O_SYNC) == O_SYNC)
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
+	else {
+		P_FLAG(DSYNC);
+	}
+#else
+	P_FLAG(SYNC);
+#endif
+	P_FLAG(TRUNC);
+	P_FLAG(WRONLY);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
@@ -244,7 +300,8 @@ static struct syscall_fmt {
 			     [4] = SCA_HEX, /* new_addr */ }, },
 	{ .name	    = "munmap",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
-	{ .name	    = "open",	    .errmsg = true, },
+	{ .name	    = "open",	    .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
-- 
cgit v0.10.2


From 31cd3855c98119cae287b761d8d2e75018714c5d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 2 Sep 2013 16:40:40 -0300
Subject: perf trace: Tell arg formatters the arg index

... so that they can mask args relative to their position, like the 'mode' arg
that may or may not be printed according to the 'flags' (O_CREAT) value.

 [root@zoo ~]# perf trace -a -e openat,open_by_handle_at | head -1
    469.754 ( 0.034 ms): 1183 openat(dfd: -100, filename: 0x7fbde40014b0, flags: CLOEXEC|DIRECTORY|NONBLOCK) = 23
 [root@zoo ~]#

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-bgokqpkufd4sio7ixxknf1ux@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5b6b287..b6f0725 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -17,7 +17,9 @@
 #include <linux/futex.h>
 
 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
-					 unsigned long arg, u8 *arg_mask __maybe_unused)
+					 unsigned long arg,
+					 u8 arg_idx __maybe_unused,
+					 u8 *arg_mask __maybe_unused)
 {
 	return scnprintf(bf, size, "%#lx", arg);
 }
@@ -25,7 +27,9 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 #define SCA_HEX syscall_arg__scnprintf_hex
 
 static size_t syscall_arg__scnprintf_whence(char *bf, size_t size,
-					    unsigned long arg, u8 *arg_mask __maybe_unused)
+					    unsigned long arg,
+					    u8 arg_idx __maybe_unused,
+					    u8 *arg_mask __maybe_unused)
 {
 	int whence = arg;
 
@@ -50,7 +54,9 @@ static size_t syscall_arg__scnprintf_whence(char *bf, size_t size,
 #define SCA_WHENCE syscall_arg__scnprintf_whence
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
-					       unsigned long arg, u8 *arg_mask __maybe_unused)
+					       unsigned long arg,
+					       u8 arg_idx __maybe_unused,
+					       u8 *arg_mask __maybe_unused)
 {
 	int printed = 0, prot = arg;
 
@@ -81,7 +87,8 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
-						unsigned long arg, u8 *arg_mask __maybe_unused)
+						unsigned long arg, u8 arg_idx __maybe_unused,
+						u8 *arg_mask __maybe_unused)
 {
 	int printed = 0, flags = arg;
 
@@ -122,7 +129,8 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 
 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
-						      unsigned long arg, u8 *arg_mask __maybe_unused)
+						      unsigned long arg, u8 arg_idx __maybe_unused,
+						      u8 *arg_mask __maybe_unused)
 {
 	int behavior = arg;
 
@@ -163,7 +171,8 @@ static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 
 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 
-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg, u8 *arg_mask)
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned long arg,
+					      u8 arg_idx __maybe_unused, u8 *arg_mask)
 {
 	enum syscall_futex_args {
 		SCF_UADDR   = (1 << 0),
@@ -207,12 +216,13 @@ static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, unsigned lo
 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 
 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
-					       unsigned long arg, u8 *arg_mask)
+					       unsigned long arg,
+					       u8 arg_idx, u8 *arg_mask)
 {
 	int printed = 0, flags = arg;
 
 	if (!(flags & O_CREAT))
-		*arg_mask |= 1 << 2; /* Mask the mode parm */
+		*arg_mask |= 1 << (arg_idx + 1); /* Mask the mode parm */
 
 	if (flags == 0)
 		return scnprintf(bf, size, "RDONLY");
@@ -265,7 +275,7 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
-	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 *arg_mask);
+	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg, u8 arg_idx, u8 *arg_mask);
 	bool	   errmsg;
 	bool	   timeout;
 	bool	   hexret;
@@ -302,6 +312,10 @@ static struct syscall_fmt {
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "open",	    .errmsg = true,
 	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
+	{ .name	    = "open_by_handle_at", .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
+	{ .name	    = "openat",	    .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_OPEN_FLAGS, /* flags */ }, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
@@ -332,7 +346,7 @@ struct syscall {
 	bool		    filtered;
 	struct syscall_fmt  *fmt;
 	size_t		    (**arg_scnprintf)(char *bf, size_t size,
-					      unsigned long arg, u8 *args_mask);
+					      unsigned long arg, u8 arg_idx, u8 *args_mask);
 };
 
 static size_t fprintf_duration(unsigned long t, FILE *fp)
@@ -589,7 +603,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 
 			if (sc->arg_scnprintf && sc->arg_scnprintf[i]) {
 				printed += sc->arg_scnprintf[i](bf + printed, size - printed,
-								args[i], &mask);
+								args[i], i, &mask);
 			} else {
 				printed += scnprintf(bf + printed, size - printed,
 						     "%ld", args[i]);
-- 
cgit v0.10.2