From cf4957f17f2a89984915ea808876d9c82225b862 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 24 Oct 2012 13:37:58 +0200
Subject: perf: Add PERF_EVENT_IOC_ID ioctl to return event ID

The only way to get the event ID is by reading the event fd,
followed by parsing the ID value out of the returned data.

While this is ok for current read format used by perf tool,
it is not ok when we use PERF_FORMAT_GROUP format.

With this format the data are returned for the whole group
and there's no way to find out what ID belongs to our fd
(if we are not group leader event).

Adding a simple ioctl that returns event primary ID for given fd.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-v1bn5cto707jn0bon34afqr1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index efef1d3..62c25a2 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -321,6 +321,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID		_IOR('$', 7, u64 *)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 916cf1f..5200b60 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3568,6 +3568,15 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case PERF_EVENT_IOC_PERIOD:
 		return perf_event_period(event, (u64 __user *)arg);
 
+	case PERF_EVENT_IOC_ID:
+	{
+		u64 id = primary_event_id(event);
+
+		if (copy_to_user((void __user *)arg, &id, sizeof(id)))
+			return -EFAULT;
+		return 0;
+	}
+
 	case PERF_EVENT_IOC_SET_OUTPUT:
 	{
 		int ret;
-- 
cgit v0.10.2


From 6f5ab0019fd328b50a8488c9e5193fc1dbd8d6ed Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Mon, 15 Oct 2012 20:13:45 +0200
Subject: perf: Do not get values from disabled counters in group format read

It's possible some of the counters in the group could be
disabled when sampling member of the event group is reading
the rest via PERF_SAMPLE_READ sample type processing. Disabled
counters could then produce wrong numbers.

Fixing that by reading only enabled counters for PERF_SAMPLE_READ
sample type processing.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-wwkjb0bbcuslnz0klrmqi26r@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5200b60..e82e700 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4388,7 +4388,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
 
-		if (sub != event)
+		if ((sub != event) &&
+		    (sub->state == PERF_EVENT_STATE_ACTIVE))
 			sub->pmu->read(sub);
 
 		values[n++] = perf_event_count(sub);
-- 
cgit v0.10.2


From e2b5abe0c82b45980b95ead22678861a2013c0df Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 4 Apr 2012 19:32:27 +0200
Subject: perf evlist: Use PERF_EVENT_IOC_ID perf ioctl to read event id

Changing the way we retrieve the event ID. Instead of parsing out
the ID out of the read data, using the PERF_EVENT_IOC_ID ioctl.

Keeping the old way in place to support kernels without
PERF_EVENT_IOC_ID ioctl support.

This will be useful for retrieving the event ID for events
with PERF_FORMAT_GROUP read format set, where it's impossible
to get correct event id out of the read call data.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-psgb4n7kte8e6tfenbe7nj2h@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 42ea4e9..0d3b739 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -302,6 +302,17 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 {
 	u64 read_data[4] = { 0, };
 	int id_idx = 1; /* The first entry is the counter value */
+	u64 id;
+	int ret;
+
+	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
+	if (!ret)
+		goto add;
+
+	if (errno != ENOTTY)
+		return -1;
+
+	/* Legacy way to get event id.. All hail to old kernels! */
 
 	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
 	    read(fd, &read_data, sizeof(read_data)) == -1)
@@ -312,7 +323,10 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 		++id_idx;
 
-	perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
+	id = read_data[id_idx];
+
+ add:
+	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 9ede473cc9f655f8a10cfc8ebbf04c48d84db7ee Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 10 Oct 2012 17:38:13 +0200
Subject: perf tools: Add support for parsing PERF_SAMPLE_READ sample type

Adding support to parse out the PERF_SAMPLE_READ sample bits.  The code
contains both single and group format specification.

This code parse out and prepare PERF_SAMPLE_READ data into the
perf_sample struct. It will be used for group leader sampling feature
comming in shortly.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-0tgdoln5rwk3wocshb442cl3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1ebb8fb..6119a64 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -80,6 +80,23 @@ struct stack_dump {
 	char *data;
 };
 
+struct sample_read_value {
+	u64 value;
+	u64 id;
+};
+
+struct sample_read {
+	u64 time_enabled;
+	u64 time_running;
+	union {
+		struct {
+			u64 nr;
+			struct sample_read_value *values;
+		} group;
+		struct sample_read_value one;
+	};
+};
+
 struct perf_sample {
 	u64 ip;
 	u32 pid, tid;
@@ -97,6 +114,7 @@ struct perf_sample {
 	struct branch_stack *branch_stack;
 	struct regs_dump  user_regs;
 	struct stack_dump user_stack;
+	struct sample_read read;
 };
 
 #define PERF_MEM_DATA_SRC_NONE \
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 0d3b739..df77a44 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -676,6 +676,32 @@ u64 perf_evlist__sample_type(struct perf_evlist *evlist)
 	return first->attr.sample_type;
 }
 
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
+	u64 read_format = first->attr.read_format;
+	u64 sample_type = first->attr.sample_type;
+
+	list_for_each_entry_continue(pos, &evlist->entries, node) {
+		if (read_format != pos->attr.read_format)
+			return false;
+	}
+
+	/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
+	if ((sample_type & PERF_SAMPLE_READ) &&
+	    !(read_format & PERF_FORMAT_ID)) {
+		return false;
+	}
+
+	return true;
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+	struct perf_evsel *first = perf_evlist__first(evlist);
+	return first->attr.read_format;
+}
+
 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
 {
 	struct perf_evsel *first = perf_evlist__first(evlist);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 0583d36..c7178b7 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -118,6 +118,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist);
 void __perf_evlist__set_leader(struct list_head *list);
 void perf_evlist__set_leader(struct perf_evlist *evlist);
 
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
 u64 perf_evlist__sample_type(struct perf_evlist *evlist);
 bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
@@ -127,6 +128,7 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even
 
 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);
 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
 
 void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
 				   struct list_head *list,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8bed0c1..9ab8fff 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1096,8 +1096,34 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	}
 
 	if (type & PERF_SAMPLE_READ) {
-		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
-		return -1;
+		u64 read_format = evsel->attr.read_format;
+
+		if (read_format & PERF_FORMAT_GROUP)
+			data->read.group.nr = *array;
+		else
+			data->read.one.value = *array;
+
+		array++;
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+			data->read.time_enabled = *array;
+			array++;
+		}
+
+		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+			data->read.time_running = *array;
+			array++;
+		}
+
+		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+		if (read_format & PERF_FORMAT_GROUP) {
+			data->read.group.values = (struct sample_read_value *) array;
+			array = (void *) array + data->read.group.nr *
+				sizeof(struct sample_read_value);
+		} else {
+			data->read.one.id = *array;
+			array++;
+		}
 	}
 
 	if (type & PERF_SAMPLE_CALLCHAIN) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 272c9cf..f082921 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -71,6 +71,11 @@ static int perf_session__open(struct perf_session *self, bool force)
 		goto out_close;
 	}
 
+	if (!perf_evlist__valid_read_format(self->evlist)) {
+		pr_err("non matching read_format");
+		goto out_close;
+	}
+
 	self->size = input_stat.st_size;
 	return 0;
 
@@ -749,6 +754,36 @@ static void perf_session__print_tstamp(struct perf_session *session,
 		printf("%" PRIu64 " ", sample->time);
 }
 
+static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+{
+	printf("... sample_read:\n");
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		printf("...... time enabled %016" PRIx64 "\n",
+		       sample->read.time_enabled);
+
+	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		printf("...... time running %016" PRIx64 "\n",
+		       sample->read.time_running);
+
+	if (read_format & PERF_FORMAT_GROUP) {
+		u64 i;
+
+		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
+
+		for (i = 0; i < sample->read.group.nr; i++) {
+			struct sample_read_value *value;
+
+			value = &sample->read.group.values[i];
+			printf("..... id %016" PRIx64
+			       ", value %016" PRIx64 "\n",
+			       value->id, value->value);
+		}
+	} else
+		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
+			sample->read.one.id, sample->read.one.value);
+}
+
 static void dump_event(struct perf_session *session, union perf_event *event,
 		       u64 file_offset, struct perf_sample *sample)
 {
@@ -798,6 +833,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+
+	if (sample_type & PERF_SAMPLE_READ)
+		sample_read__printf(sample, evsel->attr.read_format);
 }
 
 static struct machine *
-- 
cgit v0.10.2


From c4861afe30aa3b00c95f9389f24cf6ede88416e4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 12 Oct 2012 13:02:21 +0200
Subject: perf evlist: Fix event ID retrieval for group format read case

We need to fail the event ID retrieval in case both following conditions
are true:

  - we are on kernel with no PERF_EVENT_IOC_ID support
  - PERF_FORMAT_GROUP read format is set

The PERF_FORMAT_GROUP read format bit is the killer for retrieving event
ID out of the read syscall, because we have no guarantee of the event
placement within leader kernel sibling list.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-e93pgyj20rqx48qzw10vj4r4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index df77a44..10fcc03 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -314,6 +314,13 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 
 	/* Legacy way to get event id.. All hail to old kernels! */
 
+	/*
+	 * This way does not work with group format read, so bail
+	 * out in that case.
+	 */
+	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+		return -1;
+
 	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
 	    read(fd, &read_data, sizeof(read_data)) == -1)
 		return -1;
-- 
cgit v0.10.2


From 932a35940a3f03613796ab4855ecbb214dbdc0c2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 11 Oct 2012 14:10:35 +0200
Subject: perf evlist: Add perf_evlist__id2sid method to get event ID related
 data

This will be helpful for PERF_FORMAT_GROUP samples where we need to
store ID related period value for each event.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-twmlgsbyim97p7cyohjwb1df@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 10fcc03..da2dd92 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -337,21 +337,32 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 	return 0;
 }
 
-struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
 {
 	struct hlist_head *head;
 	struct perf_sample_id *sid;
 	int hash;
 
-	if (evlist->nr_entries == 1)
-		return perf_evlist__first(evlist);
-
 	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
 	head = &evlist->heads[hash];
 
 	hlist_for_each_entry(sid, head, node)
 		if (sid->id == id)
-			return sid->evsel;
+			return sid;
+
+	return NULL;
+}
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+	struct perf_sample_id *sid;
+
+	if (evlist->nr_entries == 1)
+		return perf_evlist__first(evlist);
+
+	sid = perf_evlist__id2sid(evlist, id);
+	if (sid)
+		return sid->evsel;
 
 	if (!perf_evlist__sample_id_all(evlist))
 		return perf_evlist__first(evlist);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c7178b7..327abab 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -78,6 +78,8 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
 
 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
 
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
+
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
-- 
cgit v0.10.2


From e4caec0d1af3d608d52e6b92d09fb862d7691d4b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 10 Oct 2012 18:52:24 +0200
Subject: perf evsel: Add PERF_SAMPLE_READ sample related processing

For sample with sample type PERF_SAMPLE_READ the period value is stored
in the 'struct sample_read'.

Moreover if the read format has PERF_FORMAT_GROUP, the 'struct
sample_read' contains period values for all events in the group (for
which the sample's event is a leader).

We deliver separated samples for all the values contained within the
'struct sample_read'.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-6mdm5xkrm6kypouh1c33cyys@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 3f156cc..6a2cf26 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -38,6 +38,9 @@ struct perf_sample_id {
 	struct hlist_node 	node;
 	u64		 	id;
 	struct perf_evsel	*evsel;
+
+	/* Holds total ID period value for PERF_SAMPLE_READ processing. */
+	u64			period;
 };
 
 /** struct perf_evsel - event selector
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f082921..9247d9c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -860,6 +860,75 @@ static struct machine *
 	return &session->machines.host;
 }
 
+static int deliver_sample_value(struct perf_session *session,
+				struct perf_tool *tool,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct sample_read_value *v,
+				struct machine *machine)
+{
+	struct perf_sample_id *sid;
+
+	sid = perf_evlist__id2sid(session->evlist, v->id);
+	if (sid) {
+		sample->id     = v->id;
+		sample->period = v->value - sid->period;
+		sid->period    = v->value;
+	}
+
+	if (!sid || sid->evsel == NULL) {
+		++session->stats.nr_unknown_id;
+		return 0;
+	}
+
+	return tool->sample(tool, event, sample, sid->evsel, machine);
+}
+
+static int deliver_sample_group(struct perf_session *session,
+				struct perf_tool *tool,
+				union  perf_event *event,
+				struct perf_sample *sample,
+				struct machine *machine)
+{
+	int ret = -EINVAL;
+	u64 i;
+
+	for (i = 0; i < sample->read.group.nr; i++) {
+		ret = deliver_sample_value(session, tool, event, sample,
+					   &sample->read.group.values[i],
+					   machine);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int
+perf_session__deliver_sample(struct perf_session *session,
+			     struct perf_tool *tool,
+			     union  perf_event *event,
+			     struct perf_sample *sample,
+			     struct perf_evsel *evsel,
+			     struct machine *machine)
+{
+	/* We know evsel != NULL. */
+	u64 sample_type = evsel->attr.sample_type;
+	u64 read_format = evsel->attr.read_format;
+
+	/* Standard sample delievery. */
+	if (!(sample_type & PERF_SAMPLE_READ))
+		return tool->sample(tool, event, sample, evsel, machine);
+
+	/* For PERF_SAMPLE_READ we have either single or group mode. */
+	if (read_format & PERF_FORMAT_GROUP)
+		return deliver_sample_group(session, tool, event, sample,
+					    machine);
+	else
+		return deliver_sample_value(session, tool, event, sample,
+					    &sample->read.one, machine);
+}
+
 static int perf_session_deliver_event(struct perf_session *session,
 				      union perf_event *event,
 				      struct perf_sample *sample,
@@ -902,7 +971,8 @@ static int perf_session_deliver_event(struct perf_session *session,
 			++session->stats.nr_unprocessable_samples;
 			return 0;
 		}
-		return tool->sample(tool, event, sample, evsel, machine);
+		return perf_session__deliver_sample(session, tool, event,
+						    sample, evsel, machine);
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_COMM:
-- 
cgit v0.10.2


From 3c1763115b492afb743daa4e1c8099eca6a70634 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 10 Oct 2012 17:39:03 +0200
Subject: perf tools: Add 'S' event/group modifier to read sample value

Adding 'S' event/group modifier to specify that the event value/s are
read by PERF_SAMPLE_READ sample type processing, instead of the period
value offered by lower layers.

There's additional behaviour change for 'S' modifier being specified on
event group:

Currently all the events within a group makes samples. If user now
specifies 'S' within group modifier, only the leader will trigger
samples. The rest of events in the group will have sampling disabled.

And same as for single events, values of all events within the group
(including leader) are read by PERF_SAMPLE_READ sample type processing.

Following example will create event group with cycles and cache-misses
events, setting the cycles as group leader and the only event to
actually sample. Both cycles and cache-misses event period values are
read by PERF_SAMPLE_READ sample type processing with PERF_FORMAT_GROUP
read format.

Example:

  $ perf record -e '{cycles,cache-misses}:S' ls
  ...
  $ perf report --group --show-total-period --stdio
  ...
  # Samples: 36  of event 'anon group { cycles, cache-misses }'
  # Event count (approx.): 12585593
  #
  #       Overhead          Period  Command      Shared Object                      Symbol
  # ..............  ..............  .......  .................  ..........................
  #
    19.92%   1.20%  2505936     31       ls  [kernel.kallsyms]  [k] mark_held_locks
    13.74%   0.47%  1729327     12       ls  [kernel.kallsyms]  [k] sched_clock_local
    13.64%  23.72%  1716147    612       ls  ld-2.14.90.so      [.] check_match.10805
    13.12%  23.22%  1650778    599       ls  libc-2.14.90.so    [.] _nl_intern_locale_data
    11.24%  29.19%  1414554    753       ls  [kernel.kallsyms]  [k] sched_clock_cpu
     8.50%   0.35%  1070150      9       ls  [kernel.kallsyms]  [k] check_chain_key
  ...

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-iyoinu3axi11mymwnh2b7fxj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 826f3d6..eb03f06 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -29,6 +29,7 @@ counted. The following modifiers exist:
  G - guest counting (in KVM guests)
  H - host counting (not in KVM guests)
  p - precise level
+ S - read sample value (PERF_SAMPLE_READ)
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9ab8fff..8f10161 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -490,6 +490,7 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
 void perf_evsel__config(struct perf_evsel *evsel,
 			struct perf_record_opts *opts)
 {
+	struct perf_evsel *leader = evsel->leader;
 	struct perf_event_attr *attr = &evsel->attr;
 	int track = !evsel->idx; /* only the first counter needs these */
 
@@ -499,6 +500,25 @@ void perf_evsel__config(struct perf_evsel *evsel,
 	perf_evsel__set_sample_bit(evsel, IP);
 	perf_evsel__set_sample_bit(evsel, TID);
 
+	if (evsel->sample_read) {
+		perf_evsel__set_sample_bit(evsel, READ);
+
+		/*
+		 * We need ID even in case of single event, because
+		 * PERF_SAMPLE_READ process ID specific data.
+		 */
+		perf_evsel__set_sample_id(evsel);
+
+		/*
+		 * Apply group format only if we belong to group
+		 * with more than one members.
+		 */
+		if (leader->nr_members > 1) {
+			attr->read_format |= PERF_FORMAT_GROUP;
+			attr->inherit = 0;
+		}
+	}
+
 	/*
 	 * We default some events to a 1 default interval. But keep
 	 * it a weak assumption overridable by the user.
@@ -514,6 +534,15 @@ void perf_evsel__config(struct perf_evsel *evsel,
 		}
 	}
 
+	/*
+	 * Disable sampling for all group members other
+	 * than leader in case leader 'leads' the sampling.
+	 */
+	if ((leader != evsel) && leader->sample_read) {
+		attr->sample_freq   = 0;
+		attr->sample_period = 0;
+	}
+
 	if (opts->no_samples)
 		attr->sample_freq = 0;
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6a2cf26..5edc625 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -79,6 +79,7 @@ struct perf_evsel {
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;
+	int			sample_read;
 	struct perf_evsel	*leader;
 	char			*group_name;
 };
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2c460ed..dba877d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -687,6 +687,7 @@ struct event_modifier {
 	int eG;
 	int precise;
 	int exclude_GH;
+	int sample_read;
 };
 
 static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -698,6 +699,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	int eH = evsel ? evsel->attr.exclude_host : 0;
 	int eG = evsel ? evsel->attr.exclude_guest : 0;
 	int precise = evsel ? evsel->attr.precise_ip : 0;
+	int sample_read = 0;
 
 	int exclude = eu | ek | eh;
 	int exclude_GH = evsel ? evsel->exclude_GH : 0;
@@ -730,6 +732,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 			/* use of precise requires exclude_guest */
 			if (!exclude_GH)
 				eG = 1;
+		} else if (*str == 'S') {
+			sample_read = 1;
 		} else
 			break;
 
@@ -756,6 +760,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	mod->eG = eG;
 	mod->precise = precise;
 	mod->exclude_GH = exclude_GH;
+	mod->sample_read = sample_read;
 	return 0;
 }
 
@@ -768,7 +773,7 @@ static int check_modifier(char *str)
 	char *p = str;
 
 	/* The sizeof includes 0 byte as well. */
-	if (strlen(str) > (sizeof("ukhGHppp") - 1))
+	if (strlen(str) > (sizeof("ukhGHpppS") - 1))
 		return -1;
 
 	while (*p) {
@@ -806,6 +811,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
 		evsel->attr.exclude_host   = mod.eH;
 		evsel->attr.exclude_guest  = mod.eG;
 		evsel->exclude_GH          = mod.exclude_GH;
+		evsel->sample_read         = mod.sample_read;
 	}
 
 	return 0;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index e9d1134..b36115f 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -82,7 +82,7 @@ num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?]*
-modifier_event	[ukhpGH]+
+modifier_event	[ukhpGHS]+
 modifier_bp	[rwx]{1,3}
 
 %%
-- 
cgit v0.10.2


From 8404db63461af62025f32f8368861fb33604e62f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 1 Feb 2013 19:33:31 +0100
Subject: perf tests: Add attr record group sampling test

Adding test to validate perf_event_attr data for command:

  'record -e '{cycles,cache-misses}:S'

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-9eppxvhkly6gse5ximudckrp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
new file mode 100644
index 0000000..658f5d6
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -0,0 +1,36 @@
+[config]
+command = record
+args    = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=343
+read_format=12
+inherit=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+
+# cache-misses
+type=0
+config=3
+
+# default | PERF_SAMPLE_READ
+sample_type=343
+
+# PERF_FORMAT_ID | PERF_FORMAT_GROUP
+read_format=12
+
+mmap=0
+comm=0
+enable_on_exec=0
+disabled=0
+
+# inherit is disabled for group sampling
+inherit=0
+
+# sampling disabled
+sample_freq=0
+sample_period=0
-- 
cgit v0.10.2


From a9f93f97424c64f8d5d94b653a2133e491498680 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 1 Feb 2013 20:37:11 +0100
Subject: perf tests: Add parse events tests for leader sampling

Adding 2 more tests to the automated parse events suite for following
event config:

  '{cycles,cache-misses,branch-misses}:S'
  '{instructions,branch-misses}:Su'

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-tmcy0ir7i8id2t54qg5ifbio@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 344c844..b46379c 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -520,6 +520,7 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cycles:upp */
 	evsel = perf_evsel__next(evsel);
@@ -535,6 +536,7 @@ static int test__group1(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -560,6 +562,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cache-references + :u modifier */
 	evsel = perf_evsel__next(evsel);
@@ -574,6 +577,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cycles:k */
 	evsel = perf_evsel__next(evsel);
@@ -587,6 +591,7 @@ static int test__group2(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -615,6 +620,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 		!strcmp(leader->group_name, "group1"));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* group1 cycles:kppp */
 	evsel = perf_evsel__next(evsel);
@@ -631,6 +637,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* group2 cycles + G modifier */
 	evsel = leader = perf_evsel__next(evsel);
@@ -648,6 +655,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 		!strcmp(leader->group_name, "group2"));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* group2 1:3 + G modifier */
 	evsel = perf_evsel__next(evsel);
@@ -661,6 +669,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions:u */
 	evsel = perf_evsel__next(evsel);
@@ -674,6 +683,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -701,6 +711,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions:kp + p */
 	evsel = perf_evsel__next(evsel);
@@ -716,6 +727,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	return 0;
 }
@@ -742,6 +754,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions + G */
 	evsel = perf_evsel__next(evsel);
@@ -756,6 +769,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* cycles:G */
 	evsel = leader = perf_evsel__next(evsel);
@@ -772,6 +786,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
 	TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
 	TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+	TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
 
 	/* instructions:G */
 	evsel = perf_evsel__next(evsel);
@@ -963,6 +978,98 @@ static int test__group_gh4(struct perf_evlist *evlist)
 	return 0;
 }
 
+static int test__leader_sample1(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+	/* cycles - sampling group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* cache-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* branch-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	return 0;
+}
+
+static int test__leader_sample2(struct perf_evlist *evlist __maybe_unused)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+
+	/* instructions - sampling group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	/* branch-misses - not sampling */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+	TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	char events_path[PATH_MAX];
@@ -1179,6 +1286,14 @@ static struct evlist_test test__events[] = {
 		.name  = "{cycles:G,cache-misses:H}:uG",
 		.check = test__group_gh4,
 	},
+	[38] = {
+		.name  = "{cycles,cache-misses,branch-misses}:S",
+		.check = test__leader_sample1,
+	},
+	[39] = {
+		.name  = "{instructions,branch-misses}:Su",
+		.check = test__leader_sample2,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
-- 
cgit v0.10.2


From 251f426fddd9217ce6e4478653d3ee33df518030 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 28 Jul 2013 09:14:34 -0600
Subject: perf evsel: Actually show symbol offset in stack trace when requested

Symbol offset is one of the fields that can be requested in perf-script.
Currently you do not get that data when requested. e.g.,

perf script -f comm,tid,pid,time,cpu,sym,symoff,ip
...
gcc  6201/6201  [006] 762250.617897:
    ffffffff81090d95 update_curr
    ffffffff810911b8 dequeue_entity
    ffffffff81091825 dequeue_task_fair
    ffffffff81087163 dequeue_task
    ffffffff81087c03 deactivate_task
...

With this patch you get the offset:
...
gcc  6201/6201  [006] 762250.617897:
    ffffffff81090d95 update_curr+0x1c5
    ffffffff810911b8 dequeue_entity+0x28
    ffffffff81091825 dequeue_task_fair+0x45
    ffffffff81087163 dequeue_task+0x93
    ffffffff81087c03 deactivate_task+0x23
...

Signed-off-by: David Ahern <dsahern@gmail.com>
Link: http://lkml.kernel.org/r/1375024474-45726-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 9247d9c..a0ce5a4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1519,8 +1519,13 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 			printf("\t%16" PRIx64, node->ip);
 			if (print_sym) {
 				printf(" ");
-				symbol__fprintf_symname(node->sym, stdout);
+				if (print_symoffset) {
+					al.addr = node->ip;
+					symbol__fprintf_symname_offs(node->sym, &al, stdout);
+				} else
+					symbol__fprintf_symname(node->sym, stdout);
 			}
+
 			if (print_dso) {
 				printf(" (");
 				map__fprintf_dsoname(node->map, stdout);
-- 
cgit v0.10.2


From 602bab1b883090ffd125ed1253fe8ec127c048b1 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 26 Jul 2013 08:27:23 -0600
Subject: perf tools: Fix compile of util/tsc.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Fedora 18, with gcc 4.6.4 compile fails with:

arch/x86/util/tsc.c: In function ‘perf_time_to_tsc’:
arch/x86/util/tsc.c:13:6: error: declaration of ‘time’ shadows a global declaration [-Werror=shadow]
cc1: all warnings being treated as errors
make: *** [/tmp/junk/arch/x86/util/tsc.o] Error 1
make: *** Waiting for unfinished jobs....

Fix by renaming the local variable.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/r/1374848843-43127-1-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index f111744..9570c2b 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -10,11 +10,11 @@
 
 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
 {
-	u64 time, quot, rem;
+	u64 t, quot, rem;
 
-	time = ns - tc->time_zero;
-	quot = time / tc->time_mult;
-	rem  = time % tc->time_mult;
+	t = ns - tc->time_zero;
+	quot = t / tc->time_mult;
+	rem  = t % tc->time_mult;
 	return (quot << tc->time_shift) +
 	       (rem << tc->time_shift) / tc->time_mult;
 }
-- 
cgit v0.10.2


From a14bb860a38ff5b4aa3db20f251fef43e447a7c9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Jul 2013 16:38:23 -0300
Subject: perf trace: Beautify 'connect' result

It is an errno, so print an error string.

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-zt68gijvvoe8gd7kmclo43si@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 0e4b67f..da7ae01 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -18,6 +18,7 @@ static struct syscall_fmt {
 } syscall_fmts[] = {
 	{ .name	    = "access",	    .errmsg = true, },
 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
+	{ .name	    = "connect",    .errmsg = true, },
 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 	{ .name	    = "futex",	    .errmsg = true, },
-- 
cgit v0.10.2


From 3223565cdf856fa07024e5db7ca24e1b1b38d1db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 1 Aug 2013 17:00:45 -0300
Subject: perf python: Remove duplicate TID bit from mask

Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thiago Peixoto <thiagolcpeixoto@gmail.com>
Link: http://lkml.kernel.org/n/tip-jurgz6myq125o1ql6lldh6f7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index b11cca5..2225162 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -21,7 +21,7 @@ def main():
 	evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
 			   wakeup_events = 1, watermark = 1,
 			   sample_id_all = 1,
-			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
+			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
 	evsel.open(cpus = cpus, threads = threads);
 	evlist = perf.evlist(cpus, threads)
 	evlist.add(evsel)
-- 
cgit v0.10.2


From 3b47abe1b5f5446ab41a3a139b6075f46f77f21f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 4 Jun 2013 10:50:29 +0900
Subject: perf util: Add parse_nsec_time() function

The parse_nsec_time() function is for parsing a string of time into
64-bit nsec value.  It's a preparation of time filtering in some of perf
commands.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: David Ahern <dsahern@gmail.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1370310629-9642-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 9a06584..6d17b18 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -328,3 +328,36 @@ void put_tracing_file(char *file)
 {
 	free(file);
 }
+
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+	u64 time_sec, time_nsec;
+	char *end;
+
+	time_sec = strtoul(str, &end, 10);
+	if (*end != '.' && *end != '\0')
+		return -1;
+
+	if (*end == '.') {
+		int i;
+		char nsec_buf[10];
+
+		if (strlen(++end) > 9)
+			return -1;
+
+		strncpy(nsec_buf, end, 9);
+		nsec_buf[9] = '\0';
+
+		/* make it nsec precision */
+		for (i = strlen(nsec_buf); i < 9; i++)
+			nsec_buf[i] = '0';
+
+		time_nsec = strtoul(nsec_buf, &end, 10);
+		if (*end != '\0')
+			return -1;
+	} else
+		time_nsec = 0;
+
+	*ptime = time_sec * NSEC_PER_SEC + time_nsec;
+	return 0;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index cc1574e..a535359 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -208,6 +208,8 @@ static inline int has_extension(const char *filename, const char *ext)
 #define NSEC_PER_MSEC	1000000L
 #endif
 
+int parse_nsec_time(const char *str, u64 *ptime);
+
 extern unsigned char sane_ctype[256];
 #define GIT_SPACE		0x01
 #define GIT_DIGIT		0x02
-- 
cgit v0.10.2


From 42ee8c61cb1b7fe7b7ad5071bfb3c609cb0620ca Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:39 -0600
Subject: perf top: move CONSOLE_CLEAR to header file

For use with kvm-live mode.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-2-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bbf4635..9101f7c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -238,8 +238,6 @@ out_unlock:
 	pthread_mutex_unlock(&notes->lock);
 }
 
-static const char		CONSOLE_CLEAR[] = "[H[2J";
-
 static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 						     struct addr_location *al,
 						     struct perf_sample *sample)
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index df46be9..b554ffc 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -39,6 +39,8 @@ struct perf_top {
 	float		   min_percent;
 };
 
+#define CONSOLE_CLEAR "[H[2J"
+
 size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
 void perf_top__reset_sample_counters(struct perf_top *top);
 #endif /* __PERF_TOP_H */
-- 
cgit v0.10.2


From ffe4f3c0d109dc53e1b3448ac457052107f34a84 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:40 -0600
Subject: perf stats: Add max and min stats

Need an initialization function to set min to -1 to
differentiate from an actual min of 0.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 7c59c28..6506b3d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -10,6 +10,12 @@ void update_stats(struct stats *stats, u64 val)
 	delta = val - stats->mean;
 	stats->mean += delta / stats->n;
 	stats->M2 += delta*(val - stats->mean);
+
+	if (val > stats->max)
+		stats->max = val;
+
+	if (val < stats->min)
+		stats->min = val;
 }
 
 double avg_stats(struct stats *stats)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 588367c..ae8ccd7 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -6,6 +6,7 @@
 struct stats
 {
 	double n, mean, M2;
+	u64 max, min;
 };
 
 void update_stats(struct stats *stats, u64 val);
@@ -13,4 +14,12 @@ double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
 double rel_stddev_stats(double stddev, double avg);
 
+static inline void init_stats(struct stats *stats)
+{
+	stats->n    = 0.0;
+	stats->mean = 0.0;
+	stats->M2   = 0.0;
+	stats->min  = (u64) -1;
+	stats->max  = 0;
+}
 #endif
-- 
cgit v0.10.2


From 9c5014022f5d5b09abc8b713da81b3d2db319699 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:41 -0600
Subject: perf session: Export a few functions for event processing

Allows kvm live mode to reuse the event processing and ordered samples
processing used by the perf-report path.

v2: removed flush_sample_queue as noticed by Jiri

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-4-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index a0ce5a4..b5ebd47 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -250,7 +250,7 @@ static int process_finished_round(struct perf_tool *tool,
 				  union perf_event *event,
 				  struct perf_session *session);
 
-static void perf_tool__fill_defaults(struct perf_tool *tool)
+void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
 		tool->sample = process_event_sample_stub;
@@ -495,7 +495,7 @@ static int perf_session_deliver_event(struct perf_session *session,
 				      u64 file_offset);
 
 static int flush_sample_queue(struct perf_session *s,
-			       struct perf_tool *tool)
+		       struct perf_tool *tool)
 {
 	struct ordered_samples *os = &s->ordered_samples;
 	struct list_head *head = &os->samples;
@@ -1049,10 +1049,10 @@ static void event_swap(union perf_event *event, bool sample_id_all)
 		swap(event, sample_id_all);
 }
 
-static int perf_session__process_event(struct perf_session *session,
-				       union perf_event *event,
-				       struct perf_tool *tool,
-				       u64 file_offset)
+int perf_session__process_event(struct perf_session *session,
+				union perf_event *event,
+				struct perf_tool *tool,
+				u64 file_offset)
 {
 	struct perf_sample sample;
 	int ret;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index ad8d3d4..9818fc2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -56,6 +56,13 @@ int __perf_session__process_events(struct perf_session *self,
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_tool *tool);
 
+int perf_session__process_event(struct perf_session *session,
+				union perf_event *event,
+				struct perf_tool *tool,
+				u64 file_offset);
+
+void perf_tool__fill_defaults(struct perf_tool *tool);
+
 int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel,
 				    struct thread *thread,
 				    struct ip_callchain *chain,
-- 
cgit v0.10.2


From 8fdd84c44fd09d783caa4fb81d2d680b0cf07eeb Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 2 Aug 2013 14:05:42 -0600
Subject: perf kvm: Split out tracepoints from record args

Needed by kvm live command. Make record_args a local while we are
messing with the args.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375473947-64285-5-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 24b78ae..7d14a3a 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -801,16 +801,11 @@ exit:
 	return ret;
 }
 
-static const char * const record_args[] = {
-	"record",
-	"-R",
-	"-f",
-	"-m", "1024",
-	"-c", "1",
-	"-e", "kvm:kvm_entry",
-	"-e", "kvm:kvm_exit",
-	"-e", "kvm:kvm_mmio",
-	"-e", "kvm:kvm_pio",
+static const char * const kvm_events_tp[] = {
+	"kvm:kvm_entry",
+	"kvm:kvm_exit",
+	"kvm:kvm_mmio",
+	"kvm:kvm_pio",
 };
 
 #define STRDUP_FAIL_EXIT(s)		\
@@ -826,8 +821,16 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
 	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		"-R",
+		"-f",
+		"-m", "1024",
+		"-c", "1",
+	};
 
-	rec_argc = ARRAY_SIZE(record_args) + argc + 2;
+	rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
+		   2 * ARRAY_SIZE(kvm_events_tp);
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
 	if (rec_argv == NULL)
@@ -836,6 +839,11 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
 
+	for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
+		rec_argv[i++] = "-e";
+		rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
+	}
+
 	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
 	rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
 
-- 
cgit v0.10.2


From 5c6974f49832a55edc9ca744323778947c104ca0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:09 -0700
Subject: perf evlist: Remove obsolete dummy execve

Minor cleanup.

The dummy execve to pre-resolve the PLT is obsolete since
"enable_on_execve" was added. The counters are only
running after the execve anyways. So just remove it.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-2-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index da2dd92..c7d111f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -837,13 +837,6 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
 		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
 
 		/*
-		 * Do a dummy execvp to get the PLT entry resolved,
-		 * so we avoid the resolver overhead on the real
-		 * execvp call.
-		 */
-		execvp("", (char **)argv);
-
-		/*
 		 * Tell the parent we're ready to go
 		 */
 		close(child_ready_pipe[1]);
-- 
cgit v0.10.2


From e2407bef968d64a28465561832686636d3380bf9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:10 -0700
Subject: perf evsel: Add support for enabling counters

Add support for enabling already set up counters by using an
ioctl. I share some code with the filter setup.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-3-git-send-email-andi@firstfloor.org
[ Fixed up 'err' variable indentation ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 8f10161..960394e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -634,15 +634,15 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 	return evsel->fd != NULL ? 0 : -ENOMEM;
 }
 
-int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
-			   const char *filter)
+static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthreads,
+			  int ioc,  void *arg)
 {
 	int cpu, thread;
 
 	for (cpu = 0; cpu < ncpus; cpu++) {
 		for (thread = 0; thread < nthreads; thread++) {
 			int fd = FD(evsel, cpu, thread),
-			    err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
+			    err = ioctl(fd, ioc, arg);
 
 			if (err)
 				return err;
@@ -652,6 +652,21 @@ int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 	return 0;
 }
 
+int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
+			   const char *filter)
+{
+	return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+				     PERF_EVENT_IOC_SET_FILTER,
+				     (void *)filter);
+}
+
+int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	return perf_evsel__run_ioctl(evsel, ncpus, nthreads,
+				     PERF_EVENT_IOC_ENABLE,
+				     0);
+}
+
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 {
 	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 5edc625..532a5f9 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -146,6 +146,7 @@ void perf_evsel__set_sample_id(struct perf_evsel *evsel);
 
 int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 			   const char *filter);
+int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
 			     struct cpu_map *cpus);
-- 
cgit v0.10.2


From 411916880ff4061ac0491a154f10af4d49a0c61a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:11 -0700
Subject: perf stat: Add support for --initial-delay option

When measuring workloads the startup phase -- doing page faults, dynamic
linking, opening files -- is often very different from the rest of the
workload.  Especially with smaller kernels and using counter
multiplexing this can give significant measurement errors.

Multiplexing assumes that the workload is mostly the same over longer
periods. But at startup there is typically some spike of activity which
is relatively short.  If many groups are multiplexing the one group
seeing the spike, and which is then scaled up over the time to run all
groups, may see a significant error.

Also in general it's often not useful to measure the startup, because it
is so different from the rest.

One way around this is to use interval mode and discard the first
sample, but this can be awkward because interval mode doesn't support
intervals of less than 100ms, and also a useful interval is not
necessarily the same as a useful startup delay.

This patch adds a new --initial-delay / -D option to skip measuring for
the startup phase. The time can be specified in ms

Here's a simple example:

perf stat -e page-faults bash -c 'for i in $(seq 100000) ; do true ; done'
...
             3,721 page-faults
...

If we just wait 20 ms the number of page faults is 1/3 less:

perf stat -D 20 -e page-faults bash -c 'for i in $(seq 100000) ; do true ; done'
...
             2,823 page-faults
...

So we filtered out most of the startup noise from bash.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-4-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..73c9759 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical processor.
 
+-D msecs::
+--initial-delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..2e637e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,7 @@ static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
 static unsigned int		interval			= 0;
+static unsigned int		initial_delay			= 0;
 static bool			forever				= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
@@ -254,7 +255,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
 	if (!perf_target__has_task(&target) &&
 	    perf_evsel__is_group_leader(evsel)) {
 		attr->disabled = 1;
-		attr->enable_on_exec = 1;
+		if (!initial_delay)
+			attr->enable_on_exec = 1;
 	}
 
 	return perf_evsel__open_per_thread(evsel, evsel_list->threads);
@@ -416,6 +418,20 @@ static void print_interval(void)
 	}
 }
 
+static void handle_initial_delay(void)
+{
+	struct perf_evsel *counter;
+
+	if (initial_delay) {
+		const int ncpus = cpu_map__nr(evsel_list->cpus),
+			nthreads = thread_map__nr(evsel_list->threads);
+
+		usleep(initial_delay * 1000);
+		list_for_each_entry(counter, &evsel_list->entries, node)
+			perf_evsel__enable(counter, ncpus, nthreads);
+	}
+}
+
 static int __run_perf_stat(int argc, const char **argv)
 {
 	char msg[512];
@@ -486,6 +502,7 @@ static int __run_perf_stat(int argc, const char **argv)
 
 	if (forks) {
 		perf_evlist__start_workload(evsel_list);
+		handle_initial_delay();
 
 		if (interval) {
 			while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -497,6 +514,7 @@ static int __run_perf_stat(int argc, const char **argv)
 		if (WIFSIGNALED(status))
 			psignal(WTERMSIG(status), argv[0]);
 	} else {
+		handle_initial_delay();
 		while (!done) {
 			nanosleep(&ts, NULL);
 			if (interval)
@@ -1419,6 +1437,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "aggregate counts per processor socket", AGGR_SOCKET),
 	OPT_SET_UINT(0, "per-core", &aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_UINTEGER('D', "delay", &initial_delay,
+		     "ms to wait before starting measurement after program start"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
-- 
cgit v0.10.2


From 2bbf03f16a634f675c49c473b2b6528571990aea Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 2 Aug 2013 17:41:12 -0700
Subject: perf stat: Flush output after each line in interval mode

When interval mode is outputting to a pipe, each measurement should be
flushed individually, so that the reader sees it timely.

With a terminal each line is automatically flushed by stdio, but that is
disabled with non terminal output.

Simply fflush output after each time interval

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375490473-1503-5-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2e637e4..f686d5f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -416,6 +416,8 @@ static void print_interval(void)
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter_aggr(counter, prefix);
 	}
+
+	fflush(output);
 }
 
 static void handle_initial_delay(void)
-- 
cgit v0.10.2


From 3445432b7b24665cf4693fc4794c62d4d768a978 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:49 +0300
Subject: perf symbols: avoid SyS kernel syscall aliases

When removing duplicate symbols, prefer to remove syscall aliases
starting with SyS or compat_SyS.

A side-effect of that is that it results in slightly improved results
for the "vmlinux symtab matches kallsyms" test.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-6-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 02718e7..ea62ecd 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -87,6 +87,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 {
 	s64 a;
 	s64 b;
+	size_t na, nb;
 
 	/* Prefer a symbol with non zero length */
 	a = syma->end - syma->start;
@@ -120,11 +121,21 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 	else if (a > b)
 		return SYMBOL_B;
 
-	/* If all else fails, choose the symbol with the longest name */
-	if (strlen(syma->name) >= strlen(symb->name))
+	/* Choose the symbol with the longest name */
+	na = strlen(syma->name);
+	nb = strlen(symb->name);
+	if (na > nb)
 		return SYMBOL_A;
-	else
+	else if (na < nb)
+		return SYMBOL_B;
+
+	/* Avoid "SyS" kernel syscall aliases */
+	if (na >= 3 && !strncmp(syma->name, "SyS", 3))
 		return SYMBOL_B;
+	if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+		return SYMBOL_B;
+
+	return SYMBOL_A;
 }
 
 void symbols__fixup_duplicate(struct rb_root *symbols)
-- 
cgit v0.10.2


From b55ae0a976f2927ea8f15a85c43bbe6d25a68a41 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:45 +0300
Subject: perf tests: Add test for reading object code

Using the information in mmap events, perf tools can read object code
associated with sampled addresses.  A test is added that compares bytes
read by perf with the same bytes read using objdump.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index bfd12d0..e0d3d9f 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -392,6 +392,7 @@ LIB_OBJS += $(OUTPUT)tests/sw-clock.o
 ifeq ($(ARCH),x86)
 LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
 endif
+LIB_OBJS += $(OUTPUT)tests/code-reading.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b7b4049..f5af192 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -100,6 +100,10 @@ static struct test {
 	},
 #endif
 	{
+		.desc = "Test object code reading",
+		.func = test__code_reading,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
new file mode 100644
index 0000000..28bee62
--- /dev/null
+++ b/tools/perf/tests/code-reading.c
@@ -0,0 +1,509 @@
+#include <sys/types.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "machine.h"
+#include "event.h"
+#include "thread.h"
+
+#include "tests.h"
+
+#define BUFSZ	1024
+#define READLEN	128
+
+static unsigned int hex(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	return c - 'A' + 10;
+}
+
+static void read_objdump_line(const char *line, size_t line_len, void **buf,
+			      size_t *len)
+{
+	const char *p;
+	size_t i;
+
+	/* Skip to a colon */
+	p = strchr(line, ':');
+	if (!p)
+		return;
+	i = p + 1 - line;
+
+	/* Read bytes */
+	while (*len) {
+		char c1, c2;
+
+		/* Skip spaces */
+		for (; i < line_len; i++) {
+			if (!isspace(line[i]))
+				break;
+		}
+		/* Get 2 hex digits */
+		if (i >= line_len || !isxdigit(line[i]))
+			break;
+		c1 = line[i++];
+		if (i >= line_len || !isxdigit(line[i]))
+			break;
+		c2 = line[i++];
+		/* Followed by a space */
+		if (i < line_len && line[i] && !isspace(line[i]))
+			break;
+		/* Store byte */
+		*(unsigned char *)*buf = (hex(c1) << 4) | hex(c2);
+		*buf += 1;
+		*len -= 1;
+	}
+}
+
+static int read_objdump_output(FILE *f, void **buf, size_t *len)
+{
+	char *line = NULL;
+	size_t line_len;
+	ssize_t ret;
+	int err = 0;
+
+	while (1) {
+		ret = getline(&line, &line_len, f);
+		if (feof(f))
+			break;
+		if (ret < 0) {
+			pr_debug("getline failed\n");
+			err = -1;
+			break;
+		}
+		read_objdump_line(line, ret, buf, len);
+	}
+
+	free(line);
+
+	return err;
+}
+
+static int read_via_objdump(const char *filename, u64 addr, void *buf,
+			    size_t len)
+{
+	char cmd[PATH_MAX * 2];
+	const char *fmt;
+	FILE *f;
+	int ret;
+
+	fmt = "%s -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s";
+	ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len,
+		       filename);
+	if (ret <= 0 || (size_t)ret >= sizeof(cmd))
+		return -1;
+
+	pr_debug("Objdump command is: %s\n", cmd);
+
+	f = popen(cmd, "r");
+	if (!f) {
+		pr_debug("popen failed\n");
+		return -1;
+	}
+
+	ret = read_objdump_output(f, &buf, &len);
+	if (len) {
+		pr_debug("objdump read too few bytes\n");
+		if (!ret)
+			ret = len;
+	}
+
+	pclose(f);
+
+	return ret;
+}
+
+static int read_object_code(u64 addr, size_t len, u8 cpumode,
+			    struct thread *thread, struct machine *machine)
+{
+	struct addr_location al;
+	unsigned char buf1[BUFSZ];
+	unsigned char buf2[BUFSZ];
+	size_t ret_len;
+	u64 objdump_addr;
+	int ret;
+
+	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
+
+	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr,
+			      &al);
+	if (!al.map || !al.map->dso) {
+		pr_debug("thread__find_addr_map failed\n");
+		return -1;
+	}
+
+	pr_debug("File is: %s\n", al.map->dso->long_name);
+
+	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+		pr_debug("Unexpected kernel address - skipping\n");
+		return 0;
+	}
+
+	pr_debug("On file address is: %#"PRIx64"\n", al.addr);
+
+	if (len > BUFSZ)
+		len = BUFSZ;
+
+	/* Do not go off the map */
+	if (addr + len > al.map->end)
+		len = al.map->end - addr;
+
+	/* Read the object code using perf */
+	ret_len = dso__data_read_offset(al.map->dso, machine, al.addr, buf1,
+					len);
+	if (ret_len != len) {
+		pr_debug("dso__data_read_offset failed\n");
+		return -1;
+	}
+
+	/*
+	 * Converting addresses for use by objdump requires more information.
+	 * map__load() does that.  See map__rip_2objdump() for details.
+	 */
+	if (map__load(al.map, NULL))
+		return -1;
+
+	/* Read the object code using objdump */
+	objdump_addr = map__rip_2objdump(al.map, al.addr);
+	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
+	if (ret > 0) {
+		/*
+		 * The kernel maps are inaccurate - assume objdump is right in
+		 * that case.
+		 */
+		if (cpumode == PERF_RECORD_MISC_KERNEL ||
+		    cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
+			len -= ret;
+			if (len)
+				pr_debug("Reducing len to %zu\n", len);
+			else
+				return -1;
+		}
+	}
+	if (ret < 0) {
+		pr_debug("read_via_objdump failed\n");
+		return -1;
+	}
+
+	/* The results should be identical */
+	if (memcmp(buf1, buf2, len)) {
+		pr_debug("Bytes read differ from those read by objdump\n");
+		return -1;
+	}
+	pr_debug("Bytes read match those read by objdump\n");
+
+	return 0;
+}
+
+static int process_sample_event(struct machine *machine,
+				struct perf_evlist *evlist,
+				union perf_event *event)
+{
+	struct perf_sample sample;
+	struct thread *thread;
+	u8 cpumode;
+
+	if (perf_evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("perf_evlist__parse_sample failed\n");
+		return -1;
+	}
+
+	thread = machine__findnew_thread(machine, sample.pid);
+	if (!thread) {
+		pr_debug("machine__findnew_thread failed\n");
+		return -1;
+	}
+
+	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+	return read_object_code(sample.ip, READLEN, cpumode, thread, machine);
+}
+
+static int process_event(struct machine *machine, struct perf_evlist *evlist,
+			 union perf_event *event)
+{
+	if (event->header.type == PERF_RECORD_SAMPLE)
+		return process_sample_event(machine, evlist, event);
+
+	if (event->header.type < PERF_RECORD_MAX)
+		return machine__process_event(machine, event);
+
+	return 0;
+}
+
+static int process_events(struct machine *machine, struct perf_evlist *evlist)
+{
+	union perf_event *event;
+	int i, ret;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			ret = process_event(machine, evlist, event);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+static int comp(const void *a, const void *b)
+{
+	return *(int *)a - *(int *)b;
+}
+
+static void do_sort_something(void)
+{
+	size_t sz = 40960;
+	int buf[sz], i;
+
+	for (i = 0; i < (int)sz; i++)
+		buf[i] = sz - i - 1;
+
+	qsort(buf, sz, sizeof(int), comp);
+
+	for (i = 0; i < (int)sz; i++) {
+		if (buf[i] != i) {
+			pr_debug("qsort failed\n");
+			break;
+		}
+	}
+}
+
+static void sort_something(void)
+{
+	int i;
+
+	for (i = 0; i < 10; i++)
+		do_sort_something();
+}
+
+static void syscall_something(void)
+{
+	int pipefd[2];
+	int i;
+
+	for (i = 0; i < 1000; i++) {
+		if (pipe(pipefd) < 0) {
+			pr_debug("pipe failed\n");
+			break;
+		}
+		close(pipefd[1]);
+		close(pipefd[0]);
+	}
+}
+
+static void fs_something(void)
+{
+	const char *test_file_name = "temp-perf-code-reading-test-file--";
+	FILE *f;
+	int i;
+
+	for (i = 0; i < 1000; i++) {
+		f = fopen(test_file_name, "w+");
+		if (f) {
+			fclose(f);
+			unlink(test_file_name);
+		}
+	}
+}
+
+static void do_something(void)
+{
+	fs_something();
+
+	sort_something();
+
+	syscall_something();
+}
+
+enum {
+	TEST_CODE_READING_OK,
+	TEST_CODE_READING_NO_VMLINUX,
+	TEST_CODE_READING_NO_ACCESS,
+};
+
+static int do_test_code_reading(void)
+{
+	struct machines machines;
+	struct machine *machine;
+	struct thread *thread;
+	struct perf_record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.freq		     = 4000,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+	};
+	struct thread_map *threads = NULL;
+	struct cpu_map *cpus = NULL;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel = NULL;
+	int err = -1, ret;
+	pid_t pid;
+	struct map *map;
+	bool have_vmlinux, excl_kernel = false;
+
+	pid = getpid();
+
+	machines__init(&machines);
+	machine = &machines.host;
+
+	ret = machine__create_kernel_maps(machine);
+	if (ret < 0) {
+		pr_debug("machine__create_kernel_maps failed\n");
+		goto out_err;
+	}
+
+	/* Load kernel map */
+	map = machine->vmlinux_maps[MAP__FUNCTION];
+	ret = map__load(map, NULL);
+	if (ret < 0) {
+		pr_debug("map__load failed\n");
+		goto out_err;
+	}
+	have_vmlinux = map->dso->symtab_type == DSO_BINARY_TYPE__VMLINUX;
+	/* No point getting kernel events if there is no vmlinux */
+	if (!have_vmlinux)
+		excl_kernel = true;
+
+	threads = thread_map__new_by_tid(pid);
+	if (!threads) {
+		pr_debug("thread_map__new_by_tid failed\n");
+		goto out_err;
+	}
+
+	ret = perf_event__synthesize_thread_map(NULL, threads,
+						perf_event__process, machine);
+	if (ret < 0) {
+		pr_debug("perf_event__synthesize_thread_map failed\n");
+		goto out_err;
+	}
+
+	thread = machine__findnew_thread(machine, pid);
+	if (!thread) {
+		pr_debug("machine__findnew_thread failed\n");
+		goto out_err;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (!cpus) {
+		pr_debug("cpu_map__new failed\n");
+		goto out_err;
+	}
+
+	while (1) {
+		const char *str;
+
+		evlist = perf_evlist__new();
+		if (!evlist) {
+			pr_debug("perf_evlist__new failed\n");
+			goto out_err;
+		}
+
+		perf_evlist__set_maps(evlist, cpus, threads);
+
+		if (excl_kernel)
+			str = "cycles:u";
+		else
+			str = "cycles";
+		pr_debug("Parsing event '%s'\n", str);
+		ret = parse_events(evlist, str);
+		if (ret < 0) {
+			pr_debug("parse_events failed\n");
+			goto out_err;
+		}
+
+		perf_evlist__config(evlist, &opts);
+
+		evsel = perf_evlist__first(evlist);
+
+		evsel->attr.comm = 1;
+		evsel->attr.disabled = 1;
+		evsel->attr.enable_on_exec = 0;
+
+		ret = perf_evlist__open(evlist);
+		if (ret < 0) {
+			if (!excl_kernel) {
+				excl_kernel = true;
+				perf_evlist__delete(evlist);
+				evlist = NULL;
+				continue;
+			}
+			pr_debug("perf_evlist__open failed\n");
+			goto out_err;
+		}
+		break;
+	}
+
+	ret = perf_evlist__mmap(evlist, UINT_MAX, false);
+	if (ret < 0) {
+		pr_debug("perf_evlist__mmap failed\n");
+		goto out_err;
+	}
+
+	perf_evlist__enable(evlist);
+
+	do_something();
+
+	perf_evlist__disable(evlist);
+
+	ret = process_events(machine, evlist);
+	if (ret < 0)
+		goto out_err;
+
+	if (!have_vmlinux)
+		err = TEST_CODE_READING_NO_VMLINUX;
+	else if (excl_kernel)
+		err = TEST_CODE_READING_NO_ACCESS;
+	else
+		err = TEST_CODE_READING_OK;
+out_err:
+	if (evlist) {
+		perf_evlist__munmap(evlist);
+		perf_evlist__close(evlist);
+		perf_evlist__delete(evlist);
+	}
+	if (cpus)
+		cpu_map__delete(cpus);
+	if (threads)
+		thread_map__delete(threads);
+	machines__destroy_kernel_maps(&machines);
+	machine__delete_threads(machine);
+	machines__exit(&machines);
+
+	return err;
+}
+
+int test__code_reading(void)
+{
+	int ret;
+
+	ret = do_test_code_reading();
+
+	switch (ret) {
+	case TEST_CODE_READING_OK:
+		return 0;
+	case TEST_CODE_READING_NO_VMLINUX:
+		fprintf(stderr, " (no vmlinux)");
+		return 0;
+	case TEST_CODE_READING_NO_ACCESS:
+		fprintf(stderr, " (no access)");
+		return 0;
+	default:
+		return -1;
+	};
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d22202a..c748f53 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -36,5 +36,6 @@ int test__bp_signal_overflow(void);
 int test__task_exit(void);
 int test__sw_clock_freq(void);
 int test__perf_time_to_tsc(void);
+int test__code_reading(void);
 
 #endif /* TESTS_H */
-- 
cgit v0.10.2


From 5b7ba82a75915e739709d0ace4bb559cb280db09 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:46 +0300
Subject: perf symbols: Load kernel maps before using

In order to use kernel maps to read object code, those maps must be
adjusted to map to the dso file offset.  Because lazy-initialization is
used, that is not done until symbols are loaded.  However the maps are
first used by thread__find_addr_map() before symbols are loaded.  So
this patch changes thread__find_addr() to "load" kernel maps before
using them.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 1d8de2e..f012a98 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -206,7 +206,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
+			      event->ip.ip, &al, NULL);
 
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1cad370..cd616ff 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -341,10 +341,10 @@ static void print_sample_addr(union perf_event *event,
 		return;
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      sample->addr, &al);
+			      sample->addr, &al, NULL);
 	if (!al.map)
 		thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
-				      sample->addr, &al);
+				      sample->addr, &al, NULL);
 
 	al.cpu = sample->cpu;
 	al.sym = NULL;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 28bee62..0c7b052 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -138,7 +138,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr,
-			      &al);
+			      &al, NULL);
 	if (!al.map || !al.map->dso) {
 		pr_debug("thread__find_addr_map failed\n");
 		return -1;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 5295625..3a0f508 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -33,7 +33,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 	}
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, &al);
+			      event->ip.ip, &al, NULL);
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9541270..cc7c0c9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -592,9 +592,10 @@ int perf_event__process(struct perf_tool *tool __maybe_unused,
 void thread__find_addr_map(struct thread *self,
 			   struct machine *machine, u8 cpumode,
 			   enum map_type type, u64 addr,
-			   struct addr_location *al)
+			   struct addr_location *al, symbol_filter_t filter)
 {
 	struct map_groups *mg = &self->mg;
+	bool load_map = false;
 
 	al->thread = self;
 	al->addr = addr;
@@ -609,11 +610,13 @@ void thread__find_addr_map(struct thread *self,
 	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
 		al->level = 'k';
 		mg = &machine->kmaps;
+		load_map = true;
 	} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
 		al->level = '.';
 	} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
 		al->level = 'g';
 		mg = &machine->kmaps;
+		load_map = true;
 	} else {
 		/*
 		 * 'u' means guest os user space.
@@ -654,8 +657,15 @@ try_again:
 			mg = &machine->kmaps;
 			goto try_again;
 		}
-	} else
+	} else {
+		/*
+		 * Kernel maps might be changed when loading symbols so loading
+		 * must be done prior to using kernel maps.
+		 */
+		if (load_map)
+			map__load(al->map, filter);
 		al->addr = al->map->map_ip(al->map, al->addr);
+	}
 }
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
@@ -663,7 +673,7 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				struct addr_location *al,
 				symbol_filter_t filter)
 {
-	thread__find_addr_map(thread, machine, cpumode, type, addr, al);
+	thread__find_addr_map(thread, machine, cpumode, type, addr, al, filter);
 	if (al->map != NULL)
 		al->sym = map__find_symbol(al->map, al->addr, filter);
 	else
@@ -699,7 +709,7 @@ int perf_event__preprocess_sample(const union perf_event *event,
 		machine__create_kernel_maps(machine);
 
 	thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
-			      event->ip.ip, al);
+			      event->ip.ip, al, filter);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 0fe1f9c..f98d1d9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -41,7 +41,7 @@ static inline struct map *thread__find_map(struct thread *self,
 
 void thread__find_addr_map(struct thread *thread, struct machine *machine,
 			   u8 cpumode, enum map_type type, u64 addr,
-			   struct addr_location *al);
+			   struct addr_location *al, symbol_filter_t filter);
 
 void thread__find_addr_location(struct thread *thread, struct machine *machine,
 				u8 cpumode, enum map_type type, u64 addr,
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 958723b..5bbd494 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -272,7 +272,7 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
 	struct addr_location al;
 
 	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, ip, &al);
+			      MAP__FUNCTION, ip, &al, NULL);
 	return al.map;
 }
 
@@ -349,7 +349,7 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
 	ssize_t size;
 
 	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, addr, &al);
+			      MAP__FUNCTION, addr, &al, NULL);
 	if (!al.map) {
 		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
 		return -1;
-- 
cgit v0.10.2


From 39b12f7812710e9a5896805d96812b3ede7491e8 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:47 +0300
Subject: perf tools: Make it possible to read object code from vmlinux

The new "object code reading" test shows that it is not possible to read
object code from vmlinux.  That is because the mappings do not map to
the dso.  This patch fixes that.

A side-effect of changing the kernel map is that the "reloc" offset must
be taken into account.  As a result of that separate map functions for
relocation are no longer needed.

Also fixing up the maps to match the symbols no longer makes sense and
so is not done.

The vmlinux dso data_type is now set to either DSO_BINARY_TYPE__VMLINUX
or DSO_BINARY_TYPE__GUEST_VMLINUX as approprite, which enables the
correct file name to be determined by dso__binary_type_file().

This patch breaks the "vmlinux symtab matches kallsyms" test.  That is
fixed in a following patch.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index c4374f0..121583d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -78,6 +78,8 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 			 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
 		break;
 
+	case DSO_BINARY_TYPE__VMLINUX:
+	case DSO_BINARY_TYPE__GUEST_VMLINUX:
 	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
 		snprintf(file, size, "%s%s",
 			 symbol_conf.symfs, dso->long_name);
@@ -95,9 +97,7 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 
 	default:
 	case DSO_BINARY_TYPE__KALLSYMS:
-	case DSO_BINARY_TYPE__VMLINUX:
 	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
-	case DSO_BINARY_TYPE__GUEST_VMLINUX:
 	case DSO_BINARY_TYPE__JAVA_JIT:
 	case DSO_BINARY_TYPE__NOT_FOUND:
 		ret = -1;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index d51aaf2..02aadaf 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/rbtree.h>
+#include <stdbool.h>
 #include "types.h"
 #include "map.h"
 
@@ -146,4 +147,11 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
 size_t dso__fprintf_symbols_by_name(struct dso *dso,
 				    enum map_type type, FILE *fp);
 size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+
+static inline bool dso__is_vmlinux(struct dso *dso)
+{
+	return dso->data_type == DSO_BINARY_TYPE__VMLINUX ||
+	       dso->data_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
+}
+
 #endif /* __PERF_DSO */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f9f9d63..dc35dcf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -628,10 +628,8 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
 	struct map *map = machine->vmlinux_maps[type];
 	int ret = dso__load_vmlinux_path(map->dso, map, filter);
 
-	if (ret > 0) {
+	if (ret > 0)
 		dso__set_loaded(map->dso, type);
-		map__reloc_vmlinux(map);
-	}
 
 	return ret;
 }
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8bcdf9e..5f662a3 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -182,12 +182,6 @@ int map__load(struct map *map, symbol_filter_t filter)
 #endif
 		return -1;
 	}
-	/*
-	 * Only applies to the kernel, as its symtabs aren't relative like the
-	 * module ones.
-	 */
-	if (map->dso->kernel)
-		map__reloc_vmlinux(map);
 
 	return 0;
 }
@@ -513,35 +507,6 @@ int map_groups__clone(struct map_groups *mg,
 	return 0;
 }
 
-static u64 map__reloc_map_ip(struct map *map, u64 ip)
-{
-	return ip + (s64)map->pgoff;
-}
-
-static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
-{
-	return ip - (s64)map->pgoff;
-}
-
-void map__reloc_vmlinux(struct map *map)
-{
-	struct kmap *kmap = map__kmap(map);
-	s64 reloc;
-
-	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
-		return;
-
-	reloc = (kmap->ref_reloc_sym->unrelocated_addr -
-		 kmap->ref_reloc_sym->addr);
-
-	if (!reloc)
-		return;
-
-	map->map_ip   = map__reloc_map_ip;
-	map->unmap_ip = map__reloc_unmap_ip;
-	map->pgoff    = reloc;
-}
-
 void maps__insert(struct rb_root *maps, struct map *map)
 {
 	struct rb_node **p = &maps->rb_node;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4b12bf8..ed6f443 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -603,7 +603,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 						     ".gnu.prelink_undo",
 						     NULL) != NULL);
 	} else {
-		ss->adjust_symbols = 0;
+		ss->adjust_symbols = ehdr.e_type == ET_EXEC;
 	}
 
 	ss->name   = strdup(name);
@@ -624,6 +624,37 @@ out_close:
 	return err;
 }
 
+/**
+ * ref_reloc_sym_not_found - has kernel relocation symbol been found.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns %true if we are dealing with the kernel maps and the
+ * relocation reference symbol has not yet been found.  Otherwise %false is
+ * returned.
+ */
+static bool ref_reloc_sym_not_found(struct kmap *kmap)
+{
+	return kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
+	       !kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/**
+ * ref_reloc - kernel relocation offset.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns the offset of kernel addresses as determined by using
+ * the relocation reference symbol i.e. if the kernel has not been relocated
+ * then the return value is zero.
+ */
+static u64 ref_reloc(struct kmap *kmap)
+{
+	if (kmap && kmap->ref_reloc_sym &&
+	    kmap->ref_reloc_sym->unrelocated_addr)
+		return kmap->ref_reloc_sym->addr -
+		       kmap->ref_reloc_sym->unrelocated_addr;
+	return 0;
+}
+
 int dso__load_sym(struct dso *dso, struct map *map,
 		  struct symsrc *syms_ss, struct symsrc *runtime_ss,
 		  symbol_filter_t filter, int kmodule)
@@ -642,6 +673,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	Elf_Scn *sec, *sec_strndx;
 	Elf *elf;
 	int nr = 0;
+	bool remap_kernel = false, adjust_kernel_syms = false;
 
 	dso->symtab_type = syms_ss->type;
 
@@ -681,7 +713,31 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
 	memset(&sym, 0, sizeof(sym));
-	dso->adjust_symbols = runtime_ss->adjust_symbols;
+
+	/*
+	 * The kernel relocation symbol is needed in advance in order to adjust
+	 * kernel maps correctly.
+	 */
+	if (ref_reloc_sym_not_found(kmap)) {
+		elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+			const char *elf_name = elf_sym__name(&sym, symstrs);
+
+			if (strcmp(elf_name, kmap->ref_reloc_sym->name))
+				continue;
+			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
+			break;
+		}
+	}
+
+	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
+	/*
+	 * Initial kernel and module mappings do not map to the dso.  For
+	 * function mappings, flag the fixups.
+	 */
+	if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) {
+		remap_kernel = true;
+		adjust_kernel_syms = dso->adjust_symbols;
+	}
 	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
 		struct symbol *f;
 		const char *elf_name = elf_sym__name(&sym, symstrs);
@@ -690,10 +746,6 @@ int dso__load_sym(struct dso *dso, struct map *map,
 		const char *section_name;
 		bool used_opd = false;
 
-		if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
-		    strcmp(elf_name, kmap->ref_reloc_sym->name) == 0)
-			kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
-
 		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
 
@@ -745,15 +797,37 @@ int dso__load_sym(struct dso *dso, struct map *map,
 		    (sym.st_value & 1))
 			--sym.st_value;
 
-		if (dso->kernel != DSO_TYPE_USER || kmodule) {
+		if (dso->kernel || kmodule) {
 			char dso_name[PATH_MAX];
 
+			/* Adjust symbol to map to file offset */
+			if (adjust_kernel_syms)
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+
 			if (strcmp(section_name,
 				   (curr_dso->short_name +
 				    dso->short_name_len)) == 0)
 				goto new_symbol;
 
 			if (strcmp(section_name, ".text") == 0) {
+				/*
+				 * The initial kernel mapping is based on
+				 * kallsyms and identity maps.  Overwrite it to
+				 * map to the kernel dso.
+				 */
+				if (remap_kernel && dso->kernel) {
+					remap_kernel = false;
+					map->start = shdr.sh_addr +
+						     ref_reloc(kmap);
+					map->end = map->start + shdr.sh_size;
+					map->pgoff = shdr.sh_offset;
+					map->map_ip = map__map_ip;
+					map->unmap_ip = map__unmap_ip;
+					/* Ensure maps are correctly ordered */
+					map_groups__remove(kmap->kmaps, map);
+					map_groups__insert(kmap->kmaps, map);
+				}
+
 				curr_map = map;
 				curr_dso = dso;
 				goto new_symbol;
@@ -781,8 +855,16 @@ int dso__load_sym(struct dso *dso, struct map *map,
 					dso__delete(curr_dso);
 					goto out_elf_end;
 				}
-				curr_map->map_ip = identity__map_ip;
-				curr_map->unmap_ip = identity__map_ip;
+				if (adjust_kernel_syms) {
+					curr_map->start = shdr.sh_addr +
+							  ref_reloc(kmap);
+					curr_map->end = curr_map->start +
+							shdr.sh_size;
+					curr_map->pgoff = shdr.sh_offset;
+				} else {
+					curr_map->map_ip = identity__map_ip;
+					curr_map->unmap_ip = identity__map_ip;
+				}
 				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmap->kmaps, curr_map);
 				dsos__add(&dso->node, curr_dso);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ea62ecd..04300dd 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -917,6 +917,10 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
 	symsrc__destroy(&ss);
 
 	if (err > 0) {
+		if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+			dso->data_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+		else
+			dso->data_type = DSO_BINARY_TYPE__VMLINUX;
 		dso__set_long_name(dso, (char *)vmlinux);
 		dso__set_loaded(dso, map->type);
 		pr_debug("Using %s for symbols\n", symfs_vmlinux);
@@ -989,7 +993,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 			dso__set_long_name(dso,
 					   strdup(symbol_conf.vmlinux_name));
 			dso->lname_alloc = 1;
-			goto out_fixup;
+			return err;
 		}
 		return err;
 	}
@@ -997,7 +1001,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map,
 	if (vmlinux_path != NULL) {
 		err = dso__load_vmlinux_path(dso, map, filter);
 		if (err > 0)
-			goto out_fixup;
+			return err;
 	}
 
 	/* do not try local files if a symfs was given */
@@ -1058,7 +1062,6 @@ do_kallsyms:
 
 	if (err > 0) {
 		dso__set_long_name(dso, strdup("[kernel.kallsyms]"));
-out_fixup:
 		map__fixup_start(map);
 		map__fixup_end(map);
 	}
@@ -1089,7 +1092,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 		if (symbol_conf.default_guest_vmlinux_name != NULL) {
 			err = dso__load_vmlinux(dso, map,
 				symbol_conf.default_guest_vmlinux_name, filter);
-			goto out_try_fixup;
+			return err;
 		}
 
 		kallsyms_filename = symbol_conf.default_guest_kallsyms;
@@ -1101,15 +1104,10 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 	}
 
 	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
-	if (err > 0)
-		pr_debug("Using %s for symbols\n", kallsyms_filename);
-
-out_try_fixup:
 	if (err > 0) {
-		if (kallsyms_filename != NULL) {
-			machine__mmap_name(machine, path, sizeof(path));
-			dso__set_long_name(dso, strdup(path));
-		}
+		pr_debug("Using %s for symbols\n", kallsyms_filename);
+		machine__mmap_name(machine, path, sizeof(path));
+		dso__set_long_name(dso, strdup(path));
 		map__fixup_start(map);
 		map__fixup_end(map);
 	}
-- 
cgit v0.10.2


From d380b34830cc76461feab012f0bc52b01e65087c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:48 +0300
Subject: perf tests: Adjust the vmlinux symtab matches kallsyms test

The vmlinux maps now map to the dso and the symbol values are now file
offsets.  For comparison with kallsyms the virtual memory address is
needed which is obtained by unmapping the symbol value.

The "vmlinux symtab matches kallsyms" is adjusted accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index add1539..e2e1498 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -25,6 +25,7 @@ int test__vmlinux_matches_kallsyms(void)
 	struct machine kallsyms, vmlinux;
 	enum map_type type = MAP__FUNCTION;
 	struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", };
+	u64 mem_start, mem_end;
 
 	/*
 	 * Step 1:
@@ -123,10 +124,14 @@ int test__vmlinux_matches_kallsyms(void)
 		if (sym->start == sym->end)
 			continue;
 
-		first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
+
+		first_pair = machine__find_kernel_symbol(&kallsyms, type,
+							 mem_start, NULL, NULL);
 		pair = first_pair;
 
-		if (pair && pair->start == sym->start) {
+		if (pair && pair->start == mem_start) {
 next_pair:
 			if (strcmp(sym->name, pair->name) == 0) {
 				/*
@@ -138,10 +143,11 @@ next_pair:
 				 * off the real size. More than that and we
 				 * _really_ have a problem.
 				 */
-				s64 skew = sym->end - pair->end;
+				s64 skew = mem_end - pair->end;
 				if (llabs(skew) >= page_size)
 					pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
-						 sym->start, sym->name, sym->end, pair->end);
+						 mem_start, sym->name, mem_end,
+						 pair->end);
 
 				/*
 				 * Do not count this as a failure, because we
@@ -159,7 +165,7 @@ detour:
 				if (nnd) {
 					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
-					if (next->start == sym->start) {
+					if (next->start == mem_start) {
 						pair = next;
 						goto next_pair;
 					}
@@ -172,10 +178,11 @@ detour:
 				}
 
 				pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-					 sym->start, sym->name, pair->name);
+					 mem_start, sym->name, pair->name);
 			}
 		} else
-			pr_debug("%#" PRIx64 ": %s not on kallsyms\n", sym->start, sym->name);
+			pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
+				 mem_start, sym->name);
 
 		err = -1;
 	}
@@ -208,16 +215,19 @@ detour:
 	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
 
-		pair = map_groups__find(&kallsyms.kmaps, type, pos->start);
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end);
+
+		pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
 		if (pair == NULL || pair->priv)
 			continue;
 
-		if (pair->start == pos->start) {
+		if (pair->start == mem_start) {
 			pair->priv = 1;
 			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
 				pos->start, pos->end, pos->pgoff, pos->dso->name);
-			if (pos->pgoff != pair->pgoff || pos->end != pair->end)
-				pr_info(": \n*%" PRIx64 "-%" PRIx64 " %" PRIx64 "",
+			if (mem_end != pair->end)
+				pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
 					pair->start, pair->end, pair->pgoff);
 			pr_info(" %s\n", pair->dso->name);
 			pair->priv = 1;
-- 
cgit v0.10.2


From 0131c4ec794a7409eafff0c79105309540aaca4d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:50 +0300
Subject: perf tools: Make it possible to read object code from kernel modules

The new "object code reading" test shows that it is not possible to read
object code from kernel modules.  That is because the mappings do not
map to the dsos.  This patch fixes that.

This involves identifying and flagging relocatable (ELF type ET_REL)
files (e.g. kernel modules) for symbol adjustment and updating
map__rip_2objdump() accordingly.  The kmodule parameter of
dso__load_sym() is taken into use and the module map altered to map to
the dso.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 121583d..1955804 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -419,6 +419,7 @@ struct dso *dso__new(const char *name)
 		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
 		dso->data_type   = DSO_BINARY_TYPE__NOT_FOUND;
 		dso->loaded = 0;
+		dso->rel = 0;
 		dso->sorted_by_name = 0;
 		dso->has_build_id = 0;
 		dso->kernel = DSO_TYPE_USER;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 02aadaf..735a837 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -85,6 +85,7 @@ struct dso {
 	u8		 lname_alloc:1;
 	u8		 sorted_by_name;
 	u8		 loaded;
+	u8		 rel;
 	u8		 build_id[BUILD_ID_SIZE];
 	const char	 *short_name;
 	char		 *long_name;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 5f662a3..4d599fe 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -248,14 +248,18 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
 
 /*
  * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
- * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
+ * relative to section start.
  */
 u64 map__rip_2objdump(struct map *map, u64 rip)
 {
-	u64 addr = map->dso->adjust_symbols ?
-			map->unmap_ip(map, rip) :	/* RIP -> IP */
-			rip;
-	return addr;
+	if (!map->dso->adjust_symbols)
+		return rip;
+
+	if (map->dso->rel)
+		return rip - map->pgoff;
+
+	return map->unmap_ip(map, rip);
 }
 
 void map_groups__init(struct map_groups *mg)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index ed6f443..3eaa7b4 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -599,11 +599,13 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	if (dso->kernel == DSO_TYPE_USER) {
 		GElf_Shdr shdr;
 		ss->adjust_symbols = (ehdr.e_type == ET_EXEC ||
+				ehdr.e_type == ET_REL ||
 				elf_section_by_name(elf, &ehdr, &shdr,
 						     ".gnu.prelink_undo",
 						     NULL) != NULL);
 	} else {
-		ss->adjust_symbols = ehdr.e_type == ET_EXEC;
+		ss->adjust_symbols = ehdr.e_type == ET_EXEC ||
+				     ehdr.e_type == ET_REL;
 	}
 
 	ss->name   = strdup(name);
@@ -676,6 +678,14 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	bool remap_kernel = false, adjust_kernel_syms = false;
 
 	dso->symtab_type = syms_ss->type;
+	dso->rel = syms_ss->ehdr.e_type == ET_REL;
+
+	/*
+	 * Modules may already have symbols from kallsyms, but those symbols
+	 * have the wrong values for the dso maps, so remove them.
+	 */
+	if (kmodule && syms_ss->symtab)
+		symbols__delete(&dso->symbols[map->type]);
 
 	if (!syms_ss->symtab) {
 		syms_ss->symtab  = syms_ss->dynsym;
@@ -828,11 +838,24 @@ int dso__load_sym(struct dso *dso, struct map *map,
 					map_groups__insert(kmap->kmaps, map);
 				}
 
+				/*
+				 * The initial module mapping is based on
+				 * /proc/modules mapped to offset zero.
+				 * Overwrite it to map to the module dso.
+				 */
+				if (remap_kernel && kmodule) {
+					remap_kernel = false;
+					map->pgoff = shdr.sh_offset;
+				}
+
 				curr_map = map;
 				curr_dso = dso;
 				goto new_symbol;
 			}
 
+			if (!kmap)
+				goto new_symbol;
+
 			snprintf(dso_name, sizeof(dso_name),
 				 "%s%s", dso->short_name, section_name);
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 04300dd..b9056a8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -854,10 +854,15 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
-	if (syms_ss)
-		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, 0);
-	else
+	if (syms_ss) {
+		int km;
+
+		km = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+		     dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
+		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, km);
+	} else {
 		ret = -1;
+	}
 
 	if (ret > 0) {
 		int nr_plt;
-- 
cgit v0.10.2


From 8e0cf965f95edd41df11cca50b92b4cb6ea8d80a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:51 +0300
Subject: perf symbols: Add support for reading from /proc/kcore

In the absence of vmlinux, perf tools uses kallsyms for symbols.  If the
user has access, now also map to /proc/kcore.

The dso data_type is now set to either DSO_BINARY_TYPE__KCORE or
DSO_BINARY_TYPE__GUEST_KCORE as approprite.

This patch breaks the "vmlinux symtab matches kallsyms" test.  That is
fixed in a following patch.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 1955804..e3c1ff8 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -95,6 +95,11 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type,
 			 dso->long_name);
 		break;
 
+	case DSO_BINARY_TYPE__KCORE:
+	case DSO_BINARY_TYPE__GUEST_KCORE:
+		snprintf(file, size, "%s", dso->long_name);
+		break;
+
 	default:
 	case DSO_BINARY_TYPE__KALLSYMS:
 	case DSO_BINARY_TYPE__GUEST_KALLSYMS:
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 735a837..b793053 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -21,6 +21,8 @@ enum dso_binary_type {
 	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
 	DSO_BINARY_TYPE__GUEST_KMODULE,
 	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+	DSO_BINARY_TYPE__KCORE,
+	DSO_BINARY_TYPE__GUEST_KCORE,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
@@ -155,4 +157,10 @@ static inline bool dso__is_vmlinux(struct dso *dso)
 	       dso->data_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
 }
 
+static inline bool dso__is_kcore(struct dso *dso)
+{
+	return dso->data_type == DSO_BINARY_TYPE__KCORE ||
+	       dso->data_type == DSO_BINARY_TYPE__GUEST_KCORE;
+}
+
 #endif /* __PERF_DSO */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index dc35dcf..ef3b49c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -856,6 +856,18 @@ static void machine__set_kernel_mmap_len(struct machine *machine,
 	}
 }
 
+static bool machine__uses_kcore(struct machine *machine)
+{
+	struct dso *dso;
+
+	list_for_each_entry(dso, &machine->kernel_dsos, node) {
+		if (dso__is_kcore(dso))
+			return true;
+	}
+
+	return false;
+}
+
 static int machine__process_kernel_mmap_event(struct machine *machine,
 					      union perf_event *event)
 {
@@ -864,6 +876,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 	enum dso_kernel_type kernel_type;
 	bool is_kernel_mmap;
 
+	/* If we have maps from kcore then we do not need or want any others */
+	if (machine__uses_kcore(machine))
+		return 0;
+
 	machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
 	if (machine__is_host(machine))
 		kernel_type = DSO_TYPE_KERNEL;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 4d599fe..9e8304c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -555,3 +555,21 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
 
 	return NULL;
 }
+
+struct map *maps__first(struct rb_root *maps)
+{
+	struct rb_node *first = rb_first(maps);
+
+	if (first)
+		return rb_entry(first, struct map, rb_node);
+	return NULL;
+}
+
+struct map *maps__next(struct map *map)
+{
+	struct rb_node *next = rb_next(&map->rb_node);
+
+	if (next)
+		return rb_entry(next, struct map, rb_node);
+	return NULL;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index a887f2c..2cc93cb 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -112,6 +112,8 @@ size_t __map_groups__fprintf_maps(struct map_groups *mg,
 void maps__insert(struct rb_root *maps, struct map *map);
 void maps__remove(struct rb_root *maps, struct map *map);
 struct map *maps__find(struct rb_root *maps, u64 addr);
+struct map *maps__first(struct rb_root *maps);
+struct map *maps__next(struct map *map);
 void map_groups__init(struct map_groups *mg);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct map_groups *mg,
@@ -139,6 +141,17 @@ static inline struct map *map_groups__find(struct map_groups *mg,
 	return maps__find(&mg->maps[type], addr);
 }
 
+static inline struct map *map_groups__first(struct map_groups *mg,
+					    enum map_type type)
+{
+	return maps__first(&mg->maps[type]);
+}
+
+static inline struct map *map_groups__next(struct map *map)
+{
+	return maps__next(map);
+}
+
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
 				       enum map_type type, u64 addr,
 				       struct map **mapp,
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 3eaa7b4..a7b9ab5 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -951,6 +951,57 @@ out_elf_end:
 	return err;
 }
 
+static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
+{
+	GElf_Phdr phdr;
+	size_t i, phdrnum;
+	int err;
+	u64 sz;
+
+	if (elf_getphdrnum(elf, &phdrnum))
+		return -1;
+
+	for (i = 0; i < phdrnum; i++) {
+		if (gelf_getphdr(elf, i, &phdr) == NULL)
+			return -1;
+		if (phdr.p_type != PT_LOAD)
+			continue;
+		if (exe) {
+			if (!(phdr.p_flags & PF_X))
+				continue;
+		} else {
+			if (!(phdr.p_flags & PF_R))
+				continue;
+		}
+		sz = min(phdr.p_memsz, phdr.p_filesz);
+		if (!sz)
+			continue;
+		err = mapfn(phdr.p_vaddr, sz, phdr.p_offset, data);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit)
+{
+	int err;
+	Elf *elf;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		return -1;
+
+	if (is_64_bit)
+		*is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+	err = elf_read_maps(elf, exe, mapfn, data);
+
+	elf_end(elf);
+	return err;
+}
+
 void symbol__elf_init(void)
 {
 	elf_version(EV_CURRENT);
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index a7390cd..3a802c3 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -301,6 +301,13 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
 	return 0;
 }
 
+int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
+		    mapfn_t mapfn __maybe_unused, void *data __maybe_unused,
+		    bool *is_64_bit __maybe_unused)
+{
+	return -1;
+}
+
 void symbol__elf_init(void)
 {
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b9056a8..77f3b95 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -327,6 +327,16 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
 	return NULL;
 }
 
+static struct symbol *symbols__first(struct rb_root *symbols)
+{
+	struct rb_node *n = rb_first(symbols);
+
+	if (n)
+		return rb_entry(n, struct symbol, rb_node);
+
+	return NULL;
+}
+
 struct symbol_name_rb_node {
 	struct rb_node	rb_node;
 	struct symbol	sym;
@@ -397,6 +407,11 @@ struct symbol *dso__find_symbol(struct dso *dso,
 	return symbols__find(&dso->symbols[type], addr);
 }
 
+struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
+{
+	return symbols__first(&dso->symbols[type]);
+}
+
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name)
 {
@@ -533,6 +548,53 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
 	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
 }
 
+static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
+					 symbol_filter_t filter)
+{
+	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	struct map *curr_map;
+	struct symbol *pos;
+	int count = 0, moved = 0;
+	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_node *next = rb_first(root);
+
+	while (next) {
+		char *module;
+
+		pos = rb_entry(next, struct symbol, rb_node);
+		next = rb_next(&pos->rb_node);
+
+		module = strchr(pos->name, '\t');
+		if (module)
+			*module = '\0';
+
+		curr_map = map_groups__find(kmaps, map->type, pos->start);
+
+		if (!curr_map || (filter && filter(curr_map, pos))) {
+			rb_erase(&pos->rb_node, root);
+			symbol__delete(pos);
+		} else {
+			pos->start -= curr_map->start - curr_map->pgoff;
+			if (pos->end)
+				pos->end -= curr_map->start - curr_map->pgoff;
+			if (curr_map != map) {
+				rb_erase(&pos->rb_node, root);
+				symbols__insert(
+					&curr_map->dso->symbols[curr_map->type],
+					pos);
+				++moved;
+			} else {
+				++count;
+			}
+		}
+	}
+
+	/* Symbols have been adjusted */
+	dso->adjust_symbols = 1;
+
+	return count + moved;
+}
+
 /*
  * Split the symbols into maps, making sure there are no overlaps, i.e. the
  * kernel range is broken in several maps, named [kernel].N, as we don't have
@@ -674,6 +736,161 @@ bool symbol__restricted_filename(const char *filename,
 	return restricted;
 }
 
+struct kcore_mapfn_data {
+	struct dso *dso;
+	enum map_type type;
+	struct list_head maps;
+};
+
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+	struct kcore_mapfn_data *md = data;
+	struct map *map;
+
+	map = map__new2(start, md->dso, md->type);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->end = map->start + len;
+	map->pgoff = pgoff;
+
+	list_add(&map->node, &md->maps);
+
+	return 0;
+}
+
+/*
+ * If kallsyms is referenced by name then we look for kcore in the same
+ * directory.
+ */
+static bool kcore_filename_from_kallsyms_filename(char *kcore_filename,
+						  const char *kallsyms_filename)
+{
+	char *name;
+
+	strcpy(kcore_filename, kallsyms_filename);
+	name = strrchr(kcore_filename, '/');
+	if (!name)
+		return false;
+
+	if (!strcmp(name, "/kallsyms")) {
+		strcpy(name, "/kcore");
+		return true;
+	}
+
+	return false;
+}
+
+static int dso__load_kcore(struct dso *dso, struct map *map,
+			   const char *kallsyms_filename)
+{
+	struct map_groups *kmaps = map__kmap(map)->kmaps;
+	struct machine *machine = kmaps->machine;
+	struct kcore_mapfn_data md;
+	struct map *old_map, *new_map, *replacement_map = NULL;
+	bool is_64_bit;
+	int err, fd;
+	char kcore_filename[PATH_MAX];
+	struct symbol *sym;
+
+	/* This function requires that the map is the kernel map */
+	if (map != machine->vmlinux_maps[map->type])
+		return -EINVAL;
+
+	if (!kcore_filename_from_kallsyms_filename(kcore_filename,
+						   kallsyms_filename))
+		return -EINVAL;
+
+	md.dso = dso;
+	md.type = map->type;
+	INIT_LIST_HEAD(&md.maps);
+
+	fd = open(kcore_filename, O_RDONLY);
+	if (fd < 0)
+		return -EINVAL;
+
+	/* Read new maps into temporary lists */
+	err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
+			      &is_64_bit);
+	if (err)
+		goto out_err;
+
+	if (list_empty(&md.maps)) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	/* Remove old maps */
+	old_map = map_groups__first(kmaps, map->type);
+	while (old_map) {
+		struct map *next = map_groups__next(old_map);
+
+		if (old_map != map)
+			map_groups__remove(kmaps, old_map);
+		old_map = next;
+	}
+
+	/* Find the kernel map using the first symbol */
+	sym = dso__first_symbol(dso, map->type);
+	list_for_each_entry(new_map, &md.maps, node) {
+		if (sym && sym->start >= new_map->start &&
+		    sym->start < new_map->end) {
+			replacement_map = new_map;
+			break;
+		}
+	}
+
+	if (!replacement_map)
+		replacement_map = list_entry(md.maps.next, struct map, node);
+
+	/* Add new maps */
+	while (!list_empty(&md.maps)) {
+		new_map = list_entry(md.maps.next, struct map, node);
+		list_del(&new_map->node);
+		if (new_map == replacement_map) {
+			map->start	= new_map->start;
+			map->end	= new_map->end;
+			map->pgoff	= new_map->pgoff;
+			map->map_ip	= new_map->map_ip;
+			map->unmap_ip	= new_map->unmap_ip;
+			map__delete(new_map);
+			/* Ensure maps are correctly ordered */
+			map_groups__remove(kmaps, map);
+			map_groups__insert(kmaps, map);
+		} else {
+			map_groups__insert(kmaps, new_map);
+		}
+	}
+
+	/*
+	 * Set the data type and long name so that kcore can be read via
+	 * dso__data_read_addr().
+	 */
+	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+		dso->data_type = DSO_BINARY_TYPE__GUEST_KCORE;
+	else
+		dso->data_type = DSO_BINARY_TYPE__KCORE;
+	dso__set_long_name(dso, strdup(kcore_filename));
+
+	close(fd);
+
+	if (map->type == MAP__FUNCTION)
+		pr_debug("Using %s for kernel object code\n", kcore_filename);
+	else
+		pr_debug("Using %s for kernel data\n", kcore_filename);
+
+	return 0;
+
+out_err:
+	while (!list_empty(&md.maps)) {
+		map = list_entry(md.maps.next, struct map, node);
+		list_del(&map->node);
+		map__delete(map);
+	}
+	close(fd);
+	return -EINVAL;
+}
+
 int dso__load_kallsyms(struct dso *dso, const char *filename,
 		       struct map *map, symbol_filter_t filter)
 {
@@ -691,7 +908,10 @@ int dso__load_kallsyms(struct dso *dso, const char *filename,
 	else
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
-	return dso__split_kallsyms(dso, map, filter);
+	if (!dso__load_kcore(dso, map, filename))
+		return dso__split_kallsyms_for_kcore(dso, map, filter);
+	else
+		return dso__split_kallsyms(dso, map, filter);
 }
 
 static int dso__load_perf_map(struct dso *dso, struct map *map,
@@ -1065,7 +1285,7 @@ do_kallsyms:
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
 	free(kallsyms_allocated_filename);
 
-	if (err > 0) {
+	if (err > 0 && !dso__is_kcore(dso)) {
 		dso__set_long_name(dso, strdup("[kernel.kallsyms]"));
 		map__fixup_start(map);
 		map__fixup_end(map);
@@ -1109,8 +1329,9 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
 	}
 
 	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
-	if (err > 0) {
+	if (err > 0)
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
+	if (err > 0 && !dso__is_kcore(dso)) {
 		machine__mmap_name(machine, path, sizeof(path));
 		dso__set_long_name(dso, strdup(path));
 		map__fixup_start(map);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5f720dc..fd5b70e 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -215,6 +215,7 @@ struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
 				u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
 					const char *name);
+struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
@@ -247,4 +248,8 @@ void symbols__fixup_duplicate(struct rb_root *symbols);
 void symbols__fixup_end(struct rb_root *symbols);
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type);
 
+typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+		    bool *is_64_bit);
+
 #endif /* __PERF_SYMBOL */
-- 
cgit v0.10.2


From 82e75d00adc5bde3cf98f11e937eed6127163969 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:52 +0300
Subject: perf tests: Adjust the vmlinux symtab matches kallsyms test again

The kallsyms maps now may map to kcore and the symbol values now may be
file offsets.  For comparison with vmlinux the virtual memory address is
needed which is obtained by unmapping the symbol value.

The "vmlinux symtab matches kallsyms" is adjusted accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index e2e1498..2bd13ed 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -16,6 +16,8 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
 	return 0;
 }
 
+#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
+
 int test__vmlinux_matches_kallsyms(void)
 {
 	int err = -1;
@@ -74,7 +76,7 @@ int test__vmlinux_matches_kallsyms(void)
 		goto out;
 	}
 
-	ref_reloc_sym.addr = sym->start;
+	ref_reloc_sym.addr = UM(sym->start);
 
 	/*
 	 * Step 5:
@@ -131,7 +133,7 @@ int test__vmlinux_matches_kallsyms(void)
 							 mem_start, NULL, NULL);
 		pair = first_pair;
 
-		if (pair && pair->start == mem_start) {
+		if (pair && UM(pair->start) == mem_start) {
 next_pair:
 			if (strcmp(sym->name, pair->name) == 0) {
 				/*
@@ -143,11 +145,11 @@ next_pair:
 				 * off the real size. More than that and we
 				 * _really_ have a problem.
 				 */
-				s64 skew = mem_end - pair->end;
+				s64 skew = mem_end - UM(pair->end);
 				if (llabs(skew) >= page_size)
 					pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
 						 mem_start, sym->name, mem_end,
-						 pair->end);
+						 UM(pair->end));
 
 				/*
 				 * Do not count this as a failure, because we
@@ -165,7 +167,7 @@ detour:
 				if (nnd) {
 					struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
-					if (next->start == mem_start) {
+					if (UM(next->start) == mem_start) {
 						pair = next;
 						goto next_pair;
 					}
-- 
cgit v0.10.2


From 7a77bc2c0d2726a7fc9e6b91b36f984c3e377008 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:53 +0300
Subject: perf tests: Add kcore to the object code reading test

Make the "object code reading" test attempt to read from kcore.

The test uses objdump which struggles with kcore. i.e.  doesn't always
work, sometimes takes a long time.  The test has been made to work
around those issues.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-10-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 0c7b052..8e0943b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -20,6 +20,11 @@
 #define BUFSZ	1024
 #define READLEN	128
 
+struct state {
+	u64 done[1024];
+	size_t done_cnt;
+};
+
 static unsigned int hex(char c)
 {
 	if (c >= '0' && c <= '9')
@@ -107,6 +112,9 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf,
 
 	pr_debug("Objdump command is: %s\n", cmd);
 
+	/* Ignore objdump errors */
+	strcat(cmd, " 2>/dev/null");
+
 	f = popen(cmd, "r");
 	if (!f) {
 		pr_debug("popen failed\n");
@@ -126,7 +134,8 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf,
 }
 
 static int read_object_code(u64 addr, size_t len, u8 cpumode,
-			    struct thread *thread, struct machine *machine)
+			    struct thread *thread, struct machine *machine,
+			    struct state *state)
 {
 	struct addr_location al;
 	unsigned char buf1[BUFSZ];
@@ -146,7 +155,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 
 	pr_debug("File is: %s\n", al.map->dso->long_name);
 
-	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+	if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(al.map->dso)) {
 		pr_debug("Unexpected kernel address - skipping\n");
 		return 0;
 	}
@@ -175,6 +185,24 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	if (map__load(al.map, NULL))
 		return -1;
 
+	/* objdump struggles with kcore - try each map only once */
+	if (dso__is_kcore(al.map->dso)) {
+		size_t d;
+
+		for (d = 0; d < state->done_cnt; d++) {
+			if (state->done[d] == al.map->start) {
+				pr_debug("kcore map tested already");
+				pr_debug(" - skipping\n");
+				return 0;
+			}
+		}
+		if (state->done_cnt >= ARRAY_SIZE(state->done)) {
+			pr_debug("Too many kcore maps - skipping\n");
+			return 0;
+		}
+		state->done[state->done_cnt++] = al.map->start;
+	}
+
 	/* Read the object code using objdump */
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
 	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
@@ -186,10 +214,19 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		if (cpumode == PERF_RECORD_MISC_KERNEL ||
 		    cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
 			len -= ret;
-			if (len)
+			if (len) {
 				pr_debug("Reducing len to %zu\n", len);
-			else
+			} else if (dso__is_kcore(al.map->dso)) {
+				/*
+				 * objdump cannot handle very large segments
+				 * that may be found in kcore.
+				 */
+				pr_debug("objdump failed for kcore");
+				pr_debug(" - skipping\n");
+				return 0;
+			} else {
 				return -1;
+			}
 		}
 	}
 	if (ret < 0) {
@@ -209,7 +246,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 
 static int process_sample_event(struct machine *machine,
 				struct perf_evlist *evlist,
-				union perf_event *event)
+				union perf_event *event, struct state *state)
 {
 	struct perf_sample sample;
 	struct thread *thread;
@@ -228,14 +265,15 @@ static int process_sample_event(struct machine *machine,
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	return read_object_code(sample.ip, READLEN, cpumode, thread, machine);
+	return read_object_code(sample.ip, READLEN, cpumode, thread, machine,
+				state);
 }
 
 static int process_event(struct machine *machine, struct perf_evlist *evlist,
-			 union perf_event *event)
+			 union perf_event *event, struct state *state)
 {
 	if (event->header.type == PERF_RECORD_SAMPLE)
-		return process_sample_event(machine, evlist, event);
+		return process_sample_event(machine, evlist, event, state);
 
 	if (event->header.type < PERF_RECORD_MAX)
 		return machine__process_event(machine, event);
@@ -243,14 +281,15 @@ static int process_event(struct machine *machine, struct perf_evlist *evlist,
 	return 0;
 }
 
-static int process_events(struct machine *machine, struct perf_evlist *evlist)
+static int process_events(struct machine *machine, struct perf_evlist *evlist,
+			  struct state *state)
 {
 	union perf_event *event;
 	int i, ret;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
-			ret = process_event(machine, evlist, event);
+			ret = process_event(machine, evlist, event, state);
 			if (ret < 0)
 				return ret;
 		}
@@ -331,10 +370,12 @@ static void do_something(void)
 enum {
 	TEST_CODE_READING_OK,
 	TEST_CODE_READING_NO_VMLINUX,
+	TEST_CODE_READING_NO_KCORE,
 	TEST_CODE_READING_NO_ACCESS,
+	TEST_CODE_READING_NO_KERNEL_OBJ,
 };
 
-static int do_test_code_reading(void)
+static int do_test_code_reading(bool try_kcore)
 {
 	struct machines machines;
 	struct machine *machine;
@@ -348,6 +389,9 @@ static int do_test_code_reading(void)
 			.uses_mmap   = true,
 		},
 	};
+	struct state state = {
+		.done_cnt = 0,
+	};
 	struct thread_map *threads = NULL;
 	struct cpu_map *cpus = NULL;
 	struct perf_evlist *evlist = NULL;
@@ -355,7 +399,7 @@ static int do_test_code_reading(void)
 	int err = -1, ret;
 	pid_t pid;
 	struct map *map;
-	bool have_vmlinux, excl_kernel = false;
+	bool have_vmlinux, have_kcore, excl_kernel = false;
 
 	pid = getpid();
 
@@ -368,6 +412,10 @@ static int do_test_code_reading(void)
 		goto out_err;
 	}
 
+	/* Force the use of kallsyms instead of vmlinux to try kcore */
+	if (try_kcore)
+		symbol_conf.kallsyms_name = "/proc/kallsyms";
+
 	/* Load kernel map */
 	map = machine->vmlinux_maps[MAP__FUNCTION];
 	ret = map__load(map, NULL);
@@ -375,9 +423,15 @@ static int do_test_code_reading(void)
 		pr_debug("map__load failed\n");
 		goto out_err;
 	}
-	have_vmlinux = map->dso->symtab_type == DSO_BINARY_TYPE__VMLINUX;
-	/* No point getting kernel events if there is no vmlinux */
-	if (!have_vmlinux)
+	have_vmlinux = dso__is_vmlinux(map->dso);
+	have_kcore = dso__is_kcore(map->dso);
+
+	/* 2nd time through we just try kcore */
+	if (try_kcore && !have_kcore)
+		return TEST_CODE_READING_NO_KCORE;
+
+	/* No point getting kernel events if there is no kernel object */
+	if (!have_vmlinux && !have_kcore)
 		excl_kernel = true;
 
 	threads = thread_map__new_by_tid(pid);
@@ -461,11 +515,13 @@ static int do_test_code_reading(void)
 
 	perf_evlist__disable(evlist);
 
-	ret = process_events(machine, evlist);
+	ret = process_events(machine, evlist, &state);
 	if (ret < 0)
 		goto out_err;
 
-	if (!have_vmlinux)
+	if (!have_vmlinux && !have_kcore && !try_kcore)
+		err = TEST_CODE_READING_NO_KERNEL_OBJ;
+	else if (!have_vmlinux && !try_kcore)
 		err = TEST_CODE_READING_NO_VMLINUX;
 	else if (excl_kernel)
 		err = TEST_CODE_READING_NO_ACCESS;
@@ -492,7 +548,9 @@ int test__code_reading(void)
 {
 	int ret;
 
-	ret = do_test_code_reading();
+	ret = do_test_code_reading(false);
+	if (!ret)
+		ret = do_test_code_reading(true);
 
 	switch (ret) {
 	case TEST_CODE_READING_OK:
@@ -500,9 +558,15 @@ int test__code_reading(void)
 	case TEST_CODE_READING_NO_VMLINUX:
 		fprintf(stderr, " (no vmlinux)");
 		return 0;
+	case TEST_CODE_READING_NO_KCORE:
+		fprintf(stderr, " (no kcore)");
+		return 0;
 	case TEST_CODE_READING_NO_ACCESS:
 		fprintf(stderr, " (no access)");
 		return 0;
+	case TEST_CODE_READING_NO_KERNEL_OBJ:
+		fprintf(stderr, " (no kernel obj)");
+		return 0;
 	default:
 		return -1;
 	};
-- 
cgit v0.10.2


From bbb7f846f88df05646795854a014d73fb00f3b8b Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:54 +0300
Subject: perf annotate: Allow disassembly using /proc/kcore

Annotation with /proc/kcore is possible so the logic is adjusted to
allow it.  The main difference is that /proc/kcore had no symbols so the
parsing logic needed a tweak to read jump offsets.

The other difference is that objdump cannot always read from kcore.
That seems to be a bug with objdump.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-11-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 9101f7c..440c3b3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -103,7 +103,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 	/*
 	 * We can't annotate with just /proc/kallsyms
 	 */
-	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(map->dso)) {
 		pr_err("Can't annotate %s: No vmlinux file was found in the "
 		       "path\n", sym->name);
 		sleep(1);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index d102716..4ab2f11 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -110,10 +110,10 @@ static int jump__parse(struct ins_operands *ops)
 {
 	const char *s = strchr(ops->raw, '+');
 
-	ops->target.addr = strtoll(ops->raw, NULL, 16);
+	ops->target.addr = strtoull(ops->raw, NULL, 16);
 
 	if (s++ != NULL)
-		ops->target.offset = strtoll(s, NULL, 16);
+		ops->target.offset = strtoull(s, NULL, 16);
 	else
 		ops->target.offset = UINT64_MAX;
 
@@ -821,6 +821,10 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 	if (dl == NULL)
 		return -1;
 
+	if (dl->ops.target.offset == UINT64_MAX)
+		dl->ops.target.offset = dl->ops.target.addr -
+					map__rip_2objdump(map, sym->start);
+
 	disasm__add(&notes->src->source, dl);
 
 	return 0;
@@ -864,7 +868,8 @@ fallback:
 		free_filename = false;
 	}
 
-	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(dso)) {
 		char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
 		char *build_id_msg = NULL;
 
@@ -898,7 +903,7 @@ fallback:
 	snprintf(command, sizeof(command),
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -d %s %s -C %s|grep -v %s|expand",
+		 " -d %s %s -C %s 2>/dev/null|grep -v %s|expand",
 		 objdump_path ? objdump_path : "objdump",
 		 disassembler_style ? "-M " : "",
 		 disassembler_style ? disassembler_style : "",
-- 
cgit v0.10.2


From 34f77abcb34e1da4ee3ca5c5a41b673664eee1fa Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:55 +0300
Subject: perf annotate: Put dso name in symbol annotation title

Currently the symbol name is displayed at the top when displaying symbol
annotation.  Add to this the dso long name.

Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-12-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index cc64d3f..0f88a77 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -428,6 +428,14 @@ static void annotate_browser__init_asm_mode(struct annotate_browser *browser)
 	browser->b.nr_entries = browser->nr_asm_entries;
 }
 
+#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
+
+static int sym_title(struct symbol *sym, struct map *map, char *title,
+		     size_t sz)
+{
+	return snprintf(title, sz, "%s  %s", sym->name, map->dso->long_name);
+}
+
 static bool annotate_browser__callq(struct annotate_browser *browser,
 				    struct perf_evsel *evsel,
 				    struct hist_browser_timer *hbt)
@@ -438,6 +446,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 	struct annotation *notes;
 	struct symbol *target;
 	u64 ip;
+	char title[SYM_TITLE_MAX_SIZE];
 
 	if (!ins__is_call(dl->ins))
 		return false;
@@ -461,7 +470,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 
 	pthread_mutex_unlock(&notes->lock);
 	symbol__tui_annotate(target, ms->map, evsel, hbt);
-	ui_browser__show_title(&browser->b, sym->name);
+	sym_title(sym, ms->map, title, sizeof(title));
+	ui_browser__show_title(&browser->b, title);
 	return true;
 }
 
@@ -653,8 +663,10 @@ static int annotate_browser__run(struct annotate_browser *browser,
 	const char *help = "Press 'h' for help on key bindings";
 	int delay_secs = hbt ? hbt->refresh : 0;
 	int key;
+	char title[SYM_TITLE_MAX_SIZE];
 
-	if (ui_browser__show(&browser->b, sym->name, help) < 0)
+	sym_title(sym, ms->map, title, sizeof(title));
+	if (ui_browser__show(&browser->b, title, help) < 0)
 		return -1;
 
 	annotate_browser__calc_percent(browser, evsel);
-- 
cgit v0.10.2


From 484a5e7476b7ce790ba37417a4976c7f86a87231 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:56 +0300
Subject: perf annotate: Remove nop at end of annotation

When kcore is used for annotation, symbols do not have correct sizes
because they come from kallsyms, that has only its start address, with
the end address being the next symbol's minus one.

That sometimes results in an extra nop being seen after the end of a
function.  Remove it.

Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-13-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4ab2f11..646e38d 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -830,6 +830,30 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 	return 0;
 }
 
+static void delete_last_nop(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct list_head *list = &notes->src->source;
+	struct disasm_line *dl;
+
+	while (!list_empty(list)) {
+		dl = list_entry(list->prev, struct disasm_line, node);
+
+		if (dl->ins && dl->ins->ops) {
+			if (dl->ins->ops != &nop_ops)
+				return;
+		} else {
+			if (!strstr(dl->line, " nop ") &&
+			    !strstr(dl->line, " nopl ") &&
+			    !strstr(dl->line, " nopw "))
+				return;
+		}
+
+		list_del(&dl->node);
+		disasm_line__free(dl);
+	}
+}
+
 int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
 {
 	struct dso *dso = map->dso;
@@ -923,6 +947,13 @@ fallback:
 		if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
 			break;
 
+	/*
+	 * kallsyms does not have symbol sizes so there may a nop at the end.
+	 * Remove it.
+	 */
+	if (dso__is_kcore(dso))
+		delete_last_nop(sym);
+
 	pclose(file);
 out_free_filename:
 	if (free_filename)
-- 
cgit v0.10.2


From b178170a38e719cb7bc4a14d3f5e4b4ea6b7b851 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 7 Aug 2013 14:38:57 +0300
Subject: perf annotate: Add call target name if it is missing

The /proc/kcore file has no symbols, so the call target name does not
display.  Fix by looking up the symbol name if it is on the same map.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1375875537-4509-14-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 646e38d..bfc5a27 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -825,6 +825,22 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 		dl->ops.target.offset = dl->ops.target.addr -
 					map__rip_2objdump(map, sym->start);
 
+	/*
+	 * kcore has no symbols, so add the call target name if it is on the
+	 * same map.
+	 */
+	if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) {
+		struct symbol *s;
+		u64 ip = dl->ops.target.addr;
+
+		if (ip >= map->start && ip <= map->end) {
+			ip = map->map_ip(map, ip);
+			s = map__find_symbol(map, ip, NULL);
+			if (s && s->start == ip)
+				dl->ops.target.name = strdup(s->name);
+		}
+	}
+
 	disasm__add(&notes->src->source, dl);
 
 	return 0;
-- 
cgit v0.10.2


From fcd9fef9a64b84520852e0247d6796893e28864c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 7 Aug 2013 15:55:48 -0300
Subject: perf annotate browser: Improve description of '?' hotkey

The previous description: "Search previous string" is usually associated
with the 'N' following a '/string', the opposite of 'n', which is
'Search next string' in the direction established with '/' or '?'.

So change it to 'Search string backwards', to clarify that.

The 'N' hotkey remains to be implemented with the semantic described
above.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-5lw5y15d7vv308xbpm8pqe4g@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 0f88a77..7d75497 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -732,7 +732,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"s             Toggle source code view\n"
 		"/             Search string\n"
 		"r             Run available scripts\n"
-		"?             Search previous string\n");
+		"?             Search string backwards\n");
 			continue;
 		case 'r':
 			{
-- 
cgit v0.10.2


From e6f653888844f3b86f4274f03e731a3eacd22c49 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Fri, 2 Aug 2013 13:10:50 +0200
Subject: perf annotate browser: Fix typo

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/20130802111050.GA29126@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 7d75497..08545ae 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -505,7 +505,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser)
 
 	dl = annotate_browser__find_offset(browser, dl->ops.target.offset, &idx);
 	if (dl == NULL) {
-		ui_helpline__puts("Invallid jump offset");
+		ui_helpline__puts("Invalid jump offset");
 		return true;
 	}
 
-- 
cgit v0.10.2


From e30b88a77cc8ae2a1febf268c8443a6cdd696417 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:33 -0400
Subject: perf session: Export queue_event function

Taking a lesson from perf-trace and bringing in control of event
processing to perf-kvm-stat-live: parse the sample to get access the
time leaving just the need to queue it to the ordered samples list.  For
that the queue_event function needs to be exported.

Unexport perf_session__process_event.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-2-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b5ebd47..dedaeb2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -643,7 +643,7 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s)
 
 #define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))
 
-static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
+int perf_session_queue_event(struct perf_session *s, union perf_event *event,
 				    struct perf_sample *sample, u64 file_offset)
 {
 	struct ordered_samples *os = &s->ordered_samples;
@@ -1049,10 +1049,10 @@ static void event_swap(union perf_event *event, bool sample_id_all)
 		swap(event, sample_id_all);
 }
 
-int perf_session__process_event(struct perf_session *session,
-				union perf_event *event,
-				struct perf_tool *tool,
-				u64 file_offset)
+static int perf_session__process_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_tool *tool,
+				       u64 file_offset)
 {
 	struct perf_sample sample;
 	int ret;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9818fc2..8bed17e 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -56,10 +56,8 @@ int __perf_session__process_events(struct perf_session *self,
 int perf_session__process_events(struct perf_session *self,
 				 struct perf_tool *tool);
 
-int perf_session__process_event(struct perf_session *session,
-				union perf_event *event,
-				struct perf_tool *tool,
-				u64 file_offset);
+int perf_session_queue_event(struct perf_session *s, union perf_event *event,
+			     struct perf_sample *sample, u64 file_offset);
 
 void perf_tool__fill_defaults(struct perf_tool *tool);
 
-- 
cgit v0.10.2


From 1afe1d148491069ef51ed69fa53b09e1cb3ec30d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:34 -0400
Subject: perf kvm: Add live mode

perf kvm stat currently requires back to back record and report commands
to see stats. e.g,.

  perf kvm stat record -p $pid -- sleep 1
  perf kvm stat report

This is inconvenvient for on box monitoring of a VM. This patch
introduces a 'live' mode that in effect combines the record plus report
into one command. e.g., to monitor a single VM:

  perf kvm stat live -p $pid

or all VMs:

  perf kvm stat live

Same stats options for the record+report path work with the live mode.
Display rate defaults to 1 second and can be changed using the -d
option.

v4:
- address comments from Xiao -- verify_vcpu check should not look at
  processors on line for the host, prune configurable options.
- set attr->{mmap,comm,task} to 0 - don't need task events so trim events
  we have to deal with
- better control of time for queue event flushing to reduce frequency of
  "Timestamp below last timeslice flush" failures.

v3:
updated to use existing tracepoint parsing code

v2:
removed ABSTIME arg from timerfd_settime as mentioned by Namhyung
only call perf_kvm__handle_stdin when poll returns activity.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-3-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 7d14a3a..29bfca7 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -2,6 +2,7 @@
 #include "perf.h"
 
 #include "util/evsel.h"
+#include "util/evlist.h"
 #include "util/util.h"
 #include "util/cache.h"
 #include "util/symbol.h"
@@ -15,9 +16,12 @@
 #include <lk/debugfs.h>
 #include "util/tool.h"
 #include "util/stat.h"
+#include "util/top.h"
 
 #include <sys/prctl.h>
+#include <sys/timerfd.h>
 
+#include <termios.h>
 #include <semaphore.h>
 #include <pthread.h>
 #include <math.h>
@@ -82,6 +86,8 @@ struct exit_reasons_table {
 
 struct perf_kvm_stat {
 	struct perf_tool    tool;
+	struct perf_record_opts opts;
+	struct perf_evlist  *evlist;
 	struct perf_session *session;
 
 	const char *file_name;
@@ -96,10 +102,16 @@ struct perf_kvm_stat {
 	struct kvm_events_ops *events_ops;
 	key_cmp_fun compare;
 	struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
 	u64 total_time;
 	u64 total_count;
+	u64 lost_events;
 
 	struct rb_root result;
+
+	int timerfd;
+	unsigned int display_time;
+	bool live;
 };
 
 
@@ -320,6 +332,23 @@ static void init_kvm_event_record(struct perf_kvm_stat *kvm)
 		INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
 }
 
+static void clear_events_cache_stats(struct list_head *kvm_events_cache)
+{
+	struct list_head *head;
+	struct kvm_event *event;
+	unsigned int i;
+
+	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+		head = &kvm_events_cache[i];
+		list_for_each_entry(event, head, hash_entry) {
+			/* reset stats for event */
+			memset(&event->total, 0, sizeof(event->total));
+			memset(event->vcpu, 0,
+			       event->max_vcpu * sizeof(*event->vcpu));
+		}
+	}
+}
+
 static int kvm_events_hash_fn(u64 key)
 {
 	return key & (EVENTS_CACHE_SIZE - 1);
@@ -472,7 +501,11 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
 	vcpu_record->last_event = NULL;
 	vcpu_record->start_time = 0;
 
-	BUG_ON(timestamp < time_begin);
+	/* seems to happen once in a while during live mode */
+	if (timestamp < time_begin) {
+		pr_debug("End time before begin time; skipping event.\n");
+		return true;
+	}
 
 	time_diff = timestamp - time_begin;
 	return update_kvm_event(event, vcpu, time_diff);
@@ -639,24 +672,56 @@ static struct kvm_event *pop_from_result(struct rb_root *result)
 	return container_of(node, struct kvm_event, rb);
 }
 
-static void print_vcpu_info(int vcpu)
+static void print_vcpu_info(struct perf_kvm_stat *kvm)
 {
+	int vcpu = kvm->trace_vcpu;
+
 	pr_info("Analyze events for ");
 
+	if (kvm->live) {
+		if (kvm->opts.target.system_wide)
+			pr_info("all VMs, ");
+		else if (kvm->opts.target.pid)
+			pr_info("pid(s) %s, ", kvm->opts.target.pid);
+		else
+			pr_info("dazed and confused on what is monitored, ");
+	}
+
 	if (vcpu == -1)
 		pr_info("all VCPUs:\n\n");
 	else
 		pr_info("VCPU %d:\n\n", vcpu);
 }
 
+static void show_timeofday(void)
+{
+	char date[64];
+	struct timeval tv;
+	struct tm ltime;
+
+	gettimeofday(&tv, NULL);
+	if (localtime_r(&tv.tv_sec, &ltime)) {
+		strftime(date, sizeof(date), "%H:%M:%S", &ltime);
+		pr_info("%s.%06ld", date, tv.tv_usec);
+	} else
+		pr_info("00:00:00.000000");
+
+	return;
+}
+
 static void print_result(struct perf_kvm_stat *kvm)
 {
 	char decode[20];
 	struct kvm_event *event;
 	int vcpu = kvm->trace_vcpu;
 
+	if (kvm->live) {
+		puts(CONSOLE_CLEAR);
+		show_timeofday();
+	}
+
 	pr_info("\n\n");
-	print_vcpu_info(vcpu);
+	print_vcpu_info(kvm);
 	pr_info("%20s ", kvm->events_ops->name);
 	pr_info("%10s ", "Samples");
 	pr_info("%9s ", "Samples%");
@@ -683,6 +748,20 @@ static void print_result(struct perf_kvm_stat *kvm)
 
 	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
 		kvm->total_count, kvm->total_time / 1e3);
+
+	if (kvm->lost_events)
+		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
+}
+
+static int process_lost_event(struct perf_tool *tool,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
+{
+	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
+
+	kvm->lost_events++;
+	return 0;
 }
 
 static int process_sample_event(struct perf_tool *tool,
@@ -707,10 +786,20 @@ static int process_sample_event(struct perf_tool *tool,
 	return 0;
 }
 
-static int get_cpu_isa(struct perf_session *session)
+static int cpu_isa_config(struct perf_kvm_stat *kvm)
 {
-	char *cpuid = session->header.env.cpuid;
-	int isa;
+	char buf[64], *cpuid;
+	int err, isa;
+
+	if (kvm->live) {
+		err = get_cpuid(buf, sizeof(buf));
+		if (err != 0) {
+			pr_err("Failed to look up CPU type (Intel or AMD)\n");
+			return err;
+		}
+		cpuid = buf;
+	} else
+		cpuid = kvm->session->header.env.cpuid;
 
 	if (strstr(cpuid, "Intel"))
 		isa = 1;
@@ -718,10 +807,361 @@ static int get_cpu_isa(struct perf_session *session)
 		isa = 0;
 	else {
 		pr_err("CPU %s is not supported.\n", cpuid);
-		isa = -ENOTSUP;
+		return -ENOTSUP;
+	}
+
+	if (isa == 1) {
+		kvm->exit_reasons = vmx_exit_reasons;
+		kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
+		kvm->exit_reasons_isa = "VMX";
+	}
+
+	return 0;
+}
+
+static bool verify_vcpu(int vcpu)
+{
+	if (vcpu != -1 && vcpu < 0) {
+		pr_err("Invalid vcpu:%d.\n", vcpu);
+		return false;
+	}
+
+	return true;
+}
+
+/* keeping the max events to a modest level to keep
+ * the processing of samples per mmap smooth.
+ */
+#define PERF_KVM__MAX_EVENTS_PER_MMAP  25
+
+static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
+				   u64 *mmap_time)
+{
+	union perf_event *event;
+	struct perf_sample sample;
+	s64 n = 0;
+	int err;
+
+	*mmap_time = ULLONG_MAX;
+	while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
+		err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+		if (err) {
+			pr_err("Failed to parse sample\n");
+			return -1;
+		}
+
+		err = perf_session_queue_event(kvm->session, event, &sample, 0);
+		if (err) {
+			pr_err("Failed to enqueue sample: %d\n", err);
+			return -1;
+		}
+
+		/* save time stamp of our first sample for this mmap */
+		if (n == 0)
+			*mmap_time = sample.time;
+
+		/* limit events per mmap handled all at once */
+		n++;
+		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+			break;
+	}
+
+	return n;
+}
+
+static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
+{
+	int i, err, throttled = 0;
+	s64 n, ntotal = 0;
+	u64 flush_time = ULLONG_MAX, mmap_time;
+
+	for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+		n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
+		if (n < 0)
+			return -1;
+
+		/* flush time is going to be the minimum of all the individual
+		 * mmap times. Essentially, we flush all the samples queued up
+		 * from the last pass under our minimal start time -- that leaves
+		 * a very small race for samples to come in with a lower timestamp.
+		 * The ioctl to return the perf_clock timestamp should close the
+		 * race entirely.
+		 */
+		if (mmap_time < flush_time)
+			flush_time = mmap_time;
+
+		ntotal += n;
+		if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+			throttled = 1;
+	}
+
+	/* flush queue after each round in which we processed events */
+	if (ntotal) {
+		kvm->session->ordered_samples.next_flush = flush_time;
+		err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session);
+		if (err) {
+			if (kvm->lost_events)
+				pr_info("\nLost events: %" PRIu64 "\n\n",
+					kvm->lost_events);
+			return err;
+		}
+	}
+
+	return throttled;
+}
+
+static volatile int done;
+
+static void sig_handler(int sig __maybe_unused)
+{
+	done = 1;
+}
+
+static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
+{
+	struct itimerspec new_value;
+	int rc = -1;
+
+	kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+	if (kvm->timerfd < 0) {
+		pr_err("timerfd_create failed\n");
+		goto out;
+	}
+
+	new_value.it_value.tv_sec = kvm->display_time;
+	new_value.it_value.tv_nsec = 0;
+	new_value.it_interval.tv_sec = kvm->display_time;
+	new_value.it_interval.tv_nsec = 0;
+
+	if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
+		pr_err("timerfd_settime failed: %d\n", errno);
+		close(kvm->timerfd);
+		goto out;
+	}
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
+{
+	uint64_t c;
+	int rc;
+
+	rc = read(kvm->timerfd, &c, sizeof(uint64_t));
+	if (rc < 0) {
+		if (errno == EAGAIN)
+			return 0;
+
+		pr_err("Failed to read timer fd: %d\n", errno);
+		return -1;
+	}
+
+	if (rc != sizeof(uint64_t)) {
+		pr_err("Error reading timer fd - invalid size returned\n");
+		return -1;
+	}
+
+	if (c != 1)
+		pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
+
+	/* update display */
+	sort_result(kvm);
+	print_result(kvm);
+
+	/* reset counts */
+	clear_events_cache_stats(kvm->kvm_events_cache);
+	kvm->total_count = 0;
+	kvm->total_time = 0;
+	kvm->lost_events = 0;
+
+	return 0;
+}
+
+static int fd_set_nonblock(int fd)
+{
+	long arg = 0;
+
+	arg = fcntl(fd, F_GETFL);
+	if (arg < 0) {
+		pr_err("Failed to get current flags for fd %d\n", fd);
+		return -1;
+	}
+
+	if (fcntl(fd, F_SETFL, arg | O_NONBLOCK) < 0) {
+		pr_err("Failed to set non-block option on fd %d\n", fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+static
+int perf_kvm__handle_stdin(struct termios *tc_now, struct termios *tc_save)
+{
+	int c;
+
+	tcsetattr(0, TCSANOW, tc_now);
+	c = getc(stdin);
+	tcsetattr(0, TCSAFLUSH, tc_save);
+
+	if (c == 'q')
+		return 1;
+
+	return 0;
+}
+
+static int kvm_events_live_report(struct perf_kvm_stat *kvm)
+{
+	struct pollfd *pollfds = NULL;
+	int nr_fds, nr_stdin, ret, err = -EINVAL;
+	struct termios tc, save;
+
+	/* live flag must be set first */
+	kvm->live = true;
+
+	ret = cpu_isa_config(kvm);
+	if (ret < 0)
+		return ret;
+
+	if (!verify_vcpu(kvm->trace_vcpu) ||
+	    !select_key(kvm) ||
+	    !register_kvm_events_ops(kvm)) {
+		goto out;
+	}
+
+	init_kvm_event_record(kvm);
+
+	tcgetattr(0, &save);
+	tc = save;
+	tc.c_lflag &= ~(ICANON | ECHO);
+	tc.c_cc[VMIN] = 0;
+	tc.c_cc[VTIME] = 0;
+
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+
+	/* copy pollfds -- need to add timerfd and stdin */
+	nr_fds = kvm->evlist->nr_fds;
+	pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
+	if (!pollfds) {
+		err = -ENOMEM;
+		goto out;
+	}
+	memcpy(pollfds, kvm->evlist->pollfd,
+		sizeof(struct pollfd) * kvm->evlist->nr_fds);
+
+	/* add timer fd */
+	if (perf_kvm__timerfd_create(kvm) < 0) {
+		err = -1;
+		goto out;
+	}
+
+	pollfds[nr_fds].fd = kvm->timerfd;
+	pollfds[nr_fds].events = POLLIN;
+	nr_fds++;
+
+	pollfds[nr_fds].fd = fileno(stdin);
+	pollfds[nr_fds].events = POLLIN;
+	nr_stdin = nr_fds;
+	nr_fds++;
+	if (fd_set_nonblock(fileno(stdin)) != 0)
+		goto out;
+
+	/* everything is good - enable the events and process */
+	perf_evlist__enable(kvm->evlist);
+
+	while (!done) {
+		int rc;
+
+		rc = perf_kvm__mmap_read(kvm);
+		if (rc < 0)
+			break;
+
+		err = perf_kvm__handle_timerfd(kvm);
+		if (err)
+			goto out;
+
+		if (pollfds[nr_stdin].revents & POLLIN)
+			done = perf_kvm__handle_stdin(&tc, &save);
+
+		if (!rc && !done)
+			err = poll(pollfds, nr_fds, 100);
+	}
+
+	perf_evlist__disable(kvm->evlist);
+
+	if (err == 0) {
+		sort_result(kvm);
+		print_result(kvm);
+	}
+
+out:
+	if (kvm->timerfd >= 0)
+		close(kvm->timerfd);
+
+	if (pollfds)
+		free(pollfds);
+
+	return err;
+}
+
+static int kvm_live_open_events(struct perf_kvm_stat *kvm)
+{
+	int err, rc = -1;
+	struct perf_evsel *pos;
+	struct perf_evlist *evlist = kvm->evlist;
+
+	perf_evlist__config(evlist, &kvm->opts);
+
+	/*
+	 * Note: exclude_{guest,host} do not apply here.
+	 *       This command processes KVM tracepoints from host only
+	 */
+	list_for_each_entry(pos, &evlist->entries, node) {
+		struct perf_event_attr *attr = &pos->attr;
+
+		/* make sure these *are* set */
+		attr->sample_type |= PERF_SAMPLE_TID;
+		attr->sample_type |= PERF_SAMPLE_TIME;
+		attr->sample_type |= PERF_SAMPLE_CPU;
+		attr->sample_type |= PERF_SAMPLE_RAW;
+		/* make sure these are *not*; want as small a sample as possible */
+		attr->sample_type &= ~PERF_SAMPLE_PERIOD;
+		attr->sample_type &= ~PERF_SAMPLE_IP;
+		attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN;
+		attr->sample_type &= ~PERF_SAMPLE_ADDR;
+		attr->sample_type &= ~PERF_SAMPLE_READ;
+		attr->mmap = 0;
+		attr->comm = 0;
+		attr->task = 0;
+
+		attr->sample_period = 1;
+
+		attr->watermark = 0;
+		attr->wakeup_events = 1000;
+
+		/* will enable all once we are ready */
+		attr->disabled = 1;
+	}
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		printf("Couldn't create the events: %s\n", strerror(errno));
+		goto out;
 	}
 
-	return isa;
+	if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
+		ui__error("Failed to mmap the events: %s\n", strerror(errno));
+		perf_evlist__close(evlist);
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+	return rc;
 }
 
 static int read_events(struct perf_kvm_stat *kvm)
@@ -749,30 +1189,13 @@ static int read_events(struct perf_kvm_stat *kvm)
 	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
 	 * traced in the old kernel.
 	 */
-	ret = get_cpu_isa(kvm->session);
-
+	ret = cpu_isa_config(kvm);
 	if (ret < 0)
 		return ret;
 
-	if (ret == 1) {
-		kvm->exit_reasons = vmx_exit_reasons;
-		kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
-		kvm->exit_reasons_isa = "VMX";
-	}
-
 	return perf_session__process_events(kvm->session, &kvm->tool);
 }
 
-static bool verify_vcpu(int vcpu)
-{
-	if (vcpu != -1 && vcpu < 0) {
-		pr_err("Invalid vcpu:%d.\n", vcpu);
-		return false;
-	}
-
-	return true;
-}
-
 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
 {
 	int ret = -EINVAL;
@@ -886,6 +1309,186 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	return kvm_events_report_vcpu(kvm);
 }
 
+static struct perf_evlist *kvm_live_event_list(void)
+{
+	struct perf_evlist *evlist;
+	char *tp, *name, *sys;
+	unsigned int j;
+	int err = -1;
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL)
+		return NULL;
+
+	for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
+
+		tp = strdup(kvm_events_tp[j]);
+		if (tp == NULL)
+			goto out;
+
+		/* split tracepoint into subsystem and name */
+		sys = tp;
+		name = strchr(tp, ':');
+		if (name == NULL) {
+			pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
+				kvm_events_tp[j]);
+			free(tp);
+			goto out;
+		}
+		*name = '\0';
+		name++;
+
+		if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+			pr_err("Failed to add %s tracepoint to the list\n", kvm_events_tp[j]);
+			free(tp);
+			goto out;
+		}
+
+		free(tp);
+	}
+
+	err = 0;
+
+out:
+	if (err) {
+		perf_evlist__delete(evlist);
+		evlist = NULL;
+	}
+
+	return evlist;
+}
+
+static int kvm_events_live(struct perf_kvm_stat *kvm,
+			   int argc, const char **argv)
+{
+	char errbuf[BUFSIZ];
+	int err;
+
+	const struct option live_options[] = {
+		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+			"record events on existing process id"),
+		OPT_UINTEGER('m', "mmap-pages", &kvm->opts.mmap_pages,
+			"number of mmap data pages"),
+		OPT_INCR('v', "verbose", &verbose,
+			"be more verbose (show counter open errors, etc)"),
+		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
+			"system-wide collection from all CPUs"),
+		OPT_UINTEGER('d', "display", &kvm->display_time,
+			"time in seconds between display updates"),
+		OPT_STRING(0, "event", &kvm->report_event, "report event",
+			"event for reporting: vmexit, mmio, ioport"),
+		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+			"vcpu id to report"),
+		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+			"key for sorting: sample(sort by samples number)"
+			" time (sort by avg time)"),
+		OPT_END()
+	};
+	const char * const live_usage[] = {
+		"perf kvm stat live [<options>]",
+		NULL
+	};
+
+
+	/* event handling */
+	kvm->tool.sample = process_sample_event;
+	kvm->tool.comm   = perf_event__process_comm;
+	kvm->tool.exit   = perf_event__process_exit;
+	kvm->tool.fork   = perf_event__process_fork;
+	kvm->tool.lost   = process_lost_event;
+	kvm->tool.ordered_samples = true;
+	perf_tool__fill_defaults(&kvm->tool);
+
+	/* set defaults */
+	kvm->display_time = 1;
+	kvm->opts.user_interval = 1;
+	kvm->opts.mmap_pages = 512;
+	kvm->opts.target.uses_mmap = false;
+	kvm->opts.target.uid_str = NULL;
+	kvm->opts.target.uid = UINT_MAX;
+
+	symbol__init();
+	disable_buildid_cache();
+
+	use_browser = 0;
+	setup_browser(false);
+
+	if (argc) {
+		argc = parse_options(argc, argv, live_options,
+				     live_usage, 0);
+		if (argc)
+			usage_with_options(live_usage, live_options);
+	}
+
+	/*
+	 * target related setups
+	 */
+	err = perf_target__validate(&kvm->opts.target);
+	if (err) {
+		perf_target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
+		ui__warning("%s", errbuf);
+	}
+
+	if (perf_target__none(&kvm->opts.target))
+		kvm->opts.target.system_wide = true;
+
+
+	/*
+	 * generate the event list
+	 */
+	kvm->evlist = kvm_live_event_list();
+	if (kvm->evlist == NULL) {
+		err = -1;
+		goto out;
+	}
+
+	symbol_conf.nr_events = kvm->evlist->nr_entries;
+
+	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+		usage_with_options(live_usage, live_options);
+
+	/*
+	 * perf session
+	 */
+	kvm->session = perf_session__new(NULL, O_WRONLY, false, false, &kvm->tool);
+	if (kvm->session == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+	kvm->session->evlist = kvm->evlist;
+	perf_session__set_id_hdr_size(kvm->session);
+
+
+	if (perf_target__has_task(&kvm->opts.target))
+		perf_event__synthesize_thread_map(&kvm->tool,
+						  kvm->evlist->threads,
+						  perf_event__process,
+						  &kvm->session->machines.host);
+	else
+		perf_event__synthesize_threads(&kvm->tool, perf_event__process,
+					       &kvm->session->machines.host);
+
+
+	err = kvm_live_open_events(kvm);
+	if (err)
+		goto out;
+
+	err = kvm_events_live_report(kvm);
+
+out:
+	exit_browser(0);
+
+	if (kvm->session)
+		perf_session__delete(kvm->session);
+	kvm->session = NULL;
+	if (kvm->evlist) {
+		perf_evlist__delete_maps(kvm->evlist);
+		perf_evlist__delete(kvm->evlist);
+	}
+
+	return err;
+}
+
 static void print_kvm_stat_usage(void)
 {
 	printf("Usage: perf kvm stat <command>\n\n");
@@ -893,6 +1496,7 @@ static void print_kvm_stat_usage(void)
 	printf("# Available commands:\n");
 	printf("\trecord: record kvm events\n");
 	printf("\treport: report statistical data of kvm events\n");
+	printf("\tlive:   live reporting of statistical data of kvm events\n");
 
 	printf("\nOtherwise, it is the alias of 'perf stat':\n");
 }
@@ -922,6 +1526,9 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
 	if (!strncmp(argv[1], "rep", 3))
 		return kvm_events_report(&kvm, argc - 1 , argv + 1);
 
+	if (!strncmp(argv[1], "live", 4))
+		return kvm_events_live(&kvm, argc - 1 , argv + 1);
+
 perf_stat:
 	return cmd_stat(argc, argv, NULL);
 }
-- 
cgit v0.10.2


From 62d04dbf3652264157e646d93006b2d74cf1be93 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:35 -0400
Subject: perf kvm: Add min and max stats to display

Add max and min times for exit events.

v2: address Xiao's comment to use get_event function for pulling max and
    min from stats struct similar to mean and count

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-4-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 29bfca7..b6595e9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -337,14 +337,19 @@ static void clear_events_cache_stats(struct list_head *kvm_events_cache)
 	struct list_head *head;
 	struct kvm_event *event;
 	unsigned int i;
+	int j;
 
 	for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
 		head = &kvm_events_cache[i];
 		list_for_each_entry(event, head, hash_entry) {
 			/* reset stats for event */
-			memset(&event->total, 0, sizeof(event->total));
-			memset(event->vcpu, 0,
-			       event->max_vcpu * sizeof(*event->vcpu));
+			event->total.time = 0;
+			init_stats(&event->total.stats);
+
+			for (j = 0; j < event->max_vcpu; ++j) {
+				event->vcpu[j].time = 0;
+				init_stats(&event->vcpu[j].stats);
+			}
 		}
 	}
 }
@@ -583,6 +588,8 @@ static int compare_kvm_event_ ## func(struct kvm_event *one,		\
 GET_EVENT_KEY(time, time);
 COMPARE_EVENT_KEY(count, stats.n);
 COMPARE_EVENT_KEY(mean, stats.mean);
+GET_EVENT_KEY(max, stats.max);
+GET_EVENT_KEY(min, stats.min);
 
 #define DEF_SORT_NAME_KEY(name, compare_key)				\
 	{ #name, compare_kvm_event_ ## compare_key }
@@ -727,20 +734,26 @@ static void print_result(struct perf_kvm_stat *kvm)
 	pr_info("%9s ", "Samples%");
 
 	pr_info("%9s ", "Time%");
+	pr_info("%10s ", "Min Time");
+	pr_info("%10s ", "Max Time");
 	pr_info("%16s ", "Avg time");
 	pr_info("\n\n");
 
 	while ((event = pop_from_result(&kvm->result))) {
-		u64 ecount, etime;
+		u64 ecount, etime, max, min;
 
 		ecount = get_event_count(event, vcpu);
 		etime = get_event_time(event, vcpu);
+		max = get_event_max(event, vcpu);
+		min = get_event_min(event, vcpu);
 
 		kvm->events_ops->decode_key(kvm, &event->key, decode);
 		pr_info("%20s ", decode);
 		pr_info("%10llu ", (unsigned long long)ecount);
 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
+		pr_info("%8" PRIu64 "us ", min / 1000);
+		pr_info("%8" PRIu64 "us ", max / 1000);
 		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
 			kvm_event_rel_stddev(vcpu, event));
 		pr_info("\n");
-- 
cgit v0.10.2


From 2e73f00fe707a8f2476d989de946c12078c7c066 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 5 Aug 2013 21:41:37 -0400
Subject: perf kvm stat report: Add option to analyze specific VM

Add an option to analyze a specific VM within a data file. This allows
the collection of kvm events for all VMs and then analyze data for each
VM (or set of VMs) individually.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1375753297-69645-6-git-send-email-dsahern@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index b6595e9..2ceec81 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -9,7 +9,7 @@
 #include "util/thread.h"
 #include "util/header.h"
 #include "util/session.h"
-
+#include "util/intlist.h"
 #include "util/parse-options.h"
 #include "util/trace-event.h"
 #include "util/debug.h"
@@ -107,6 +107,9 @@ struct perf_kvm_stat {
 	u64 total_count;
 	u64 lost_events;
 
+	const char *pid_str;
+	struct intlist *pid_list;
+
 	struct rb_root result;
 
 	int timerfd;
@@ -777,16 +780,29 @@ static int process_lost_event(struct perf_tool *tool,
 	return 0;
 }
 
+static bool skip_sample(struct perf_kvm_stat *kvm,
+			struct perf_sample *sample)
+{
+	if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
+		return true;
+
+	return false;
+}
+
 static int process_sample_event(struct perf_tool *tool,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, sample->tid);
+	struct thread *thread;
 	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
 						 tool);
 
+	if (skip_sample(kvm, sample))
+		return 0;
+
+	thread = machine__findnew_thread(machine, sample->tid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			event->header.type);
@@ -1209,11 +1225,27 @@ static int read_events(struct perf_kvm_stat *kvm)
 	return perf_session__process_events(kvm->session, &kvm->tool);
 }
 
+static int parse_target_str(struct perf_kvm_stat *kvm)
+{
+	if (kvm->pid_str) {
+		kvm->pid_list = intlist__new(kvm->pid_str);
+		if (kvm->pid_list == NULL) {
+			pr_err("Error parsing process id string\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
 {
 	int ret = -EINVAL;
 	int vcpu = kvm->trace_vcpu;
 
+	if (parse_target_str(kvm) != 0)
+		goto exit;
+
 	if (!verify_vcpu(vcpu))
 		goto exit;
 
@@ -1300,6 +1332,8 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
 			    "key for sorting: sample(sort by samples number)"
 			    " time (sort by avg time)"),
+		OPT_STRING('p', "pid", &kvm->pid_str, "pid",
+			   "analyze events only for given process id(s)"),
 		OPT_END()
 	};
 
-- 
cgit v0.10.2


From d50bf78ff69297d3f60aa778c272acc8e5f59a19 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Tue, 6 Aug 2013 14:14:13 +0900
Subject: perf ui/gtk: Fix segmentation fault on perf_hpp__for_each_format loop

The commit 2b8bfa6bb8a7 ("perf tools: Centralize default columns init in
perf_hpp__init") moves initialization of common overhead column to
perf_hpp__init() but forgot about the gtk code.

So the gtk code added the same column to the list twice causing infinite
loop when iterating it by perf_hpp__for_each_format loop.  When I run
perf report --gtk, I can see following messages indefinitely.

  (perf:11687): Gtk-CRITICAL **: IA__gtk_main_quit: assertion 'main_loops != NULL' failed
  perf: Segmentation fault

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1375766056-19377-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index cb2ed198..2ca66cc 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -109,8 +109,6 @@ __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
 
 void perf_gtk__init_hpp(void)
 {
-	perf_hpp__column_enable(PERF_HPP__OVERHEAD);
-
 	perf_hpp__init();
 
 	perf_hpp__format[PERF_HPP__OVERHEAD].color =
-- 
cgit v0.10.2


From e9a7c414477d20c3cc56f90f29c35b06f0f15e25 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 6 Aug 2013 23:28:05 +1000
Subject: perf tools: Add support for pinned modifier

This commit adds support for a new modifier "D", which requests that the
event, or group of events, be pinned to the PMU.

The "p" modifier is already taken for precise, and "P" may be used in
future to mean "fully precise".

So we use "D", which stands for pinneD - and looks like a padlock, or if
you're using the ":D" syntax perf smiles at you.

This is an oft-requested feature from our HW folks, who want to be able
to run a large number of events, but also want 100% accurate results for
instructions per cycle.

Comparison of results with and without pinning:

$ perf stat -e '{cycles,instructions}:D' -e cycles,instructions,...

  79,590,480,683 cycles         #  0.000 GHz
 166,123,716,524 instructions   #  2.09  insns per cycle
                                #  0.11  stalled cycles per insn

  79,352,134,463 cycles         #  0.000 GHz                     [11.11%]
 165,178,301,818 instructions   #  2.08  insns per cycle
                                #  0.11  stalled cycles per insn [11.13%]

As you can see although perf does a very good job of scaling the values
in the non-pinned case, there is some small discrepancy.

The patch is fairly straight forward, the one detail is that we need to
make sure we only request pinning for the group leader when we have a
group.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1375795686-4226-1-git-send-email-michael@ellerman.id.au
[ Use perf_evsel__is_group_leader instead of open coded equivalent, as
  suggested by Jiri Olsa ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index eb03f06..6fce6a6 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -30,6 +30,7 @@ counted. The following modifiers exist:
  H - host counting (not in KVM guests)
  p - precise level
  S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index dba877d..9cba923 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -688,6 +688,7 @@ struct event_modifier {
 	int precise;
 	int exclude_GH;
 	int sample_read;
+	int pinned;
 };
 
 static int get_event_modifier(struct event_modifier *mod, char *str,
@@ -700,6 +701,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	int eG = evsel ? evsel->attr.exclude_guest : 0;
 	int precise = evsel ? evsel->attr.precise_ip : 0;
 	int sample_read = 0;
+	int pinned = evsel ? evsel->attr.pinned : 0;
 
 	int exclude = eu | ek | eh;
 	int exclude_GH = evsel ? evsel->exclude_GH : 0;
@@ -734,6 +736,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 				eG = 1;
 		} else if (*str == 'S') {
 			sample_read = 1;
+		} else if (*str == 'D') {
+			pinned = 1;
 		} else
 			break;
 
@@ -761,6 +765,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
 	mod->precise = precise;
 	mod->exclude_GH = exclude_GH;
 	mod->sample_read = sample_read;
+	mod->pinned = pinned;
+
 	return 0;
 }
 
@@ -773,7 +779,7 @@ static int check_modifier(char *str)
 	char *p = str;
 
 	/* The sizeof includes 0 byte as well. */
-	if (strlen(str) > (sizeof("ukhGHpppS") - 1))
+	if (strlen(str) > (sizeof("ukhGHpppSD") - 1))
 		return -1;
 
 	while (*p) {
@@ -812,6 +818,9 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
 		evsel->attr.exclude_guest  = mod.eG;
 		evsel->exclude_GH          = mod.exclude_GH;
 		evsel->sample_read         = mod.sample_read;
+
+		if (perf_evsel__is_group_leader(evsel))
+			evsel->attr.pinned = mod.pinned;
 	}
 
 	return 0;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index b36115f..0790452 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -82,7 +82,8 @@ num_hex		0x[a-fA-F0-9]+
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?]*
-modifier_event	[ukhpGHS]+
+/* If you add a modifier you need to update check_modifier() */
+modifier_event	[ukhpGHSD]+
 modifier_bp	[rwx]{1,3}
 
 %%
-- 
cgit v0.10.2


From c9ee780f2736b7a149658b0cd8c8389da23e190a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 6 Aug 2013 23:28:06 +1000
Subject: perf tests: Add tests of new pinned modifier

Add a negative test to test__checkevent_pmu_events() to get lots of
coverage of the negative case, ie. when the modifier is not specified.

Add a test of a single event, and of the group case.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1375795686-4226-2-git-send-email-michael@ellerman.id.au
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index b46379c..48114d1 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -452,6 +452,7 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist)
 			evsel->attr.exclude_kernel);
 	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
 
 	return 0;
 }
@@ -1070,6 +1071,50 @@ static int test__leader_sample2(struct perf_evlist *evlist __maybe_unused)
 	return 0;
 }
 
+static int test__checkevent_pinned_modifier(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+	TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+	TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+	TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned);
+
+	return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__pinned_group(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel, *leader;
+
+	TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+	/* cycles - group leader */
+	evsel = leader = perf_evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+	TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+	TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned);
+
+	/* cache-misses - can not be pinned, but will go on with the leader */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	/* branch-misses - ditto */
+	evsel = perf_evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong config",
+			PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+	TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	char events_path[PATH_MAX];
@@ -1294,6 +1339,14 @@ static struct evlist_test test__events[] = {
 		.name  = "{instructions,branch-misses}:Su",
 		.check = test__leader_sample2,
 	},
+	[40] = {
+		.name  = "instructions:uDp",
+		.check = test__checkevent_pinned_modifier,
+	},
+	[41] = {
+		.name  = "{cycles,cache-misses,branch-misses}:D",
+		.check = test__pinned_group,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
-- 
cgit v0.10.2


From 8f76fcd902e3b3a7d6f6c695cc8bc053579eb179 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 15 Jul 2013 15:27:53 -0500
Subject: perf machine: Do not require /lib/modules/* on a guest

For some types of work loads and special guest environments, you might
have a kernel that has no kernel modules.  The perf kvm record tool
fails instantiate vmlinux maps when the kernel modules directory cannot
be opened, even though the kallsyms has been properly processed.  This
leads to a perf kvm report that has no guest symbols resolved.

This patch changes the failure to locate kernel modules to be non-fatal.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1373920073-4874-1-git-send-email-jason.wessel@windriver.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ef3b49c..6fcc358 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -806,7 +806,10 @@ static int machine__create_modules(struct machine *machine)
 	free(line);
 	fclose(file);
 
-	return machine__set_modules_path(machine);
+	if (machine__set_modules_path(machine) < 0) {
+		pr_debug("Problems setting modules path maps, continuing anyway...\n");
+	}
+	return 0;
 
 out_delete_line:
 	free(line);
-- 
cgit v0.10.2


From cecb977e24da1465cdb0ff2d10d22e5891dc3e6c Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Tue, 16 Jul 2013 10:03:34 +0900
Subject: Revert "tools lib lk: Fix for cross build"

This reverts commit 079787f209416416383c74ea5d5044be2d586f5e.

Below commit already resolve a cross build problem.
I have been noticed this too lately.

    commit 3c4797d46c14fa0c7cf733a77bd4b28875078b53
    Author: Rabin Vincent <rabin@rab.in>
    Date:   Fri May 17 22:27:44 2013 +0200

    tools lib lk: Respect CROSS_COMPILE

    Make lk use CROSS_COMPILE, in order to be able to cross compile perf
    again.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1373936614-22224-1-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/lib/lk/Makefile b/tools/lib/lk/Makefile
index 280dd82..3dba0a4 100644
--- a/tools/lib/lk/Makefile
+++ b/tools/lib/lk/Makefile
@@ -3,21 +3,6 @@ include ../../scripts/Makefile.include
 CC = $(CROSS_COMPILE)gcc
 AR = $(CROSS_COMPILE)ar
 
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
-  $(if $(or $(findstring environment,$(origin $(1))),\
-            $(findstring command line,$(origin $(1)))),,\
-    $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-
 # guard against environment variables
 LIB_H=
 LIB_OBJS=
-- 
cgit v0.10.2